//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCCCState.h"
#include "PPCCallingConv.h"
#include "PPCFrameLowering.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCRegisterInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "ppc-lowering"

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
    cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
    cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
    cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableSCO("disable-ppc-sco",
    cl::desc("disable sibling call optimization on ppc"), cl::Hidden);

static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
    cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);

static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
    cl::desc("use absolute jump tables on ppc"), cl::Hidden);

static cl::opt<bool>
    DisablePerfectShuffle("ppc-disable-perfect-shuffle",
                          cl::desc("disable vector permute decomposition"),
                          cl::init(true), cl::Hidden);

cl::opt<bool> DisableAutoPairedVecSt(
    "disable-auto-paired-vec-st",
    cl::desc("disable automatically generated 32byte paired vector stores"),
    cl::init(true), cl::Hidden);

static cl::opt<unsigned> PPCMinimumJumpTableEntries(
    "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
    cl::desc("Set minimum number of entries to use a jump table on PPC"));

static cl::opt<unsigned> PPCGatherAllAliasesMaxDepth(
    "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
    cl::desc("max depth when checking alias info in GatherAllAliases()"));

static cl::opt<unsigned> PPCAIXTLSModelOptUseIEForLDLimit(
    "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
    cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
             "function to use initial-exec"));

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
STATISTIC(ShufflesHandledWithVPERM,
          "Number of shuffles lowered to a VPERM or XXPERM");
STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");

static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);

static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);

static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";

// A faster local-[exec|dynamic] TLS access sequence (enabled with the
// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
// variables; consistent with the IBM XL compiler, we apply a max size of
// slightly under 32KB.
constexpr uint64_t AIXSmallTlsPolicySizeLimit = 32751;

// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // Initialize map that relates the PPC addressing modes to the computed flags
  // of a load/store instruction. The map is used to determine the optimal
  // addressing mode when selecting load and stores.
  initializeAddrModeMap();
  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  if (!useSoftFloat()) {
    if (Subtarget.hasSPE()) {
      addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
      // EFPU2 APU only supports f32
      if (!Subtarget.hasEFPU2())
        addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
    } else {
      addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
      addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
    }
  }

  // Match BITREVERSE to customized fast code sequence in the td file.
  setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
  setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);

  // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);

  // Custom lower inline assembly to check for special registers.
  setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
  setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
  }

  if (Subtarget.isISA3_0()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
    setTruncStoreAction(MVT::f64, MVT::f16, Legal);
    setTruncStoreAction(MVT::f32, MVT::f16, Legal);
  } else {
    // No extending loads from f16 or HW conversions back and forth.
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  }

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
  if (!Subtarget.hasSPE()) {
    setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
    setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
  }

  // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Legal);
    setOperationAction(ISD::ADDE, VT, Legal);
    setOperationAction(ISD::SUBC, VT, Legal);
    setOperationAction(ISD::SUBE, VT, Legal);
  }

  if (Subtarget.useCRBits()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

    if (isPPC64 || Subtarget.hasFPCVT()) {
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);

      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::SINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);

      setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i1, Promote);
      AddPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i1, Promote);
      AddPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);

      setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
      AddPromotedToType(ISD::FP_TO_SINT, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
      AddPromotedToType(ISD::FP_TO_UINT, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
    } else {
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
    }

    // PowerPC does not support direct load/store of condition registers.
    setOperationAction(ISD::LOAD, MVT::i1, Custom);
    setOperationAction(ISD::STORE, MVT::i1, Custom);

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)
      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);

    for (MVT VT : MVT::integer_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
      setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
      setTruncStoreAction(VT, MVT::i1, Expand);
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).
  setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
  setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);
  setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions unless we are on P9.
  // On P9 we may use a hardware instruction to compute the remainder.
  // When the result of both the remainder and the division is required it is
  // more efficient to compute the remainder from the result of the division
  // rather than use the remainder instruction. The instructions are legalized
  // directly because the DivRemPairsPass performs the transformation at the IR
  // level.
  if (Subtarget.isISA3_0()) {
    setOperationAction(ISD::SREM, MVT::i32, Legal);
    setOperationAction(ISD::UREM, MVT::i32, Legal);
    setOperationAction(ISD::SREM, MVT::i64, Legal);
    setOperationAction(ISD::UREM, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::SREM, MVT::i32, Expand);
    setOperationAction(ISD::UREM, MVT::i32, Expand);
    setOperationAction(ISD::SREM, MVT::i64, Expand);
    setOperationAction(ISD::UREM, MVT::i64, Expand);
  }

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // Handle constrained floating-point operations of scalar.
  // TODO: Handle SPE specific operation.
  setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
  setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
  setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
  setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
  setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);

  setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
  setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
  setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
  setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);

  if (!Subtarget.hasSPE()) {
    setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
  }

  if (Subtarget.hasVSX()) {
    setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);
  }

  if (Subtarget.hasFSQRT()) {
    setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FCEIL, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);

    setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FCEIL, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);
  }

  // We don't support sin/cos/sqrt/fmod/pow
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);

  // MASS transformation for LLVM intrinsics with replicating fast-math flag
  // to be consistent with the PPCGenScalarMASSEntries pass.
  if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
    setOperationAction(ISD::FSIN, MVT::f64, Custom);
    setOperationAction(ISD::FCOS, MVT::f64, Custom);
    setOperationAction(ISD::FPOW, MVT::f64, Custom);
    setOperationAction(ISD::FLOG, MVT::f64, Custom);
    setOperationAction(ISD::FLOG10, MVT::f64, Custom);
    setOperationAction(ISD::FEXP, MVT::f64, Custom);
    setOperationAction(ISD::FSIN, MVT::f32, Custom);
    setOperationAction(ISD::FCOS, MVT::f32, Custom);
    setOperationAction(ISD::FPOW, MVT::f32, Custom);
    setOperationAction(ISD::FLOG, MVT::f32, Custom);
    setOperationAction(ISD::FLOG10, MVT::f32, Custom);
    setOperationAction(ISD::FEXP, MVT::f32, Custom);
  }

  if (Subtarget.hasSPE()) {
    setOperationAction(ISD::FMA, MVT::f64, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Expand);
  } else {
    setOperationAction(ISD::FMA, MVT::f64, Legal);
    setOperationAction(ISD::FMA, MVT::f32, Legal);
  }

  if (Subtarget.hasSPE())
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
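
  // Reading the current rounding mode requires inspecting the FPSCR, so
  // GET_ROUNDING is custom lowered.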
  setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);

  // If we're enabling GP optimizations, use hardware square root
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
        Subtarget.hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  if (Subtarget.hasFCPSGN()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
  } else {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FROUND, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
  }

  // Prior to P10, PowerPC does not have BSWAP, but we can use vector BSWAP
  // instruction xxbrd to speed up scalar BSWAP64.
  if (Subtarget.isISA3_1()) {
    setOperationAction(ISD::BSWAP, MVT::i32, Legal);
    setOperationAction(ISD::BSWAP, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);
    setOperationAction(
        ISD::BSWAP, MVT::i64,
        (Subtarget.hasP9Vector() && Subtarget.isPPC64()) ? Custom : Expand);
  }

  // CTPOP or CTTZ were introduced in P8/P9 respectively
  if (Subtarget.isISA3_0()) {
    setOperationAction(ISD::CTTZ, MVT::i32, Legal);
    setOperationAction(ISD::CTTZ, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::CTTZ, MVT::i32, Expand);
    setOperationAction(ISD::CTTZ, MVT::i64, Expand);
  }

  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
    setOperationAction(ISD::CTPOP, MVT::i32, Legal);
    setOperationAction(ISD::CTPOP, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
  }

  // PowerPC does not have ROTR
  setOperationAction(ISD::ROTR, MVT::i32, Expand);
  setOperationAction(ISD::ROTR, MVT::i64, Expand);

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have Select
    setOperationAction(ISD::SELECT, MVT::i32, Expand);
    setOperationAction(ISD::SELECT, MVT::i64, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Expand);
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::SETCC, MVT::i32, Custom);
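
  // With a hardware FPU, quiet and signaling strict FP compares map directly
  // onto the floating-point compare instructions.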
  if (Subtarget.hasFPU()) {
    setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal);

    setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal);
  }

  // PowerPC does not have BRCOND which requires SetCC
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::BRCOND, MVT::Other, Expand);
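
  // There is no BR_JT instruction; the generic expansion loads the target
  // address from the jump table and emits an indirect branch.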
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  if (Subtarget.hasSPE()) {
    // SPE has built-in conversions
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);

    // SPE supports signaling compare of f32/f64.
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
  } else {
    // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

    // PowerPC does not have [U|S]INT_TO_FP
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
  }
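
  // Direct-move instructions between GPRs and VSRs make FP<->integer bitcasts
  // cheap on 64-bit subtargets, so they can be treated as legal there.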
  if (Subtarget.hasDirectMove() && isPPC64) {
    setOperationAction(ISD::BITCAST, MVT::f32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i64, Legal);
    setOperationAction(ISD::BITCAST, MVT::f64, Legal);
    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::LRINT, MVT::f64, Legal);
      setOperationAction(ISD::LRINT, MVT::f32, Legal);
      setOperationAction(ISD::LLRINT, MVT::f64, Legal);
      setOperationAction(ISD::LLRINT, MVT::f32, Legal);
      setOperationAction(ISD::LROUND, MVT::f64, Legal);
      setOperationAction(ISD::LROUND, MVT::f32, Legal);
      setOperationAction(ISD::LLROUND, MVT::f64, Legal);
      setOperationAction(ISD::LLROUND, MVT::f32, Legal);
    }
  } else {
    setOperationAction(ISD::BITCAST, MVT::f32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i64, Expand);
    setOperationAction(ISD::BITCAST, MVT::f64, Expand);
  }

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuation, user-level threading, and so on. As a result, no
  // other SjLj exception interfaces are implemented and please don't build
  // your own exception handling based on them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  if (Subtarget.is64BitELFABI()) {
    // VAARG always uses double-word chunks, so promote anything smaller.
    setOperationAction(ISD::VAARG, MVT::i1, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::i8, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::i16, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::i32, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::Other, Expand);
  } else if (Subtarget.is32BitELFABI()) {
    // VAARG is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VAARG, MVT::Other, Custom);
    setOperationAction(ISD::VAARG, MVT::i64, Custom);
  } else {
    setOperationAction(ISD::VAARG, MVT::Other, Expand);
  }

  // VACOPY is custom lowered with the 32-bit SVR4 ABI.
  if (Subtarget.is32BitELFABI())
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  // Comparisons that require checking two conditions.
  if (Subtarget.hasSPE()) {
    setCondCodeAction(ISD::SETO, MVT::f32, Expand);
    setCondCodeAction(ISD::SETO, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
  }
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
  setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);

  if (Subtarget.has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
    }
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    if (Subtarget.hasSPE()) {
      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
    } else {
      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
    }
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
      setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  // PowerPC has better expansions for funnel shifts than the generic
  // TargetLowering::expandFunnelShift.
  if (Subtarget.has64BitSupport()) {
    setOperationAction(ISD::FSHL, MVT::i64, Custom);
    setOperationAction(ISD::FSHR, MVT::i64, Custom);
  }
  setOperationAction(ISD::FSHL, MVT::i32, Custom);
  setOperationAction(ISD::FSHR, MVT::i32, Custom);

  if (Subtarget.hasVSX()) {
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
  }
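
  // Altivec provides saturating add/subtract for the byte, halfword and word
  // integer vector types.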
  if (Subtarget.hasAltivec()) {
    for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
      setOperationAction(ISD::SADDSAT, VT, Legal);
      setOperationAction(ISD::SSUBSAT, VT, Legal);
      setOperationAction(ISD::UADDSAT, VT, Legal);
      setOperationAction(ISD::USUBSAT, VT, Legal);
    }

    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);

      // For v2i64, these are only valid with P8Vector. This is corrected after
      // the loop.
      if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
        setOperationAction(ISD::SMAX, VT, Legal);
        setOperationAction(ISD::SMIN, VT, Legal);
        setOperationAction(ISD::UMAX, VT, Legal);
        setOperationAction(ISD::UMIN, VT, Legal);
      } else {
        setOperationAction(ISD::SMAX, VT, Expand);
        setOperationAction(ISD::SMIN, VT, Expand);
        setOperationAction(ISD::UMAX, VT, Expand);
        setOperationAction(ISD::UMIN, VT, Expand);
      }

      if (Subtarget.hasVSX()) {
        setOperationAction(ISD::FMAXNUM, VT, Legal);
        setOperationAction(ISD::FMINNUM, VT, Legal);
      }

      // Vector instructions introduced in P8
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
        setOperationAction(ISD::CTPOP, VT, Legal);
        setOperationAction(ISD::CTLZ, VT, Legal);
      } else {
        setOperationAction(ISD::CTPOP, VT, Expand);
        setOperationAction(ISD::CTLZ, VT, Expand);
      }

      // Vector instructions introduced in P9
      if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
        setOperationAction(ISD::CTTZ, VT, Legal);
      else
        setOperationAction(ISD::CTTZ, VT, Expand);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType(ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND, VT, Promote);
      AddPromotedToType(ISD::AND, VT, MVT::v4i32);
      setOperationAction(ISD::OR, VT, Promote);
      AddPromotedToType(ISD::OR, VT, MVT::v4i32);
      setOperationAction(ISD::XOR, VT, Promote);
      AddPromotedToType(ISD::XOR, VT, MVT::v4i32);
      setOperationAction(ISD::LOAD, VT, Promote);
      AddPromotedToType(ISD::LOAD, VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType(ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::SELECT_CC, VT, Promote);
      AddPromotedToType(ISD::SELECT_CC, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType(ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL, VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL, VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT, VT, Expand);
      setOperationAction(ISD::FLDEXP, VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::BSWAP, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }
    setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
    if (!Subtarget.hasP8Vector()) {
      setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
      setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    // Vector truncates to sub-word integer that fit in an Altivec/VSX register
    // are cheap, so handle them before they get expanded to scalar.
    setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);

    setOperationAction(ISD::AND, MVT::v4i32, Legal);
    setOperationAction(ISD::OR, MVT::v4i32, Legal);
    setOperationAction(ISD::XOR, MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD, MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);
    setOperationAction(ISD::STORE, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
    setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
    // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
    if (Subtarget.hasAltivec())
      for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
        setOperationAction(ISD::ROTL, VT, Legal);
    // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::ROTL, MVT::v2i64, Legal);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
    }

    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::MUL, MVT::v4i32, Legal);
    else
      setOperationAction(ISD::MUL, MVT::v4i32, Custom);

    if (Subtarget.isISA3_1()) {
      setOperationAction(ISD::MUL, MVT::v2i64, Legal);
      setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
      setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
      setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
      setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
      setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
      setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
      setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
      setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
      setOperationAction(ISD::UREM, MVT::v2i64, Legal);
      setOperationAction(ISD::SREM, MVT::v2i64, Legal);
      setOperationAction(ISD::UREM, MVT::v4i32, Legal);
      setOperationAction(ISD::SREM, MVT::v4i32, Legal);
      setOperationAction(ISD::UREM, MVT::v1i128, Legal);
      setOperationAction(ISD::SREM, MVT::v1i128, Legal);
      setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
      setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
      setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
    }
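
    // Halfword vector multiplies are directly supported; byte vector
    // multiplies are lowered with a custom sequence.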
    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not contain unordered floating-point compare instructions
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);

    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
      if (Subtarget.hasP8Vector()) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
      }
      if (Subtarget.hasDirectMove() && isPPC64) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
      }
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);

      // The nearbyint variants are not allowed to raise the inexact exception
      // so we can only code-gen them with unsafe math.
      if (TM.Options.UnsafeFPMath) {
        setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
        setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
      }

      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
      setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
      setOperationAction(ISD::FROUND, MVT::f64, Legal);
      setOperationAction(ISD::FRINT, MVT::f64, Legal);

      setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
      setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
      setOperationAction(ISD::FROUND, MVT::f32, Legal);
      setOperationAction(ISD::FRINT, MVT::f32, Legal);

      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
      setOperationAction(ISD::FMA, MVT::v2f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

      // Share the Altivec comparison restrictions.
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
      setOperationAction(ISD::STORE, MVT::v2f64, Legal);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);

      if (Subtarget.hasP8Vector())
        addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      if (Subtarget.hasP8Altivec()) {
        setOperationAction(ISD::SHL, MVT::v2i64, Legal);
        setOperationAction(ISD::SRA, MVT::v2i64, Legal);
        setOperationAction(ISD::SRL, MVT::v2i64, Legal);

        // 128 bit shifts can be accomplished via 3 instructions for SHL and
        // SRL, but not for SRA because of the instructions available:
        // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
        // doing
        setOperationAction(ISD::SHL, MVT::v1i128, Expand);
        setOperationAction(ISD::SRL, MVT::v1i128, Expand);
        setOperationAction(ISD::SRA, MVT::v1i128, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
      } else {
        setOperationAction(ISD::SHL, MVT::v2i64, Expand);
        setOperationAction(ISD::SRA, MVT::v2i64, Expand);
        setOperationAction(ISD::SRL, MVT::v2i64, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

        // VSX v2i64 only supports non-arithmetic operations.
        setOperationAction(ISD::ADD, MVT::v2i64, Expand);
        setOperationAction(ISD::SUB, MVT::v2i64, Expand);
      }

      if (Subtarget.isISA3_1())
        setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
      else
        setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
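
      // There are no separate v2i64 memory operations; promote loads and
      // stores to the v2f64 forms, which use the same VSX registers.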
1099 setOperationAction(ISD::LOAD
, MVT::v2i64
, Promote
);
1100 AddPromotedToType (ISD::LOAD
, MVT::v2i64
, MVT::v2f64
);
1101 setOperationAction(ISD::STORE
, MVT::v2i64
, Promote
);
1102 AddPromotedToType (ISD::STORE
, MVT::v2i64
, MVT::v2f64
);
1104 setOperationAction(ISD::VECTOR_SHUFFLE
, MVT::v2i64
, Custom
);
1106 setOperationAction(ISD::STRICT_SINT_TO_FP
, MVT::v2i64
, Legal
);
1107 setOperationAction(ISD::STRICT_UINT_TO_FP
, MVT::v2i64
, Legal
);
1108 setOperationAction(ISD::STRICT_FP_TO_SINT
, MVT::v2i64
, Legal
);
1109 setOperationAction(ISD::STRICT_FP_TO_UINT
, MVT::v2i64
, Legal
);
1110 setOperationAction(ISD::SINT_TO_FP
, MVT::v2i64
, Legal
);
1111 setOperationAction(ISD::UINT_TO_FP
, MVT::v2i64
, Legal
);
1112 setOperationAction(ISD::FP_TO_SINT
, MVT::v2i64
, Legal
);
1113 setOperationAction(ISD::FP_TO_UINT
, MVT::v2i64
, Legal
);
1115 // Custom handling for partial vectors of integers converted to
1116 // floating point. We already have optimal handling for v2i32 through
1117 // the DAG combine, so those aren't necessary.
1118 setOperationAction(ISD::STRICT_UINT_TO_FP
, MVT::v2i8
, Custom
);
1119 setOperationAction(ISD::STRICT_UINT_TO_FP
, MVT::v4i8
, Custom
);
1120 setOperationAction(ISD::STRICT_UINT_TO_FP
, MVT::v2i16
, Custom
);
1121 setOperationAction(ISD::STRICT_UINT_TO_FP
, MVT::v4i16
, Custom
);
1122 setOperationAction(ISD::STRICT_SINT_TO_FP
, MVT::v2i8
, Custom
);
1123 setOperationAction(ISD::STRICT_SINT_TO_FP
, MVT::v4i8
, Custom
);
1124 setOperationAction(ISD::STRICT_SINT_TO_FP
, MVT::v2i16
, Custom
);
1125 setOperationAction(ISD::STRICT_SINT_TO_FP
, MVT::v4i16
, Custom
);
1126 setOperationAction(ISD::UINT_TO_FP
, MVT::v2i8
, Custom
);
1127 setOperationAction(ISD::UINT_TO_FP
, MVT::v4i8
, Custom
);
1128 setOperationAction(ISD::UINT_TO_FP
, MVT::v2i16
, Custom
);
1129 setOperationAction(ISD::UINT_TO_FP
, MVT::v4i16
, Custom
);
1130 setOperationAction(ISD::SINT_TO_FP
, MVT::v2i8
, Custom
);
1131 setOperationAction(ISD::SINT_TO_FP
, MVT::v4i8
, Custom
);
1132 setOperationAction(ISD::SINT_TO_FP
, MVT::v2i16
, Custom
);
1133 setOperationAction(ISD::SINT_TO_FP
, MVT::v4i16
, Custom
);
1135 setOperationAction(ISD::FNEG
, MVT::v4f32
, Legal
);
1136 setOperationAction(ISD::FNEG
, MVT::v2f64
, Legal
);
1137 setOperationAction(ISD::FABS
, MVT::v4f32
, Legal
);
1138 setOperationAction(ISD::FABS
, MVT::v2f64
, Legal
);
1139 setOperationAction(ISD::FCOPYSIGN
, MVT::v4f32
, Legal
);
1140 setOperationAction(ISD::FCOPYSIGN
, MVT::v2f64
, Legal
);
1142 setOperationAction(ISD::BUILD_VECTOR
, MVT::v2i64
, Custom
);
1143 setOperationAction(ISD::BUILD_VECTOR
, MVT::v2f64
, Custom
);
1145 // Handle constrained floating-point operations of vector.
1146 // The predictor is `hasVSX` because altivec instruction has
1147 // no exception but VSX vector instruction has.
1148 setOperationAction(ISD::STRICT_FADD
, MVT::v4f32
, Legal
);
1149 setOperationAction(ISD::STRICT_FSUB
, MVT::v4f32
, Legal
);
1150 setOperationAction(ISD::STRICT_FMUL
, MVT::v4f32
, Legal
);
1151 setOperationAction(ISD::STRICT_FDIV
, MVT::v4f32
, Legal
);
1152 setOperationAction(ISD::STRICT_FMA
, MVT::v4f32
, Legal
);
1153 setOperationAction(ISD::STRICT_FSQRT
, MVT::v4f32
, Legal
);
1154 setOperationAction(ISD::STRICT_FMAXNUM
, MVT::v4f32
, Legal
);
1155 setOperationAction(ISD::STRICT_FMINNUM
, MVT::v4f32
, Legal
);
1156 setOperationAction(ISD::STRICT_FRINT
, MVT::v4f32
, Legal
);
1157 setOperationAction(ISD::STRICT_FFLOOR
, MVT::v4f32
, Legal
);
1158 setOperationAction(ISD::STRICT_FCEIL
, MVT::v4f32
, Legal
);
1159 setOperationAction(ISD::STRICT_FTRUNC
, MVT::v4f32
, Legal
);
1160 setOperationAction(ISD::STRICT_FROUND
, MVT::v4f32
, Legal
);
1162 setOperationAction(ISD::STRICT_FADD
, MVT::v2f64
, Legal
);
1163 setOperationAction(ISD::STRICT_FSUB
, MVT::v2f64
, Legal
);
1164 setOperationAction(ISD::STRICT_FMUL
, MVT::v2f64
, Legal
);
1165 setOperationAction(ISD::STRICT_FDIV
, MVT::v2f64
, Legal
);
1166 setOperationAction(ISD::STRICT_FMA
, MVT::v2f64
, Legal
);
1167 setOperationAction(ISD::STRICT_FSQRT
, MVT::v2f64
, Legal
);
1168 setOperationAction(ISD::STRICT_FMAXNUM
, MVT::v2f64
, Legal
);
1169 setOperationAction(ISD::STRICT_FMINNUM
, MVT::v2f64
, Legal
);
1170 setOperationAction(ISD::STRICT_FRINT
, MVT::v2f64
, Legal
);
1171 setOperationAction(ISD::STRICT_FFLOOR
, MVT::v2f64
, Legal
);
1172 setOperationAction(ISD::STRICT_FCEIL
, MVT::v2f64
, Legal
);
1173 setOperationAction(ISD::STRICT_FTRUNC
, MVT::v2f64
, Legal
);
1174 setOperationAction(ISD::STRICT_FROUND
, MVT::v2f64
, Legal
);
1176 addRegisterClass(MVT::v2i64
, &PPC::VSRCRegClass
);
      addRegisterClass(MVT::f128, &PPC::VRRCRegClass);

      for (MVT FPT : MVT::fp_valuetypes())
        setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);

      // Expand the SELECT to SELECT_CC
      setOperationAction(ISD::SELECT, MVT::f128, Expand);

      setTruncStoreAction(MVT::f128, MVT::f64, Expand);
      setTruncStoreAction(MVT::f128, MVT::f32, Expand);

      // No implementation for these ops for PowerPC.
      setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
      setOperationAction(ISD::FSIN, MVT::f128, Expand);
      setOperationAction(ISD::FCOS, MVT::f128, Expand);
      setOperationAction(ISD::FPOW, MVT::f128, Expand);
      setOperationAction(ISD::FPOWI, MVT::f128, Expand);
      setOperationAction(ISD::FREM, MVT::f128, Expand);
    }

    if (Subtarget.hasP8Altivec()) {
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
      addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
    }

    if (Subtarget.hasP9Vector()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

      // Test data class instructions store results in CR bits.
      if (Subtarget.useCRBits()) {
        setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
        setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
        setOperationAction(ISD::IS_FPCLASS, MVT::f128, Custom);
      }

      // 128 bit shifts can be accomplished via 3 instructions for SHL and
      // SRL, but not for SRA because of the instructions available:
      // VS{RL} and VS{RL}O.
      setOperationAction(ISD::SHL, MVT::v1i128, Legal);
      setOperationAction(ISD::SRL, MVT::v1i128, Legal);
      setOperationAction(ISD::SRA, MVT::v1i128, Expand);

      setOperationAction(ISD::FADD, MVT::f128, Legal);
      setOperationAction(ISD::FSUB, MVT::f128, Legal);
      setOperationAction(ISD::FDIV, MVT::f128, Legal);
      setOperationAction(ISD::FMUL, MVT::f128, Legal);
      setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);

      setOperationAction(ISD::FMA, MVT::f128, Legal);
      setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
      setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
      setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
      setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
      setCondCodeAction(ISD::SETONE, MVT::f128, Expand);

      setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
      setOperationAction(ISD::FRINT, MVT::f128, Legal);
      setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
      setOperationAction(ISD::FCEIL, MVT::f128, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
      setOperationAction(ISD::FROUND, MVT::f128, Legal);

      setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
      setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
      setOperationAction(ISD::BITCAST, MVT::i128, Custom);

      // Handle constrained floating-point operations of fp128
      setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal);
      setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal);
      setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal);
      setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal);
      setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal);
      setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal);
      setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal);
      setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
      setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
      setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal);
      setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal);
      setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal);
      setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal);
      setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal);
      setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal);
      setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
      setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
      setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
      setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
      setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
    } else if (Subtarget.hasVSX()) {
      setOperationAction(ISD::LOAD, MVT::f128, Promote);
      setOperationAction(ISD::STORE, MVT::f128, Promote);

      AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
      AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
      // Set FADD/FSUB as libcall to keep the legalizer from expanding the
      // fp_to_uint and int_to_fp.
      setOperationAction(ISD::FADD, MVT::f128, LibCall);
      setOperationAction(ISD::FSUB, MVT::f128, LibCall);

      setOperationAction(ISD::FMUL, MVT::f128, Expand);
      setOperationAction(ISD::FDIV, MVT::f128, Expand);
      setOperationAction(ISD::FNEG, MVT::f128, Expand);
      setOperationAction(ISD::FABS, MVT::f128, Expand);
      setOperationAction(ISD::FSQRT, MVT::f128, Expand);
      setOperationAction(ISD::FMA, MVT::f128, Expand);
      setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);

      // Expand the fp_extend if the target type is fp128.
      setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
      setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Expand);

      // Expand the fp_round if the source type is fp128.
      for (MVT VT : {MVT::f32, MVT::f64}) {
        setOperationAction(ISD::FP_ROUND, VT, Custom);
        setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom);
      }

      setOperationAction(ISD::SETCC, MVT::f128, Custom);
      setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
      setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom);
      setOperationAction(ISD::BR_CC, MVT::f128, Expand);

      // Lower the following f128 select_cc pattern:
      // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
      setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);

      // We need to handle f128 SELECT_CC with integer result type.
      setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
      setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
    }

    if (Subtarget.hasP9Altivec()) {
      if (Subtarget.isISA3_1()) {
        setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal);
        setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Legal);
        setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Legal);
        setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
      } else {
        setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
        setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
      }
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);

      setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
      setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
      setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
      setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
    }

    if (Subtarget.hasP10Vector()) {
      setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
    }
  }

  if (Subtarget.pairedVectorMemops()) {
    addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
    setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
    setOperationAction(ISD::STORE, MVT::v256i1, Custom);
  }
  if (Subtarget.hasMMA()) {
    if (Subtarget.isISAFuture())
      addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
    else
      addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
    setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
    setOperationAction(ISD::STORE, MVT::v512i1, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);
  }

  if (Subtarget.has64BitSupport())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  if (Subtarget.isISA3_1())
    setOperationAction(ISD::SRA, MVT::v1i128, Legal);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);

  if (!isPPC64) {
    setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
  }

  if (shouldInlineQuadwordAtomics()) {
    setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i128, Custom);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasAltivec()) {
    // Altivec instructions set fields to all zeros or all ones.
    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  }

  if (shouldInlineQuadwordAtomics())
    setMaxAtomicSizeInBitsSupported(128);
  else if (isPPC64)
    setMaxAtomicSizeInBitsSupported(64);
  else
    setMaxAtomicSizeInBitsSupported(32);

  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine({ISD::AND, ISD::ADD, ISD::SHL, ISD::SRA, ISD::SRL,
                       ISD::MUL, ISD::FMA, ISD::SINT_TO_FP, ISD::BUILD_VECTOR});
  if (Subtarget.hasFPCVT())
    setTargetDAGCombine(ISD::UINT_TO_FP);
  setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
  if (Subtarget.useCRBits())
    setTargetDAGCombine(ISD::BRCOND);
  setTargetDAGCombine({ISD::BSWAP, ISD::INTRINSIC_WO_CHAIN,
                       ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID});

  setTargetDAGCombine({ISD::SIGN_EXTEND, ISD::ZERO_EXTEND, ISD::ANY_EXTEND});

  setTargetDAGCombine({ISD::TRUNCATE, ISD::VECTOR_SHUFFLE});

  if (Subtarget.useCRBits()) {
    setTargetDAGCombine({ISD::TRUNCATE, ISD::SETCC, ISD::SELECT_CC});
  }

  setLibcallName(RTLIB::LOG_F128, "logf128");
  setLibcallName(RTLIB::LOG2_F128, "log2f128");
  setLibcallName(RTLIB::LOG10_F128, "log10f128");
  setLibcallName(RTLIB::EXP_F128, "expf128");
  setLibcallName(RTLIB::EXP2_F128, "exp2f128");
  setLibcallName(RTLIB::SIN_F128, "sinf128");
  setLibcallName(RTLIB::COS_F128, "cosf128");
  setLibcallName(RTLIB::SINCOS_F128, "sincosf128");
  setLibcallName(RTLIB::POW_F128, "powf128");
  setLibcallName(RTLIB::FMIN_F128, "fminf128");
  setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
  setLibcallName(RTLIB::REM_F128, "fmodf128");
  setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
  setLibcallName(RTLIB::CEIL_F128, "ceilf128");
  setLibcallName(RTLIB::FLOOR_F128, "floorf128");
  setLibcallName(RTLIB::TRUNC_F128, "truncf128");
  setLibcallName(RTLIB::ROUND_F128, "roundf128");
  setLibcallName(RTLIB::LROUND_F128, "lroundf128");
  setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
  setLibcallName(RTLIB::RINT_F128, "rintf128");
  setLibcallName(RTLIB::LRINT_F128, "lrintf128");
  setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
  setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
  setLibcallName(RTLIB::FMA_F128, "fmaf128");
  setLibcallName(RTLIB::FREXP_F128, "frexpf128");

  if (Subtarget.isAIXABI()) {
    setLibcallName(RTLIB::MEMCPY, isPPC64 ? "___memmove64" : "___memmove");
    setLibcallName(RTLIB::MEMMOVE, isPPC64 ? "___memmove64" : "___memmove");
    setLibcallName(RTLIB::MEMSET, isPPC64 ? "___memset64" : "___memset");
    setLibcallName(RTLIB::BZERO, isPPC64 ? "___bzero64" : "___bzero");
  }

  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits()) {
    setHasMultipleConditionRegisters();
    setJumpIsExpensive();
  }

  // TODO: The default entry number is set to 64. This stops most jump table
  // generation on PPC. But it is good for current PPC HWs because the indirect
  // branch via mtctr to the jump table may lead to bad branch prediction.
  // Re-evaluate this value on future HWs that can do better with mtctr.
  setMinimumJumpTableEntries(PPCMinimumJumpTableEntries);

  setMinFunctionAlignment(Align(4));

  switch (Subtarget.getCPUDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_A2:
  case PPC::DIR_E500:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9:
  case PPC::DIR_PWR10:
  case PPC::DIR_PWR11:
  case PPC::DIR_PWR_FUTURE:
    setPrefLoopAlignment(Align(16));
    setPrefFunctionAlignment(Align(16));
    break;
  }

  if (Subtarget.enableMachineScheduler())
    setSchedulingPreference(Sched::Source);
  else
    setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties(STI.getRegisterInfo());

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
      Subtarget.getCPUDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;
  } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of the function call, even when warm, can be
    // over one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
    MaxLoadsPerMemcmp = 128;
  } else {
    MaxLoadsPerMemcmp = 8;
    MaxLoadsPerMemcmpOptSize = 4;
  }

  IsStrictFPEnabled = true;

  // Let the subtarget (CPU) decide if a predictable select is more expensive
  // than the corresponding branch. This information is used in CGP to decide
  // when to convert selects into branches.
  PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();

  GatherAllAliasesMaxDepth = PPCGatherAllAliasesMaxDepth;
}

// *********************************** NOTE ************************************
// For selecting load and store instructions, the addressing modes are defined
// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
// patterns to match the load and store instructions.
//
// The TD definitions for the addressing modes correspond to their respective
// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
// address mode flags of a particular node. Afterwards, the computed address
// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
// accordingly, based on the preferred addressing mode.
//
// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
// MemOpFlags contains all the possible flags that can be used to compute the
// optimal addressing mode for load and store instructions.
// AddrMode contains all the possible load and store addressing modes available
// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
//
// When adding new load and store instructions, it is possible that new address
// flags may need to be added into MemOpFlags, and a new addressing mode will
// need to be added to AddrMode. An entry for the new addressing mode (consisting
// of the minimal and main distinguishing address flags for the new load/store
// instructions) will need to be added into initializeAddrModeMap() below.
// Finally, when adding new addressing modes, getAddrModeForFlags() will need
// to be updated to account for selecting the optimal addressing mode.
// *****************************************************************************
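// Illustrative example (added for exposition, not part of the original code):
// for a zero-extending 32-bit integer load of the form (load (add %reg, 16)),
// computeMOFlags() would produce a flag set along the lines of
//   PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_WordInt,
// which getAddrModeForFlags() maps onto PPC::AM_DForm (see the first entry of
// initializeAddrModeMap() below), so the D-Form (base register + 16-bit
// displacement) selector is used for the access.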
/// Initialize the map that relates the different addressing modes of the load
/// and store instructions to a set of flags. This ensures the load/store
/// instruction is correctly matched during instruction selection.
void PPCTargetLowering::initializeAddrModeMap() {
  AddrModesMap[PPC::AM_DForm] = {
      // LWZ, STW
      PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_WordInt,
      PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_WordInt,
      PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt,
      PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt,
      // LBZ, LHZ, STB, STH
      PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt,
      PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt,
      PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt,
      PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt,
      // LHA
      PPC::MOF_SExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt,
      PPC::MOF_SExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt,
      PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt,
      PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt,
      // LFS, LFD, STFS, STFD
      PPC::MOF_RPlusSImm16 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
      PPC::MOF_RPlusLo | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
      PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
      PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
  };
  AddrModesMap[PPC::AM_DSForm] = {
      // LWA
      PPC::MOF_SExt | PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_WordInt,
      PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt,
      PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt,
      // LD, STD
      PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_DoubleWordInt,
      PPC::MOF_NotAddNorCst | PPC::MOF_DoubleWordInt,
      PPC::MOF_AddrIsSImm32 | PPC::MOF_DoubleWordInt,
      // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
      PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
      PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
      PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
  };
  AddrModesMap[PPC::AM_DQForm] = {
      // LXV, STXV
      PPC::MOF_RPlusSImm16Mult16 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
      PPC::MOF_NotAddNorCst | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
      PPC::MOF_AddrIsSImm32 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
  };
  AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
                                       PPC::MOF_SubtargetP10};
  // TODO: Add mapping for quadword load/store.
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 &&
        VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
      MaxAlign = Align(32);
    else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
             MaxAlign < 16)
      MaxAlign = Align(16);
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Align EltAlign;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      Align EltAlign;
      getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
uint64_t PPCTargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  // 16byte and wider vectors are passed on 16byte boundary.
  // The rest is 8 on PPC64 and 4 on PPC32 boundary.
  Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
  if (Subtarget.hasAltivec())
    getMaxByValAlign(Ty, Alignment, Align(16));
  return Alignment.value();
}

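// Worked example (added for exposition, not part of the original code): on a
// 64-bit subtarget with Altivec, a by-value struct containing a <4 x i32>
// member is reported with 16-byte alignment (getMaxByValAlign raises the
// default of 8 to 16), while a struct of plain integers keeps the default
// 8-byte (PPC64) or 4-byte (PPC32) boundary.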
bool PPCTargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}

bool PPCTargetLowering::hasSPE() const {
  return Subtarget.hasSPE();
}

bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
  return VT.isScalarInteger();
}

bool PPCTargetLowering::shallExtractConstSplatVectorElementToStore(
    Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
  if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
    return false;

  if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
    if (VTy->getScalarType()->isIntegerTy()) {
      // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
      if (ElemSizeInBits == 32) {
        Index = Subtarget.isLittleEndian() ? 2 : 1;
        return true;
      }
      if (ElemSizeInBits == 64) {
        Index = Subtarget.isLittleEndian() ? 1 : 0;
        return true;
      }
    }
  }
  return false;
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((PPCISD::NodeType)Opcode) {
  case PPCISD::FIRST_NUMBER: break;
  case PPCISD::FSEL: return "PPCISD::FSEL";
  case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
  case PPCISD::XSMINC: return "PPCISD::XSMINC";
  case PPCISD::FCFID: return "PPCISD::FCFID";
  case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
  case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
  case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
  case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
  case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
  case PPCISD::FRE: return "PPCISD::FRE";
  case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
  case PPCISD::FTSQRT:
    return "PPCISD::FTSQRT";
  case PPCISD::FSQRT:
    return "PPCISD::FSQRT";
  case PPCISD::STFIWX: return "PPCISD::STFIWX";
  case PPCISD::VPERM: return "PPCISD::VPERM";
  case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
  case PPCISD::XXSPLTI_SP_TO_DP:
    return "PPCISD::XXSPLTI_SP_TO_DP";
  case PPCISD::XXSPLTI32DX:
    return "PPCISD::XXSPLTI32DX";
  case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
  case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
  case PPCISD::XXPERM:
    return "PPCISD::XXPERM";
  case PPCISD::VECSHL: return "PPCISD::VECSHL";
  case PPCISD::CMPB: return "PPCISD::CMPB";
  case PPCISD::Hi: return "PPCISD::Hi";
  case PPCISD::Lo: return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
  case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
  case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
  case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
  case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
  case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL: return "PPCISD::SRL";
  case PPCISD::SRA: return "PPCISD::SRA";
  case PPCISD::SHL: return "PPCISD::SHL";
  case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
  case PPCISD::CALL: return "PPCISD::CALL";
  case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
  case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
  case PPCISD::CALL_RM:
    return "PPCISD::CALL_RM";
  case PPCISD::CALL_NOP_RM:
    return "PPCISD::CALL_NOP_RM";
  case PPCISD::CALL_NOTOC_RM:
    return "PPCISD::CALL_NOTOC_RM";
  case PPCISD::MTCTR: return "PPCISD::MTCTR";
  case PPCISD::BCTRL: return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::BCTRL_RM:
    return "PPCISD::BCTRL_RM";
  case PPCISD::BCTRL_LOAD_TOC_RM:
    return "PPCISD::BCTRL_LOAD_TOC_RM";
  case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE";
  case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
  case PPCISD::MFVSR: return "PPCISD::MFVSR";
  case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
  case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
  case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
  case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
  case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
    return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
  case PPCISD::ANDI_rec_1_EQ_BIT:
    return "PPCISD::ANDI_rec_1_EQ_BIT";
  case PPCISD::ANDI_rec_1_GT_BIT:
    return "PPCISD::ANDI_rec_1_GT_BIT";
  case PPCISD::VCMP: return "PPCISD::VCMP";
  case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
  case PPCISD::LBRX: return "PPCISD::LBRX";
  case PPCISD::STBRX: return "PPCISD::STBRX";
  case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
  case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
  case PPCISD::STXSIX: return "PPCISD::STXSIX";
  case PPCISD::VEXTS: return "PPCISD::VEXTS";
  case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
  case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
  case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
  case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
  case PPCISD::ST_VSR_SCAL_INT:
    return "PPCISD::ST_VSR_SCAL_INT";
  case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ: return "PPCISD::BDNZ";
  case PPCISD::BDZ: return "PPCISD::BDZ";
  case PPCISD::MFFS: return "PPCISD::MFFS";
  case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET: return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
  case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
  case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
  case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX";
  case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER";
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
  case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
  case PPCISD::TLSLD_AIX: return "PPCISD::TLSLD_AIX";
  case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::PADDI_DTPREL:
    return "PPCISD::PADDI_DTPREL";
  case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
  case PPCISD::SC: return "PPCISD::SC";
  case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
  case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
  case PPCISD::RFEBB: return "PPCISD::RFEBB";
  case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
  case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
  case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
  case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
  case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
  case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
  case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
  case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
  case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
  case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
    return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
  case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
    return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
  case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
  case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
  case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
  case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
  case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
  case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
  case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
  case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
  case PPCISD::STRICT_FADDRTZ:
    return "PPCISD::STRICT_FADDRTZ";
  case PPCISD::STRICT_FCTIDZ:
    return "PPCISD::STRICT_FCTIDZ";
  case PPCISD::STRICT_FCTIWZ:
    return "PPCISD::STRICT_FCTIWZ";
  case PPCISD::STRICT_FCTIDUZ:
    return "PPCISD::STRICT_FCTIDUZ";
  case PPCISD::STRICT_FCTIWUZ:
    return "PPCISD::STRICT_FCTIWUZ";
  case PPCISD::STRICT_FCFID:
    return "PPCISD::STRICT_FCFID";
  case PPCISD::STRICT_FCFIDU:
    return "PPCISD::STRICT_FCFIDU";
  case PPCISD::STRICT_FCFIDS:
    return "PPCISD::STRICT_FCFIDS";
  case PPCISD::STRICT_FCFIDUS:
    return "PPCISD::STRICT_FCFIDUS";
  case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
  case PPCISD::STORE_COND:
    return "PPCISD::STORE_COND";
  }
  return nullptr;
}

EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
                                          EVT VT) const {
  if (!VT.isVector())
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;

  return VT.changeVectorElementTypeToInteger();
}

bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
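// For illustration (added for exposition, not part of the original code):
// with ShuffleKind 0 on a big-endian target, the mask
//   {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}
// is accepted, i.e. the low (odd-numbered) byte of every halfword of the two
// concatenated inputs, which is exactly what vpkuhum produces.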
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),   i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
        return false;
  }
  return true;
}

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
        return false;
  }
  return true;
}

/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
/// current subtarget.
///
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
  if (!Subtarget.hasP8Vector())
    return false;

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+4) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  }
}

/**
 * Common function used to match vmrgew and vmrgow shuffles
 *
 * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target machine.
 *   - Little Endian:
 *     - Use offset of 0 to check for odd elements
 *     - Use offset of 4 to check for even elements
 *   - Big Endian:
 *     - Use offset of 0 to check for even elements
 *     - Use offset of 4 to check for odd elements
 * A detailed description of the vector element ordering for little endian and
 * big endian can be found at
 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
 * Targeting your applications - what little endian and big endian IBM XL C/C++
 * compiler differences mean to you
 *
 * The mask to the shuffle vector instruction specifies the indices of the
 * elements from the two input vectors to place in the result. The elements are
 * numbered in array-access order, starting with the first vector. These vectors
 * are always of type v16i8, thus each vector will contain 16 elements of size
 * 8. More info on the shuffle vector can be found in the
 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
 * Language Reference.
 *
 * The RHSStartValue indicates whether the same input vectors are used (unary)
 * or two different input vectors are used, based on the following:
 *   - If the instruction uses the same vector for both inputs, the range of the
 *     indices will be 0 to 15. In this case, the RHSStart value passed should
 *     be 0.
 *   - If the instruction has two different vectors then the range of the
 *     indices will be 0 to 31. In this case, the RHSStart value passed should
 *     be 16 (indices 0-15 specify elements in the first vector while indices 16
 *     to 31 specify elements in the second vector).
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] IndexOffset Specifies whether to look for even or odd elements
 * \param[in] RHSStartValue Specifies the starting index for the righthand input
 * vector to the shuffle_vector instruction
 * \return true iff this shuffle vector represents an even or odd word merge
 */
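// Illustrative example (added for exposition, not part of the original code):
// on a big-endian target with two different inputs (RHSStartValue == 16) and
// IndexOffset == 0, the even-word merge accepted below is the mask
//   {0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27},
// i.e. words 0 and 2 of the first input interleaved with words 0 and 2 of the
// second input, which is what vmrgew produces.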
static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
                     unsigned RHSStartValue) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;

  for (unsigned i = 0; i < 2; ++i)
    for (unsigned j = 0; j < 4; ++j)
      if (!isConstantOrUndef(N->getMaskElt(i*4+j),
                             i*RHSStartValue+j+IndexOffset) ||
          !isConstantOrUndef(N->getMaskElt(i*4+j+8),
                             i*RHSStartValue+j+IndexOffset+8))
        return false;
  return true;
}

/**
 * Determine if the specified shuffle mask is suitable for the vmrgew or
 * vmrgow instructions.
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
 * \param[in] ShuffleKind Identify the type of merge:
 *   - 0 = big-endian merge with two different inputs;
 *   - 1 = either-endian merge with two identical inputs;
 *   - 2 = little-endian merge with two different inputs (inputs are swapped for
 *     little-endian merges).
 * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask is suitable for a vmrgew or vmrgow
 * instruction
 */
bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
                              unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    unsigned indexOffset = CheckEven ? 4 : 0;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  } else {
    unsigned indexOffset = CheckEven ? 0 : 4;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 0) // Normal
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  }
}

/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2). For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
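// For illustration (added for exposition, not part of the original code):
// with ShuffleKind 0 on a big-endian target, the mask
//   {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}
// selects 16 consecutive bytes starting at byte 4 of the concatenated inputs,
// so this routine returns a shift amount of 4 (a vsldoi by 4 bytes).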
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  bool isLE = DAG.getDataLayout().isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  if (isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
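// For illustration (added for exposition, not part of the original code):
// with EltSize == 4, the v16i8 mask
//   {4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7}
// is a splat of the 4-byte element starting at byte 4 (word element 1), so
// this predicate returns true for it.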
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  EVT VT = N->getValueType(0);
  if (VT == MVT::v2i64 || VT == MVT::v2f64)
    return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);

  assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
         EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");

  // The consecutive indices need to specify an element, not part of two
  // different elements. So abandon ship early if this isn't the case.
  if (N->getMaskElt(0) % EltSize != 0)
    return false;

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte element
  // splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}

/// Check that the mask is shuffling N byte elements. Within each N byte
/// element of the mask, the indices could be either in increasing or
/// decreasing order as long as they are consecutive.
/// \param[in] N the shuffle vector SD Node to analyze
/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
/// Word/DoubleWord/QuadWord).
/// \param[in] StepLen the delta indices number among the N byte element, if
/// the mask is in increasing/decreasing order then it is 1/-1.
/// \return true iff the mask is shuffling N byte elements.
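// For illustration (added for exposition, not part of the original code):
// with Width == 4 and StepLen == 1, the mask
//   {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11}
// passes the check (each group of four indices is consecutive and starts on a
// 4-byte boundary), whereas StepLen == -1 would instead require byte-reversed
// groups such as {3, 2, 1, 0, ...}.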
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
                                   int StepLen) {
  assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
         "Unexpected element width.");
  assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");

  unsigned NumOfElem = 16 / Width;
  unsigned MaskVal[16]; // Width is never greater than 16
  for (unsigned i = 0; i < NumOfElem; ++i) {
    MaskVal[0] = N->getMaskElt(i * Width);
    if ((StepLen == 1) && (MaskVal[0] % Width)) {
      return false;
    } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
      return false;
    }

    for (unsigned int j = 1; j < Width; ++j) {
      MaskVal[j] = N->getMaskElt(i * Width + j);
      if (MaskVal[j] != MaskVal[j-1] + StepLen) {
        return false;
      }
    }
  }

  return true;
}

bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;
  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };

  // Below, let H and L be arbitrary elements of the shuffle mask
  // where H is in the range [4,7] and L is in the range [0,3].
  // H, 1, 2, 3 or L, 5, 6, 7
  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
    InsertAtByte = IsLE ? 12 : 0;
    Swap = M0 < 4;
    return true;
  }
  // 0, H, 2, 3 or 4, L, 6, 7
  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
    InsertAtByte = IsLE ? 8 : 4;
    Swap = M1 < 4;
    return true;
  }
  // 0, 1, H, 3 or 4, 5, L, 7
  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
    InsertAtByte = IsLE ? 4 : 8;
    Swap = M2 < 4;
    return true;
  }
  // 0, 1, 2, H or 4, 5, 6, L
  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
    InsertAtByte = IsLE ? 0 : 12;
    Swap = M3 < 4;
    return true;
  }

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    ShiftElts = 0;
    Swap = true;
    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 12 : 0;
      return true;
    }
    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 8 : 4;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
      InsertAtByte = IsLE ? 4 : 8;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
      InsertAtByte = IsLE ? 0 : 12;
      return true;
    }
  }

  return false;
}

bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
  // Ensure each byte index of the word is consecutive.
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12, which are the beginning of words.
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    assert(M0 < 4 && "Indexing into an undef vector?");
    if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
      return false;

    ShiftElts = IsLE ? (4 - M0) % 4 : M0;
    Swap = false;
    return true;
  }

  // Ensure each word index of the ShuffleVector Mask is consecutive.
  if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
    return false;

  if (IsLE) {
    if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 3 left elements of the second vector
      // (or if there is no shift to be done at all).
      Swap = false;
      ShiftElts = (8 - M0) % 8;
    } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 3 left elements of the first vector
      // (or if we're shifting by 4 - thereby simply swapping the vectors).
      Swap = true;
      ShiftElts = (4 - M0) % 4;
    }
    return true;
  } else { // BE
    if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 4 elements of the first vector.
      Swap = false;
      ShiftElts = M0;
    } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 4 elements of the right vector.
      Swap = true;
      ShiftElts = M0 - 4;
    }
    return true;
  }
}

bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  if (!isNByteElemShuffleMask(N, Width, -1))
    return false;

  for (int i = 0; i < 16; i += Width)
    if (N->getMaskElt(i) != i + Width - 1)
      return false;

  return true;
}

bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 2);
}

bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 4);
}

bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 8);
}

bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 16);
}

/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
/// if the inputs to the instruction should be swapped and set \p DM to the
/// value for the immediate.
/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
/// AND element 0 of the result comes from the first input (LE) or second input
/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
/// mask.
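// Illustrative example (added for exposition, not part of the original code):
// on a big-endian target, a v16i8 mask selecting bytes {8..15, 24..31} has
// M0 == 1 and M1 == 3, so no swap is needed and DM is computed as
// (M0 << 1) + (M1 & 1) == 3, i.e. doubleword 1 of each input, matching
// xxpermdi with immediate 3.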
bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
                                bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  // Ensure each byte index of the double word is consecutive.
  if (!isNByteElemShuffleMask(N, 8, 1))
    return false;

  unsigned M0 = N->getMaskElt(0) / 8;
  unsigned M1 = N->getMaskElt(8) / 8;
  assert(((M0 | M1) < 4) && "A mask element out of bounds?");

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    if ((M0 | M1) < 2) {
      DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
      Swap = false;
      return true;
    } else
      return false;
  }

  if (IsLE) {
    if (M0 > 1 && M1 < 2) {
      Swap = false;
    } else if (M0 < 2 && M1 > 1) {
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (((~M1) & 1) << 1) + ((~M0) & 1);
    return true;
  } else { // BE
    if (M0 < 2 && M1 > 1) {
      Swap = false;
    } else if (M0 > 1 && M1 < 2) {
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (M0 << 1) + (M1 & 1);
    return true;
  }
}

/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
/// appropriate for PPC mnemonics (which have a big endian bias - namely
/// elements are counted from the left of the vector register).
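// Worked example (added for exposition, not part of the original code): on a
// little-endian target with EltSize == 4, a v16i8 mask splatting bytes 4..7
// has getMaskElt(0) == 4, so this routine returns (16 / 4) - 1 - 1 == 2;
// LE element 1 corresponds to splat index 2 in the big-endian-biased
// numbering used by the vspltw/xxspltw mnemonics.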
unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
                                         SelectionDAG &DAG) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  assert(isSplatShuffleMask(SVOp, EltSize));
  EVT VT = SVOp->getValueType(0);

  if (VT == MVT::v2i64 || VT == MVT::v2f64)
    return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
                                                : SVOp->getMaskElt(0);

  if (DAG.getDataLayout().isLittleEndian())
    return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
  else
    return SVOp->getMaskElt(0) / EltSize;
}

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted.  The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
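// For illustration (added for exposition, not part of the original code): for
// a v16i8 build_vector whose elements are all the constant 3 and ByteSize == 1,
// this helper returns a target constant 3, which the caller can materialize
// with a single "vspltisb 3".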
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal;

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).isUndef()) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

      if (!UniquedVals[i&(Multiple-1)].getNode())
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk. See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1. If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.

      LeadingZero &= isNullConstant(UniquedVals[i]);
      LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
      int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
      if (Val < 16)                                   // 0,0,0,4 -> vspltisw(4)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }
    if (LeadingOnes) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).isUndef()) continue;
    if (!OpVal.getNode())
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.

  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, check if it consists
  // of a repeated bit pattern of size ByteSize.
  if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
    return SDValue();

  // Properly sign extend the value.
  int MaskVal = SignExtend32(Value, ByteSize * 8);

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it
  if (SignExtend32<5>(MaskVal) == MaskVal)
    return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
  return SDValue();
}

//===----------------------------------------------------------------------===//
// Addressing Mode Selection
//===----------------------------------------------------------------------===//

/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value.  If so, this returns true and the
/// immediate.
bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
  if (!isa<ConstantSDNode>(N))
    return false;

  Imm = (int16_t)N->getAsZExtVal();
  if (N->getValueType(0) == MVT::i32)
    return Imm == (int32_t)N->getAsZExtVal();
  else
    return Imm == (int64_t)N->getAsZExtVal();
}
bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}

/// Used when computing address flags for selecting loads and stores.
/// If we have an OR, check if the LHS and RHS are provably disjoint.
/// An OR of two provably disjoint values is equivalent to an ADD.
/// Most PPC load/store instructions compute the effective address as a sum,
/// so doing this conversion is useful.
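// Worked example (added for exposition, not part of the original code): for an
// address of the form (or (shl %x, 4), 3), the DAG knows the low four bits of
// the left operand are zero and all but the low two bits of the constant are
// zero, so every bit is known zero in at least one operand and the OR can be
// treated as an ADD when forming a base+displacement address.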
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
  if (N.getOpcode() != ISD::OR)
    return false;
  KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
  if (!LHSKnown.Zero.getBoolValue())
    return false;
  KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
  return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
}

/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
/// be represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
                                               SDValue &Index,
                                               SelectionDAG &DAG) const {
  for (SDNode *U : N->uses()) {
    if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
      if (Memop->getMemoryVT() == MVT::f64) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }
  return false;
}

/// isIntS34Immediate - This method tests if the value of the given node can be
/// accurately represented as a sign extension from a 34-bit value.  If so,
/// this returns true and the immediate.
bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
  if (!isa<ConstantSDNode>(N))
    return false;

  Imm = (int64_t)N->getAsZExtVal();
  return isInt<34>(Imm);
}
bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
  return isIntS34Immediate(Op.getNode(), Imm);
}

/// SelectAddressRegReg - Given the specified addressed, check to see if it
/// can be represented as an indexed [r+r] operation.  Returns false if it
/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
/// non-zero and N can be represented by a base register plus a signed 16-bit
/// displacement, make a more precise judgement by checking (displacement % \p
/// EncodingAlignment).
bool PPCTargetLowering::SelectAddressRegReg(
    SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
    MaybeAlign EncodingAlignment) const {
  // If we have a PC Relative target flag don't select as [reg+reg]. It will be
  // a [pc+imm].
  if (SelectAddressPCRel(N, Base))
    return false;

  int16_t Imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    // Is there any SPE load/store (f64), which can't handle 16bit offset?
    // SPE load/store can only handle 8-bit offsets.
    if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
      return true;
    if (isIntS16Immediate(N.getOperand(1), Imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
      return false; // r+i
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false; // r+i

    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(N.getOperand(1), Imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
      return false; // r+i can fold it if we can.

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are provably
    // disjoint.
    KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));

    if (LHSKnown.Zero.getBoolValue()) {
      KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // carry.
      if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }

  return false;
}
// If we happen to be doing an i64 load or store into a stack slot that has
// less than a 4-byte alignment, then the frame-index elimination may need to
// use an indexed load or store instruction (because the offset may not be a
// multiple of 4). The extra register needed to hold the offset comes from the
// register scavenger, and it is possible that the scavenger will need to use
// an emergency spill slot. As a result, we need to make sure that a spill slot
// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
// stack slot.
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
  // FIXME: This does not handle the LWA case.
  if (VT != MVT::i64)
    return;

  // NOTE: We'll exclude negative FIs here, which come from argument
  // lowering, because there are no known test cases triggering this problem
  // using packed structures (or similar). We can remove this exclusion if
  // we find such a test case. The reason why this is so test-case driven is
  // because this entire 'fixup' is only to prevent crashes (from the
  // register scavenger) on not-really-valid inputs. For example, if we have:
  //   %a = alloca i1
  //   %b = bitcast i1* %a to i64*
  //   store i64* a, i64 b
  // then the store should really be marked as 'align 1', but is not. If it
  // were marked as 'align 1' then the indexed form would have been
  // instruction-selected initially, and the problem this 'fixup' is preventing
  // won't happen regardless.
  if (FrameIdx < 0)
    return;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  if (MFI.getObjectAlign(FrameIdx) >= Align(4))
    return;

  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setHasNonRISpills();
}
/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg.  If \p EncodingAlignment is non-zero, only accept
/// displacements that are multiples of that value.
bool PPCTargetLowering::SelectAddressRegImm(
    SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
    MaybeAlign EncodingAlignment) const {
  // FIXME dl should come from parent load or store, not from address
  SDLoc dl(N);

  // If we have a PC Relative target flag don't select as [reg+imm]. It will be
  // a [pc+imm].
  if (SelectAddressPCRel(N, Base))
    return false;

  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    int16_t imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
      Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
        fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!N.getOperand(1).getConstantOperandVal(1) &&
             "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true; // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    int16_t imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));

      if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        if (FrameIndexSDNode *FI =
                dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
        } else {
          Base = N.getOperand(0);
        }
        Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0".
    int16_t Imm;
    if (isIntS16Immediate(CN, Imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
      Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
      Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                             CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if ((CN->getValueType(0) == MVT::i32 ||
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
        (!EncodingAlignment ||
         isAligned(*EncodingAlignment, CN->getZExtValue()))) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
      Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);

      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
                                   MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
  } else {
    Base = N;
  }
  return true; // [r+0]
}

/// Similar to the 16-bit case but for instructions that take a 34-bit
/// displacement field (prefixed loads/stores).
bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
                                              SDValue &Base,
                                              SelectionDAG &DAG) const {
  // Only on 64-bit targets.
  if (N.getValueType() != MVT::i64)
    return false;

  SDLoc dl(N);
  int64_t Imm = 0;

  if (N.getOpcode() == ISD::ADD) {
    if (!isIntS34Immediate(N.getOperand(1), Imm))
      return false;
    Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    else
      Base = N.getOperand(0);
    return true;
  }

  if (N.getOpcode() == ISD::OR) {
    if (!isIntS34Immediate(N.getOperand(1), Imm))
      return false;
    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are
    // provably disjoint.
    KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
    if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
      return false;
    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    else
      Base = N.getOperand(0);
    Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
    return true;
  }

  if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
    Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
    Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
    return true;
  }

  return false;
}
/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
/// represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
                                                SDValue &Index,
                                                SelectionDAG &DAG) const {
  // Check to see if we can easily represent this as an [r+r] address.  This
  // will fail if it thinks that the address is more profitably represented as
  // reg+imm, e.g. where imm = 0.
  if (SelectAddressRegReg(N, Base, Index, DAG))
    return true;

  // If the address is the result of an add, we will utilize the fact that the
  // address calculation includes an implicit add.  However, we can reduce
  // register pressure if we do not materialize a constant just for use as the
  // index register.  We only get rid of the add if it is not an add of a
  // value and a 16-bit signed constant and both have a single use.
  int16_t imm = 0;
  if (N.getOpcode() == ISD::ADD &&
      (!isIntS16Immediate(N.getOperand(1), imm) ||
       !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  }

  // Otherwise, do it the hard way, using R0 as the base register.
  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                         N.getValueType());
  Index = N;
  return true;
}
template <typename Ty> static bool isValidPCRelNode(SDValue N) {
  Ty *PCRelCand = dyn_cast<Ty>(N);
  return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
}
/// Returns true if this address is a PC Relative address.
/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
  // This is a materialize PC Relative node. Always select this as PC Relative.
  Base = N;
  if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
    return true;
  if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
      isValidPCRelNode<GlobalAddressSDNode>(N) ||
      isValidPCRelNode<JumpTableSDNode>(N) ||
      isValidPCRelNode<BlockAddressSDNode>(N))
    return true;
  return false;
}
/// Returns true if we should use a direct load into vector instruction
/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST) {
  // If there are any other uses other than scalar to vector, then we should
  // keep it as a scalar load -> direct move pattern to prevent multiple
  // loads.
  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
  if (!LD)
    return false;

  EVT MemVT = LD->getMemoryVT();
  if (!MemVT.isSimple())
    return false;
  switch (MemVT.getSimpleVT().SimpleTy) {
  case MVT::i64:
    break;
  case MVT::i32:
    if (!ST.hasP8Vector())
      return false;
    break;
  case MVT::i16:
  case MVT::i8:
    if (!ST.hasP9Vector())
      return false;
    break;
  default:
    return false;
  }

  SDValue LoadedVal(N, 0);
  if (!LoadedVal.hasOneUse())
    return false;

  for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
       UI != UE; ++UI)
    if (UI.getUse().get().getResNo() == 0 &&
        UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
        UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
      return false;

  return true;
}
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  if (DisablePPCPreinc) return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  Align Alignment;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlign();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT = ST->getMemoryVT();
    Alignment = ST->getAlign();
    isLoad = false;
  } else
    return false;

  // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
  // instructions because we can fold these into a more efficient instruction
  // instead, (such as LXSD).
  if (isLoad && usePartialVectorLoads(N, Subtarget)) {
    return false;
  }

  // PowerPC doesn't have preinc load/store instructions for vectors
  if (VT.isVector())
    return false;

  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
    // Common code will reject creating a pre-inc form if the base pointer
    // is a frame index, or if N is a store and the base pointer is either
    // the same as or a predecessor of the value being stored.  Check for
    // those situations here, and try with swapped Base/Offset instead.
    bool Swap = false;

    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
      Swap = true;
    else if (!isLoad) {
      SDValue Val = cast<StoreSDNode>(N)->getValue();
      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
        Swap = true;
    }

    if (Swap)
      std::swap(Base, Offset);

    AM = ISD::PRE_INC;
    return true;
  }

  // LDU/STU can only handle immediates that are a multiple of 4.
  if (VT != MVT::i64) {
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
      return false;
  } else {
    // LDU/STU need an address with at least 4-byte alignment.
    if (Alignment < Align(4))
      return false;

    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}
//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Return true if we should reference labels using a PICBase, set the HiOpFlags
/// and LoOpFlags to the target MO flags.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
                               unsigned &HiOpFlags, unsigned &LoOpFlags,
                               const GlobalValue *GV = nullptr) {
  HiOpFlags = PPCII::MO_HA;
  LoOpFlags = PPCII::MO_LO;

  // Don't use the pic base if not in PIC relocation model.
  if (IsPIC) {
    HiOpFlags = PPCII::MO_PIC_HA_FLAG;
    LoOpFlags = PPCII::MO_PIC_LO_FLAG;
  }
}
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
                             SelectionDAG &DAG) {
  SDLoc DL(HiPart);
  EVT PtrVT = HiPart.getValueType();
  SDValue Zero = DAG.getConstant(0, DL, PtrVT);

  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);

  // With PIC, the first instruction is actually "GR+hi(&G)".
  if (isPIC)
    Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);

  // Generate non-pic code that has direct accesses to the constant pool.
  // The address of the global is just (hi(&g)+lo(&g)).
  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
}
static void setUsesTOCBasePtr(MachineFunction &MF) {
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setUsesTOCBasePtr();
}
static void setUsesTOCBasePtr(SelectionDAG &DAG) {
  setUsesTOCBasePtr(DAG.getMachineFunction());
}
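/// getTOCEntry - Build a TOC_ENTRY load of \p GA relative to the TOC base
/// pointer (X2 on 64-bit targets, R2 on 32-bit AIX, otherwise the PIC global
/// base register).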
SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
                                       SDValue GA) const {
  const bool Is64Bit = Subtarget.isPPC64();
  EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
  SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
                        : Subtarget.isAIXABI()
                              ? DAG.getRegister(PPC::R2, VT)
                              : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
  SDValue Ops[] = { GA, Reg };
  return DAG.getMemIntrinsicNode(
      PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
      MachinePointerInfo::getGOT(DAG.getMachineFunction()), std::nullopt,
      MachineMemOperand::MOLoad);
}
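/// LowerConstantPool - Lower a constant-pool address to a PC-relative
/// materialization, a TOC entry, or a Hi/Lo pair, depending on the ABI and
/// relocation model.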
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  const Constant *C = CP->getConstVal();

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    if (Subtarget.isUsingPCRelativeCalls()) {
      SDLoc DL(CP);
      EVT Ty = getPointerTy(DAG.getDataLayout());
      SDValue ConstPool = DAG.getTargetConstantPool(
          C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
      return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
    }
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
    return getTOCEntry(DAG, SDLoc(CP), GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);

  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA =
        DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, SDLoc(CP), GA);
  }

  SDValue CPIHi =
      DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
  SDValue CPILo =
      DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
  return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
}
// For 64-bit PowerPC, prefer the more compact relative encodings.
// This trades 32 bits per jump table entry for one or two instructions
// on the jump site.
unsigned PPCTargetLowering::getJumpTableEncoding() const {
  if (isJumpTableRelative())
    return MachineJumpTableInfo::EK_LabelDifference32;

  return TargetLowering::getJumpTableEncoding();
}
bool PPCTargetLowering::isJumpTableRelative() const {
  if (UseAbsoluteJumpTables)
    return false;
  if (Subtarget.isPPC64() || Subtarget.isAIXABI())
    return true;
  return TargetLowering::isJumpTableRelative();
}
SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                    SelectionDAG &DAG) const {
  if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
    return TargetLowering::getPICJumpTableRelocBase(Table, DAG);

  switch (getTargetMachine().getCodeModel()) {
  case CodeModel::Small:
  case CodeModel::Medium:
    return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
  default:
    return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
                       getPointerTy(DAG.getDataLayout()));
  }
}
const MCExpr *
PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                                unsigned JTI,
                                                MCContext &Ctx) const {
  if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);

  switch (getTargetMachine().getCodeModel()) {
  case CodeModel::Small:
  case CodeModel::Medium:
    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
  default:
    return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
  }
}
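/// LowerJumpTable - Lower a jump-table address using the same strategies as
/// constant pools: PC-relative materialization, a TOC entry, or a Hi/Lo pair.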
SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);

  // isUsingPCRelativeCalls() returns true when PCRelative is enabled
  if (Subtarget.isUsingPCRelativeCalls()) {
    SDLoc DL(JT);
    EVT Ty = getPointerTy(DAG.getDataLayout());
    SDValue GA =
        DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
    SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
    return MatAddr;
  }

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
    return getTOCEntry(DAG, SDLoc(JT), GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);

  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
                                        PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, SDLoc(GA), GA);
  }

  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
  return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
}
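/// LowerBlockAddress - Lower a blockaddress to a PC-relative address, a TOC
/// or GOT entry, or a Hi/Lo pair, depending on the ABI and relocation model.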
SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
  const BlockAddress *BA = BASDN->getBlockAddress();

  // isUsingPCRelativeCalls() returns true when PCRelative is enabled
  if (Subtarget.isUsingPCRelativeCalls()) {
    SDLoc DL(BASDN);
    EVT Ty = getPointerTy(DAG.getDataLayout());
    SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
                                           PPCII::MO_PCREL_FLAG);
    SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
    return MatAddr;
  }

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual BlockAddress is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
    return getTOCEntry(DAG, SDLoc(BASDN), GA);
  }

  // 32-bit position-independent ELF stores the BlockAddress in the .got.
  if (Subtarget.is32BitELFABI() && isPositionIndependent())
    return getTOCEntry(
        DAG, SDLoc(BASDN),
        DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
  return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
}
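/// LowerGlobalTLSAddress - Dispatch TLS address lowering to the AIX or
/// Linux/ELF implementation, depending on the target ABI.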
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                 SelectionDAG &DAG) const {
  if (Subtarget.isAIXABI())
    return LowerGlobalTLSAddressAIX(Op, DAG);

  return LowerGlobalTLSAddressLinux(Op, DAG);
}
/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
/// and then apply the update.
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model,
                                         SelectionDAG &DAG,
                                         const TargetMachine &TM) {
  // Initialize TLS model opt setting lazily:
  // (1) Use initial-exec for single TLS var references within current function.
  // (2) Use local-dynamic for multiple TLS var references within current
  //     function.
  PPCFunctionInfo *FuncInfo =
      DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
  if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
    SmallPtrSet<const GlobalValue *, 8> TLSGV;
    // Iterate over all instructions within current function, collect all TLS
    // global variables (global variables taken as the first parameter to
    // Intrinsic::threadlocal_address).
    const Function &Func = DAG.getMachineFunction().getFunction();
    for (Function::const_iterator BI = Func.begin(), BE = Func.end(); BI != BE;
         ++BI)
      for (BasicBlock::const_iterator II = BI->begin(), IE = BI->end();
           II != IE; ++II)
        if (II->getOpcode() == Instruction::Call)
          if (const CallInst *CI = dyn_cast<const CallInst>(&*II))
            if (Function *CF = CI->getCalledFunction())
              if (CF->isDeclaration() &&
                  CF->getIntrinsicID() == Intrinsic::threadlocal_address)
                if (const GlobalValue *GV =
                        dyn_cast<GlobalValue>(II->getOperand(0))) {
                  TLSModel::Model GVModel = TM.getTLSModel(GV);
                  if (GVModel == TLSModel::LocalDynamic)
                    TLSGV.insert(GV);
                }

    unsigned TLSGVCnt = TLSGV.size();
    LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
    if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
      FuncInfo->setAIXFuncUseTLSIEForLD();
    FuncInfo->setAIXFuncTLSModelOptInitDone();
  }

  if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
    LLVM_DEBUG(
        dbgs() << DAG.getMachineFunction().getName()
               << " function is using the TLS-IE model for TLS-LD access.\n");
    Model = TLSModel::InitialExec;
  }
}
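/// LowerGlobalTLSAddressAIX - Lower a thread-local global for the AIX ABI,
/// selecting among the local-exec, initial-exec, local-dynamic and
/// general-dynamic access sequences.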
SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
                                                    SelectionDAG &DAG) const {
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);

  if (DAG.getTarget().useEmulatedTLS())
    report_fatal_error("Emulated TLS is not yet supported on AIX");

  SDLoc dl(GA);
  const GlobalValue *GV = GA->getGlobal();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool Is64Bit = Subtarget.isPPC64();
  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);

  // Apply update to the TLS model.
  if (Subtarget.hasAIXShLibTLSModelOpt())
    updateForAIXShLibTLSModelOpt(Model, DAG, getTargetMachine());

  bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;

  if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
    bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
    bool HasAIXSmallTLSGlobalAttr = false;
    SDValue VariableOffsetTGA =
        DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
    SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
    SDValue TLSReg;

    if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
      if (GVar->hasAttribute("aix-small-tls"))
        HasAIXSmallTLSGlobalAttr = true;

    if (Is64Bit) {
      // For local-exec and initial-exec on AIX (64-bit), the sequence generated
      // involves a load of the variable offset (from the TOC), followed by an
      // add of the loaded variable offset to R13 (the thread pointer).
      // This code sequence looks like:
      //    ld reg1,var[TC](2)
      //    add reg2, reg1, r13     // r13 contains the thread pointer
      TLSReg = DAG.getRegister(PPC::X13, MVT::i64);

      // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
      // global variable attribute, produce a faster access sequence for
      // local-exec TLS variables where the offset from the TLS base is encoded
      // as an immediate operand.
      //
      // We only utilize the faster local-exec access sequence when the TLS
      // variable has a size within the policy limit. We treat types that are
      // not sized or are empty as being over the policy size limit.
      if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
          IsTLSLocalExecModel) {
        Type *GVType = GV->getValueType();
        if (GVType->isSized() && !GVType->isEmptyTy() &&
            GV->getDataLayout().getTypeAllocSize(GVType) <=
                AIXSmallTlsPolicySizeLimit)
          return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
      }
    } else {
      // For local-exec and initial-exec on AIX (32-bit), the sequence generated
      // involves loading the variable offset from the TOC, generating a call to
      // .__get_tpointer to get the thread pointer (which will be in R3), and
      // adding the two together:
      //    lwz reg1,var[TC](2)
      //    bla .__get_tpointer
      //    add reg2, reg1, r3
      TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);

      // We do not implement the 32-bit version of the faster access sequence
      // for local-exec that is controlled by the -maix-small-local-exec-tls
      // option, or the "aix-small-tls" global variable attribute.
      if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
        report_fatal_error("The small-local-exec TLS access sequence is "
                           "currently only supported on AIX (64-bit mode).");
    }
    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
  }

  if (Model == TLSModel::LocalDynamic) {
    bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();

    // We do not implement the 32-bit version of the faster access sequence
    // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
    if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
      report_fatal_error("The small-local-dynamic TLS access sequence is "
                         "currently only supported on AIX (64-bit mode).");

    // For local-dynamic on AIX, we need to generate one TOC entry for each
    // variable offset, and a single module-handle TOC entry for the entire
    // file.
    SDValue VariableOffsetTGA =
        DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
    SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);

    Module *M = DAG.getMachineFunction().getFunction().getParent();
    GlobalVariable *TLSGV =
        dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
            StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
    TLSGV->setThreadLocalMode(GlobalVariable::LocalDynamicTLSModel);
    assert(TLSGV && "Not able to create GV for _$TLSML.");
    SDValue ModuleHandleTGA =
        DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
    SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
    SDValue ModuleHandle =
        DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);

    // With the -maix-small-local-dynamic-tls option, produce a faster access
    // sequence for local-dynamic TLS variables where the offset from the
    // module-handle is encoded as an immediate operand.
    //
    // We only utilize the faster local-dynamic access sequence when the TLS
    // variable has a size within the policy limit. We treat types that are
    // not sized or are empty as being over the policy size limit.
    if (HasAIXSmallLocalDynamicTLS) {
      Type *GVType = GV->getValueType();
      if (GVType->isSized() && !GVType->isEmptyTy() &&
          GV->getDataLayout().getTypeAllocSize(GVType) <=
              AIXSmallTlsPolicySizeLimit)
        return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
                           ModuleHandle);
    }

    return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
  }

  // If Local- or Initial-exec or Local-dynamic is not possible or specified,
  // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
  // need to generate two TOC entries, one for the variable offset, one for the
  // region handle. The global address for the TOC entry of the region handle is
  // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
  // entry of the variable offset is created with MO_TLSGD_FLAG.
  SDValue VariableOffsetTGA =
      DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
  SDValue RegionHandleTGA =
      DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
  SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
  SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
  return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
                     RegionHandle);
}
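/// LowerGlobalTLSAddressLinux - Lower a thread-local global for the ELF ABIs,
/// emitting the medium-code-model access sequence for the selected TLS model.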
SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
                                                      SelectionDAG &DAG) const {
  // FIXME: TLS addresses currently use medium model code sequences,
  // which is the most useful form.  Eventually support for small and
  // large models could be added if users need it, at the cost of
  // additional complexity.
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(GA, DAG);

  SDLoc dl(GA);
  const GlobalValue *GV = GA->getGlobal();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool is64bit = Subtarget.isPPC64();
  const Module *M = DAG.getMachineFunction().getFunction().getParent();
  PICLevel::Level picLevel = M->getPICLevel();

  const TargetMachine &TM = getTargetMachine();
  TLSModel::Model Model = TM.getTLSModel(GV);

  if (Model == TLSModel::LocalExec) {
    if (Subtarget.isUsingPCRelativeCalls()) {
      SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
      SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_PCREL_FLAG);
      SDValue MatAddr =
          DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
      return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
    }

    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_HA);
    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_LO);
    SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
                             : DAG.getRegister(PPC::R2, MVT::i32);

    SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
    return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
  }

  if (Model == TLSModel::InitialExec) {
    bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
    SDValue TGA = DAG.getTargetGlobalAddress(
        GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
    SDValue TGATLS = DAG.getTargetGlobalAddress(
        GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
    SDValue TPOffset;
    if (IsPCRel) {
      SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
      TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
                             MachinePointerInfo());
    } else {
      SDValue GOTPtr;
      if (is64bit) {
        setUsesTOCBasePtr(DAG);
        SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
        GOTPtr =
            DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
      } else {
        if (!TM.isPositionIndependent())
          GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
        else if (picLevel == PICLevel::SmallPIC)
          GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
        else
          GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
      }
      TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
    }
    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
  }

  if (Model == TLSModel::GeneralDynamic) {
    if (Subtarget.isUsingPCRelativeCalls()) {
      SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_GOT_TLSGD_PCREL_FLAG);
      return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
    }

    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
                           GOTReg, TGA);
    } else {
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
                       GOTPtr, TGA, TGA);
  }

  if (Model == TLSModel::LocalDynamic) {
    if (Subtarget.isUsingPCRelativeCalls()) {
      SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_GOT_TLSLD_PCREL_FLAG);
      SDValue MatPCRel =
          DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
      return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
    }

    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
                           GOTReg, TGA);
    } else {
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
                                  PtrVT, GOTPtr, TGA, TGA);
    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
                                      PtrVT, TLSAddr, TGA);
    return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
  }

  llvm_unreachable("Unknown TLS model!");
}
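/// LowerGlobalAddress - Lower a global address to a PC-relative (possibly
/// GOT-indirect) form, a TOC entry, or a Hi/Lo pair.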
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  SDLoc DL(GSDN);
  const GlobalValue *GV = GSDN->getGlobal();

  // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    if (Subtarget.isUsingPCRelativeCalls()) {
      EVT Ty = getPointerTy(DAG.getDataLayout());
      if (isAccessedAsGotIndirect(Op)) {
        SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
                                                PPCII::MO_GOT_PCREL_FLAG);
        SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
        SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
                                   MachinePointerInfo());
        return Load;
      } else {
        SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
                                                PPCII::MO_PCREL_FLAG);
        return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
      }
    }
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
    return getTOCEntry(DAG, DL, GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);

  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
                                            GSDN->getOffset(),
                                            PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, DL, GA);
  }

  SDValue GAHi =
      DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
  SDValue GALo =
      DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);

  return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
}
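/// LowerSETCC - Custom lowering for SETCC: soften f128 compares into libcalls,
/// handle v2i64 equality via v4i32 Altivec compares, and rewrite integer
/// equality tests as an xor followed by a compare against zero.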
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  bool IsStrict = Op->isStrictFPOpcode();
  ISD::CondCode CC =
      cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
  SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
  SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
  EVT LHSVT = LHS.getValueType();
  SDLoc dl(Op);

  // Soften the setcc with libcall if it is fp128.
  if (LHSVT == MVT::f128) {
    assert(!Subtarget.hasP9Vector() &&
           "SETCC for f128 is already legal under Power9!");
    softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
                        Op->getOpcode() == ISD::STRICT_FSETCCS);
    if (RHS.getNode())
      LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
                        DAG.getCondCode(CC));
    if (IsStrict)
      return DAG.getMergeValues({LHS, Chain}, dl);
    return LHS;
  }

  assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");

  if (Op.getValueType() == MVT::v2i64) {
    // When the operands themselves are v2i64 values, we need to do something
    // special because VSX has no underlying comparison operations for these.
    if (LHS.getValueType() == MVT::v2i64) {
      // Equality can be handled by casting to the legal type for Altivec
      // comparisons, everything else needs to be expanded.
      if (CC != ISD::SETEQ && CC != ISD::SETNE)
        return SDValue();
      SDValue SetCC32 = DAG.getSetCC(
          dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
          DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
      int ShuffV[] = {1, 0, 3, 2};
      SDValue Shuff =
          DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
      return DAG.getBitcast(MVT::v2i64,
                            DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
                                        dl, MVT::v4i32, Shuff, SetCC32));
    }

    // We handle most of these in the usual way.
    return Op;
  }

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
  if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
    return V;

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized.  FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (C->isAllOnes() || C->isZero())
      return SDValue();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by xor'ing the rhs with the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit.  The
  // normal approach here uses sub to do this instead of xor.  Using xor exposes
  // the result to other bit-twiddling opportunities.
  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    EVT VT = Op.getValueType();
    SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
    return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
  }
  return SDValue();
}
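/// LowerVAARG - Custom va_arg lowering for 32-bit SVR4: pick between the
/// register save area and the overflow area based on the gpr/fpr index kept
/// in the va_list.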
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  SDLoc dl(Node);

  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");

  // gpr_index
  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    VAListPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = GprIndex.getValue(1);

  if (VT == MVT::i64) {
    // Check if GprIndex is even
    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
                                 DAG.getConstant(1, dl, MVT::i32));
    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
                                DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
                                          DAG.getConstant(1, dl, MVT::i32));
    // Align GprIndex to be even if it isn't
    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
                           GprIndex);
  }

  // fpr index is 1 byte after gpr
  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                               DAG.getConstant(1, dl, MVT::i32));

  // fpr
  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    FprPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = FprIndex.getValue(1);

  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                       DAG.getConstant(8, dl, MVT::i32));

  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                        DAG.getConstant(4, dl, MVT::i32));

  // areas
  SDValue OverflowArea =
      DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
  InChain = OverflowArea.getValue(1);

  SDValue RegSaveArea =
      DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
  InChain = RegSaveArea.getValue(1);

  // select overflow_area if index > 8
  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
                            DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);

  // adjustment constant gpr_index * 4/8
  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
                                    VT.isInteger() ? GprIndex : FprIndex,
                                    DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
                                                    MVT::i32));

  // OurReg = RegSaveArea + RegConstant
  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
                               RegConstant);

  // Floating types are 32 bytes into RegSaveArea
  if (VT.isFloatingPoint())
    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
                         DAG.getConstant(32, dl, MVT::i32));

  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                   VT.isInteger() ? GprIndex : FprIndex,
                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
                                                   MVT::i32));

  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
                              VT.isInteger() ? VAListPtr : FprPtr,
                              MachinePointerInfo(SV), MVT::i8);

  // determine if we should load from reg_save_area or overflow_area
  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);

  // increase overflow_area by 4/8 if gpr/fpr > 8
  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
                                          DAG.getConstant(VT.isInteger() ? 4 : 8,
                                                          dl, MVT::i32));

  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
                             OverflowAreaPlusN);

  InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
                              MachinePointerInfo(), MVT::i32);

  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
}
SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");

  // We have to copy the entire va_list struct:
  // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
  return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
                       DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
                       false, true, /*CI=*/nullptr, std::nullopt,
                       MachinePointerInfo(), MachinePointerInfo());
}
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  if (Subtarget.isAIXABI())
    report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");

  return Op.getOperand(0);
}
SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();

  assert((Op.getOpcode() == ISD::INLINEASM ||
          Op.getOpcode() == ISD::INLINEASM_BR) &&
         "Expecting Inline ASM node.");

  // If an LR store is already known to be required then there is not point in
  // checking this ASM as well.
  if (MFI.isLRStoreRequired())
    return Op;

  // Inline ASM nodes have an optional last operand that is an incoming Flag of
  // type MVT::Glue. We want to ignore this last operand if that is the case.
  unsigned NumOps = Op.getNumOperands();
  if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
    --NumOps;

  // Check all operands that may contain the LR.
  for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
    const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
    unsigned NumVals = Flags.getNumOperandRegisters();
    ++i; // Skip the ID value.

    switch (Flags.getKind()) {
    default:
      llvm_unreachable("Bad flags!");
    case InlineAsm::Kind::RegUse:
    case InlineAsm::Kind::Imm:
    case InlineAsm::Kind::Mem:
      i += NumVals;
      break;
    case InlineAsm::Kind::Clobber:
    case InlineAsm::Kind::RegDef:
    case InlineAsm::Kind::RegDefEarlyClobber: {
      for (; NumVals; --NumVals, ++i) {
        Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
        if (Reg != PPC::LR && Reg != PPC::LR8)
          continue;
        MFI.setLRStoreRequired();
        return Op;
      }
      break;
    }
    }
  }

  return Op;
}
PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op
,
3972 SelectionDAG
&DAG
) const {
3973 if (Subtarget
.isAIXABI())
3974 report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3976 SDValue Chain
= Op
.getOperand(0);
3977 SDValue Trmp
= Op
.getOperand(1); // trampoline
3978 SDValue FPtr
= Op
.getOperand(2); // nested function
3979 SDValue Nest
= Op
.getOperand(3); // 'nest' parameter value
3982 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
3983 bool isPPC64
= (PtrVT
== MVT::i64
);
3984 Type
*IntPtrTy
= DAG
.getDataLayout().getIntPtrType(*DAG
.getContext());
3986 TargetLowering::ArgListTy Args
;
3987 TargetLowering::ArgListEntry Entry
;
3989 Entry
.Ty
= IntPtrTy
;
3990 Entry
.Node
= Trmp
; Args
.push_back(Entry
);
3992 // TrampSize == (isPPC64 ? 48 : 40);
3993 Entry
.Node
= DAG
.getConstant(isPPC64
? 48 : 40, dl
,
3994 isPPC64
? MVT::i64
: MVT::i32
);
3995 Args
.push_back(Entry
);
3997 Entry
.Node
= FPtr
; Args
.push_back(Entry
);
3998 Entry
.Node
= Nest
; Args
.push_back(Entry
);
4000 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
4001 TargetLowering::CallLoweringInfo
CLI(DAG
);
4002 CLI
.setDebugLoc(dl
).setChain(Chain
).setLibCallee(
4003 CallingConv::C
, Type::getVoidTy(*DAG
.getContext()),
4004 DAG
.getExternalSymbol("__trampoline_setup", PtrVT
), std::move(Args
));
4006 std::pair
<SDValue
, SDValue
> CallResult
= LowerCallTo(CLI
);
4007 return CallResult
.second
;
4010 SDValue
PPCTargetLowering::LowerVASTART(SDValue Op
, SelectionDAG
&DAG
) const {
4011 MachineFunction
&MF
= DAG
.getMachineFunction();
4012 PPCFunctionInfo
*FuncInfo
= MF
.getInfo
<PPCFunctionInfo
>();
4013 EVT PtrVT
= getPointerTy(MF
.getDataLayout());
4017 if (Subtarget
.isPPC64() || Subtarget
.isAIXABI()) {
4018 // vastart just stores the address of the VarArgsFrameIndex slot into the
4019 // memory location argument.
4020 SDValue FR
= DAG
.getFrameIndex(FuncInfo
->getVarArgsFrameIndex(), PtrVT
);
4021 const Value
*SV
= cast
<SrcValueSDNode
>(Op
.getOperand(2))->getValue();
4022 return DAG
.getStore(Op
.getOperand(0), dl
, FR
, Op
.getOperand(1),
4023 MachinePointerInfo(SV
));
4026 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
4027 // We suppose the given va_list is already allocated.
4030 // char gpr; /* index into the array of 8 GPRs
4031 // * stored in the register save area
4032 // * gpr=0 corresponds to r3,
4033 // * gpr=1 to r4, etc.
4035 // char fpr; /* index into the array of 8 FPRs
4036 // * stored in the register save area
4037 // * fpr=0 corresponds to f1,
4038 // * fpr=1 to f2, etc.
4040 // char *overflow_arg_area;
4041 // /* location on stack that holds
4042 // * the next overflow argument
4044 // char *reg_save_area;
4045 // /* where r3:r10 and f1:f8 (if saved)
4050 SDValue ArgGPR
= DAG
.getConstant(FuncInfo
->getVarArgsNumGPR(), dl
, MVT::i32
);
4051 SDValue ArgFPR
= DAG
.getConstant(FuncInfo
->getVarArgsNumFPR(), dl
, MVT::i32
);
4052 SDValue StackOffsetFI
= DAG
.getFrameIndex(FuncInfo
->getVarArgsStackOffset(),
4054 SDValue FR
= DAG
.getFrameIndex(FuncInfo
->getVarArgsFrameIndex(),
4057 uint64_t FrameOffset
= PtrVT
.getSizeInBits()/8;
4058 SDValue ConstFrameOffset
= DAG
.getConstant(FrameOffset
, dl
, PtrVT
);
4060 uint64_t StackOffset
= PtrVT
.getSizeInBits()/8 - 1;
4061 SDValue ConstStackOffset
= DAG
.getConstant(StackOffset
, dl
, PtrVT
);
4063 uint64_t FPROffset
= 1;
4064 SDValue ConstFPROffset
= DAG
.getConstant(FPROffset
, dl
, PtrVT
);
4066 const Value
*SV
= cast
<SrcValueSDNode
>(Op
.getOperand(2))->getValue();
4068 // Store first byte : number of int regs
4069 SDValue firstStore
=
4070 DAG
.getTruncStore(Op
.getOperand(0), dl
, ArgGPR
, Op
.getOperand(1),
4071 MachinePointerInfo(SV
), MVT::i8
);
4072 uint64_t nextOffset
= FPROffset
;
4073 SDValue nextPtr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, Op
.getOperand(1),
4076 // Store second byte : number of float regs
4077 SDValue secondStore
=
4078 DAG
.getTruncStore(firstStore
, dl
, ArgFPR
, nextPtr
,
4079 MachinePointerInfo(SV
, nextOffset
), MVT::i8
);
4080 nextOffset
+= StackOffset
;
4081 nextPtr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, nextPtr
, ConstStackOffset
);
4083 // Store second word : arguments given on stack
4084 SDValue thirdStore
= DAG
.getStore(secondStore
, dl
, StackOffsetFI
, nextPtr
,
4085 MachinePointerInfo(SV
, nextOffset
));
4086 nextOffset
+= FrameOffset
;
4087 nextPtr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, nextPtr
, ConstFrameOffset
);
4089 // Store third word : arguments given in registers
4090 return DAG
.getStore(thirdStore
, dl
, FR
, nextPtr
,
4091 MachinePointerInfo(SV
, nextOffset
));
/// FPR - The set of FP registers that should be allocated for arguments
/// on Darwin and AIX.
static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
                                PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
                                PPC::F11, PPC::F12, PPC::F13};
/// CalculateStackSlotSize - Calculates the size reserved for this argument on
/// the stack.
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
                                       unsigned PtrByteSize) {
  unsigned ArgSize = ArgVT.getStoreSize();
  if (Flags.isByVal())
    ArgSize = Flags.getByValSize();

  // Round up to multiples of the pointer size, except for array members,
  // which are always packed.
  if (!Flags.isInConsecutiveRegs())
    ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

  return ArgSize;
}
/// CalculateStackSlotAlignment - Calculates the alignment of this argument
/// on the stack.
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
                                         ISD::ArgFlagsTy Flags,
                                         unsigned PtrByteSize) {
  Align Alignment(PtrByteSize);

  // Altivec parameters are padded to a 16 byte boundary.
  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
      ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
      ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
      ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
    Alignment = Align(16);

  // ByVal parameters are aligned as requested.
  if (Flags.isByVal()) {
    auto BVAlign = Flags.getNonZeroByValAlign();
    if (BVAlign > PtrByteSize) {
      if (BVAlign.value() % PtrByteSize != 0)
        llvm_unreachable(
            "ByVal alignment is not a multiple of the pointer size");

      Alignment = BVAlign;
    }
  }

  // Array members are always packed to their original alignment.
  if (Flags.isInConsecutiveRegs()) {
    // If the array member was split into multiple registers, the first
    // needs to be aligned to the size of the full type.  (Except for
    // ppcf128, which is only aligned as its f64 components.)
    if (Flags.isSplit() && OrigVT != MVT::ppcf128)
      Alignment = Align(OrigVT.getStoreSize());
    else
      Alignment = Align(ArgVT.getStoreSize());
  }

  return Alignment;
}
/// CalculateStackSlotUsed - Return whether this argument will use its
/// stack slot (instead of being passed in registers).  ArgOffset,
/// AvailableFPRs, and AvailableVRs must hold the current argument
/// position, and will be updated to account for this argument.
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
                                   unsigned PtrByteSize, unsigned LinkageSize,
                                   unsigned ParamAreaSize, unsigned &ArgOffset,
                                   unsigned &AvailableFPRs,
                                   unsigned &AvailableVRs) {
  bool UseMemory = false;

  // Respect alignment of argument on the stack.
  Align Alignment =
      CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
  ArgOffset = alignTo(ArgOffset, Alignment);
  // If there's no space left in the argument save area, we must
  // use memory (this check also catches zero-sized arguments).
  if (ArgOffset >= LinkageSize + ParamAreaSize)
    UseMemory = true;

  // Allocate argument on the stack.
  ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
  if (Flags.isInConsecutiveRegsLast())
    ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
  // If we overran the argument save area, we must use memory
  // (this check catches arguments passed partially in memory)
  if (ArgOffset > LinkageSize + ParamAreaSize)
    UseMemory = true;

  // However, if the argument is actually passed in an FPR or a VR,
  // we don't use memory after all.
  if (!Flags.isByVal()) {
    if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
      if (AvailableFPRs > 0) {
        --AvailableFPRs;
        return false;
      }
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
        ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
      if (AvailableVRs > 0) {
        --AvailableVRs;
        return false;
      }
  }

  return UseMemory;
}
/// EnsureStackAlignment - Round stack frame size up from NumBytes to
/// ensure minimum alignment required for target.
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
                                     unsigned NumBytes) {
  return alignTo(NumBytes, Lowering->getStackAlign());
}
SDValue PPCTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  if (Subtarget.isAIXABI())
    return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                    InVals);
  if (Subtarget.is64BitELFABI())
    return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                       InVals);
  assert(Subtarget.is32BitELFABI());
  return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                     InVals);
}

SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  // 32-bit SVR4 ABI Stack Frame Layout:
  //              +-----------------------------------+
  //        +-->  |            Back chain             |
  //        |     +-----------------------------------+
  //        |     | Floating-point register save area |
  //        |     +-----------------------------------+
  //        |     |    General register save area     |
  //        |     +-----------------------------------+
  //        |     |          CR save word             |
  //        |     +-----------------------------------+
  //        |     |         VRSAVE save word          |
  //        |     +-----------------------------------+
  //        |     |         Alignment padding         |
  //        |     +-----------------------------------+
  //        |     |     Vector register save area     |
  //        |     +-----------------------------------+
  //        |     |       Local variable space        |
  //        |     +-----------------------------------+
  //        |     |        Parameter list area        |
  //        |     +-----------------------------------+
  //        |     |           LR save word            |
  //        |     +-----------------------------------+
  // SP-->  +---  |            Back chain             |
  //              +-----------------------------------+
  //
  // Specifications:
  //   System V Application Binary Interface PowerPC Processor Supplement
  //   AltiVec Technology Programming Interface Manual

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  const Align PtrAlign(4);

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                    *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, PtrAlign);
  CCInfo.PreAnalyzeFormalArguments(Ins);

  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
  CCInfo.clearWasPPCF128();
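
  // Walk the locations assigned by the calling convention: arguments that
  // landed in registers are copied out of their physical registers into
  // virtual registers, while arguments that landed in memory are loaded
  // from fixed stack objects created below.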
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];

    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      const TargetRegisterClass *RC;
      EVT ValVT = VA.getValVT();

      switch (ValVT.getSimpleVT().SimpleTy) {
        default:
          llvm_unreachable("ValVT not supported by formal arguments Lowering");
        case MVT::i1:
        case MVT::i32:
          RC = &PPC::GPRCRegClass;
          break;
        case MVT::f32:
          if (Subtarget.hasP8Vector())
            RC = &PPC::VSSRCRegClass;
          else if (Subtarget.hasSPE())
            RC = &PPC::GPRCRegClass;
          else
            RC = &PPC::F4RCRegClass;
          break;
        case MVT::f64:
          if (Subtarget.hasVSX())
            RC = &PPC::VSFRCRegClass;
          else if (Subtarget.hasSPE())
            // SPE passes doubles in GPR pairs.
            RC = &PPC::GPRCRegClass;
          else
            RC = &PPC::F8RCRegClass;
          break;
        case MVT::v16i8:
        case MVT::v8i16:
        case MVT::v4i32:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v4f32:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v2f64:
        case MVT::v2i64:
          RC = &PPC::VRRCRegClass;
          break;
      }

      SDValue ArgValue;
      // Transform the arguments stored in physical registers into
      // virtual ones.
      if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
        assert(i + 1 < e && "No second half of double precision argument");
        Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
        Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
        SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
        SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
        if (!Subtarget.isLittleEndian())
          std::swap (ArgValueLo, ArgValueHi);
        ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
                               ArgValueHi);
      } else {
        Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
                                      ValVT == MVT::i1 ? MVT::i32 : ValVT);
        if (ValVT == MVT::i1)
          ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
      }

      InVals.push_back(ArgValue);
    } else {
      // Argument stored in memory.
      assert(VA.isMemLoc());

      // Get the extended size of the argument type in stack
      unsigned ArgSize = VA.getLocVT().getStoreSize();
      // Get the actual size of the argument type
      unsigned ObjSize = VA.getValVT().getStoreSize();
      unsigned ArgOffset = VA.getLocMemOffset();
      // Stack objects in PPC32 are right justified.
      ArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);

      // Create load nodes to retrieve arguments from the stack.
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(
          DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
    }
  }

  // Assign locations to all of the incoming aggregate by value arguments.
  // Aggregates passed by value are stored in the local variable space of the
  // caller's stack frame, right above the parameter list area.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                      ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);

  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea = CCByValInfo.getStackSize();
  MinReservedArea = std::max(MinReservedArea, LinkageSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized function's reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack frame.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  SmallVector<SDValue, 8> MemOps;

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    static const MCPhysReg GPArgRegs[] = {
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    };
    const unsigned NumGPArgRegs = std::size(GPArgRegs);

    static const MCPhysReg FPArgRegs[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8
    };
    unsigned NumFPArgRegs = std::size(FPArgRegs);

    if (useSoftFloat() || hasSPE())
      NumFPArgRegs = 0;

    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));

    // Make room for NumGPArgRegs and NumFPArgRegs.
    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
                NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;

    FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
        PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));

    FuncInfo->setVarArgsFrameIndex(
        MFI.CreateStackObject(Depth, Align(8), false));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // The fixed integer arguments of a variadic function are stored to the
    // VarArgsFrameIndex on the stack so that they may be loaded by
    // dereferencing the result of va_next.
    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }

    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
    // is set.
    // The double arguments are stored to the VarArgsFrameIndex
    // on the stack.
    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by eight for the next argument to store
      SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
                                       PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}

// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
                                             EVT ObjectVT, SelectionDAG &DAG,
                                             SDValue ArgVal,
                                             const SDLoc &dl) const {
  if (Flags.isSExt())
    ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
                         DAG.getValueType(ObjectVT));
  else if (Flags.isZExt())
    ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
                         DAG.getValueType(ObjectVT));

  return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
}

SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.

  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
         "fastcc not supported on varargs functions");

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 8;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = std::size(GPR);
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs = std::size(VR);

  // Do a first pass over the arguments to determine whether the ABI
  // guarantees that our caller has allocated the parameter save area
  // on its stack frame.  In the ELFv1 ABI, this is always the case;
  // in the ELFv2 ABI, it is true if this is a vararg function or if
  // any parameter is located in a stack slot.

  bool HasParameterArea = !isELFv2ABI || isVarArg;
  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = Num_FPR_Regs;
  unsigned AvailableVRs = Num_VR_Regs;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    if (Ins[i].Flags.isNest())
      continue;

    if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
                               PtrByteSize, LinkageSize, ParamAreaSize,
                               NumBytes, AvailableFPRs, AvailableVRs))
      HasParameterArea = true;
  }

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  SmallVector<SDValue, 8> MemOps;
  Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    EVT OrigVT = Ins[ArgNo].ArgVT;
    unsigned ObjSize = ObjectVT.getStoreSize();
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    // We re-align the argument offset for each argument, except when using the
    // fast calling convention, when we need to make sure we do that only when
    // we'll actually use a stack slot.
    unsigned CurArgOffset;
    Align Alignment;
    auto ComputeArgOffset = [&]() {
      /* Respect alignment of argument on the stack.  */
      Alignment =
          CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
      ArgOffset = alignTo(ArgOffset, Alignment);
      CurArgOffset = ArgOffset;
    };

    if (CallConv != CallingConv::Fast) {
      ComputeArgOffset();

      /* Compute GPR index associated with argument offset.  */
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
    }

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      if (CallConv == CallingConv::Fast)
        ComputeArgOffset();

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Empty aggregate parameters do not take up registers.  Examples:
      //   struct { } a;
      //   union  { } b;
      //   int c[0];
      // etc.  However, we have to provide a place-holder in InVals, so
      // pretend we have an 8-byte item at the current address for that
      // purpose.
      if (!ObjSize) {
        int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
        SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
        InVals.push_back(FIN);
        continue;
      }

      // Create a stack object covering all stack doublewords occupied
      // by the argument.  If the argument is (fully or partially) on
      // the stack, or if the argument is fully in registers but the
      // caller has allocated the parameter save anyway, we can refer
      // directly to the caller's stack frame.  Otherwise, create a
      // local copy in our own frame.
      int FI;
      if (HasParameterArea ||
          ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
        FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
      else
        FI = MFI.CreateStackObject(ArgSize, Alignment, false);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

      // Handle aggregates smaller than 8 bytes.
      if (ObjSize < PtrByteSize) {
        // The value of the object is its address, which differs from the
        // address of the enclosing doubleword on big-endian systems.
        SDValue Arg = FIN;
        if (!isLittleEndian) {
          SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
          Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
        }
        InVals.push_back(Arg);

        if (GPR_idx != Num_GPR_Regs) {
          Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
          FuncInfo->addLiveInAttr(VReg, Flags);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
          SDValue Store =
              DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
                                MachinePointerInfo(&*FuncArg), ObjType);
          MemOps.push_back(Store);
        }
        // Whether we copied from a register or not, advance the offset
        // into the parameter save area by a full doubleword.
        ArgOffset += PtrByteSize;
        continue;
      }

      // The value of the object is its address, which is the address of
      // its first stack doubleword.
      InVals.push_back(FIN);

      // Store whatever pieces of the object are in registers to memory.
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        if (GPR_idx == Num_GPR_Regs)
          break;

        Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
        SDValue Addr = FIN;
        if (j) {
          SDValue Off = DAG.getConstant(j, dl, PtrVT);
          Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
        }
        unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
        EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
        SDValue Store =
            DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
                              MachinePointerInfo(&*FuncArg, j), ObjType);
        MemOps.push_back(Store);
        ++GPR_idx;
      }
      ArgOffset += ArgSize;
      continue;
    }
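
    // Non-byval arguments: dispatch on the value type to decide whether the
    // argument arrives in a GPR, FPR, or VR, falling back to a load from the
    // parameter save area when the registers of that class are exhausted.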
    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (Flags.isNest()) {
        // The 'nest' parameter, if any, is passed in R11.
        Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        break;
      }

      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != Num_GPR_Regs) {
        Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // float aggregates.
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx],
                              Subtarget.hasP8Vector()
                                  ? &PPC::VSSRCRegClass
                                  : &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
                                                ? &PPC::VSFRCRegClass
                                                : &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
        // once we support fp <-> gpr moves.

        // This can only ever happen in the presence of f32 array types,
        // since otherwise we never run out of FPRs before running out
        // of GPRs.
        Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::f32) {
          if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
            ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, dl, MVT::i32));
          ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
        }

        ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
      }

      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array.  Otherwise, each float takes 8 bytes.
      if (CallConv != CallingConv::Fast || needsLoad) {
        ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
        ArgOffset += ArgSize;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      }
      break;

    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
    case MVT::f128:
      // These can be scalar arguments or elements of a vector array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // vector aggregates.
      if (VR_idx != Num_VR_Regs) {
        Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++VR_idx;
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += 16;
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      if (ObjSize < ArgSize && !isLittleEndian)
        CurArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea;
  if (HasParameterArea)
    MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
  else
    MinReservedArea = LinkageSize;

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack frame.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  // On ELFv2ABI spec, it writes:
  // C programs that are intended to be *portable* across different compilers
  // and architectures must use the header file <stdarg.h> to deal with variable
  // argument lists.
  if (isVarArg && MFI.hasVAStart()) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
        MFI.CreateFixedObject(PtrByteSize, Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
         GPR_idx < Num_GPR_Regs; ++GPR_idx) {
      Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by PtrByteSize for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}

/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tailcall.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
                                   unsigned ParamSize) {

  if (!isTailCall) return 0;

  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
  unsigned CallerMinReservedArea = FI->getMinReservedArea();
  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
  // Remember only if the new adjustment is bigger.
  if (SPDiff < FI->getTailCallSPDelta())
    FI->setTailCallSPDelta(SPDiff);

  return SPDiff;
}

static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);

static bool callsShareTOCBase(const Function *Caller,
                              const GlobalValue *CalleeGV,
                              const TargetMachine &TM) {
  // It does not make sense to call callsShareTOCBase() with a caller that
  // is PC Relative since PC Relative callers do not have a TOC.
#ifndef NDEBUG
  const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
  assert(!STICaller->isUsingPCRelativeCalls() &&
         "PC Relative callers do not have a TOC and cannot share a TOC Base");
#endif

  // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
  // don't have enough information to determine if the caller and callee share
  // the same TOC base, so we have to pessimistically assume they don't for
  // correctness.
  if (!CalleeGV)
    return false;

  // If the callee is preemptable, then the static linker will use a plt-stub
  // which saves the toc to the stack, and needs a nop after the call
  // instruction to convert to a toc-restore.
  if (!TM.shouldAssumeDSOLocal(CalleeGV))
    return false;

  // Functions with PC Relative enabled may clobber the TOC in the same DSO.
  // We may need a TOC restore in the situation where the caller requires a
  // valid TOC but the callee is PC Relative and does not.
  const Function *F = dyn_cast<Function>(CalleeGV);
  const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);

  // If we have an Alias we can try to get the function from there.
  if (Alias) {
    const GlobalObject *GlobalObj = Alias->getAliaseeObject();
    F = dyn_cast<Function>(GlobalObj);
  }

  // If we still have no valid function pointer we do not have enough
  // information to determine if the callee uses PC Relative calls so we must
  // assume that it does.
  if (!F)
    return false;

  // If the callee uses PC Relative we cannot guarantee that the callee won't
  // clobber the TOC of the caller and so we must assume that the two
  // functions do not share a TOC base.
  const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
  if (STICallee->isUsingPCRelativeCalls())
    return false;

  // If the GV is not a strong definition then we need to assume it can be
  // replaced by another function at link time. The function that replaces
  // it may not share the same TOC as the caller since the callee may be
  // replaced by a PC Relative version of the same function.
  if (!CalleeGV->isStrongDefinitionForLinker())
    return false;

  // The medium and large code models are expected to provide a sufficiently
  // large TOC to provide all data addressing needs of a module with a
  // single TOC.
  if (CodeModel::Medium == TM.getCodeModel() ||
      CodeModel::Large == TM.getCodeModel())
    return true;

  // Any explicitly-specified sections and section prefixes must also match.
  // Also, if we're using -ffunction-sections, then each function is always in
  // a different section (the same is true for COMDAT functions).
  if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
      Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
    return false;
  if (const auto *F = dyn_cast<Function>(CalleeGV)) {
    if (F->getSectionPrefix() != Caller->getSectionPrefix())
      return false;
  }

  return true;
}
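
// Returns true if any outgoing argument under the 64-bit ELF calling
// convention would have to be placed in the parameter save area on the stack
// rather than being passed entirely in registers.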
static bool
needStackSlotPassParameters(const PPCSubtarget &Subtarget,
                            const SmallVectorImpl<ISD::OutputArg> &Outs) {
  assert(Subtarget.is64BitELFABI());

  const unsigned PtrByteSize = 8;
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned NumGPRs = std::size(GPR);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs = std::size(VR);
  const unsigned ParamAreaSize = NumGPRs * PtrByteSize;

  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = NumFPRs;
  unsigned AvailableVRs = NumVRs;

  for (const ISD::OutputArg& Param : Outs) {
    if (Param.Flags.isNest()) continue;

    if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
                               LinkageSize, ParamAreaSize, NumBytes,
                               AvailableFPRs, AvailableVRs))
      return true;
  }
  return false;
}
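
// Returns true when the call site passes exactly the values of the caller's
// own formal arguments (allowing undef of the same type), in the same order,
// so the outgoing arguments already occupy the right stack slots for a
// sibling call.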
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
  if (CB.arg_size() != CallerFn->arg_size())
    return false;

  auto CalleeArgIter = CB.arg_begin();
  auto CalleeArgEnd = CB.arg_end();
  Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();

  for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
    const Value* CalleeArg = *CalleeArgIter;
    const Value* CallerArg = &(*CallerArgIter);
    if (CalleeArg == CallerArg)
      continue;

    // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
    //        tail call @callee([4 x i64] undef, [4 x i64] %b)
    //      }
    // 1st argument of callee is undef and has the same type as caller.
    if (CalleeArg->getType() == CallerArg->getType() &&
        isa<UndefValue>(CalleeArg))
      continue;

    return false;
  }

  return true;
}

// Returns true if TCO is possible between the callers and callees
// calling conventions.
static bool
areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
                                    CallingConv::ID CalleeCC) {
  // Tail calls are possible with fastcc and ccc.
  auto isTailCallableCC = [] (CallingConv::ID CC){
      return CC == CallingConv::C || CC == CallingConv::Fast;
  };
  if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
    return false;

  // We can safely tail call both fastcc and ccc callees from a c calling
  // convention caller. If the caller is fastcc, we may have less stack space
  // than a non-fastcc caller with the same signature so disable tail-calls in
  // that case.
  return CallerCC == CallingConv::C || CallerCC == CalleeCC;
}

bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
    const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
    CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
    bool isCalleeExternalSymbol) const {
  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;

  if (DisableSCO && !TailCallOpt) return false;

  // Variadic argument functions are not supported.
  if (isVarArg) return false;

  // Check that the calling conventions are compatible for tco.
  if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
    return false;

  // Caller contains any byval parameter is not supported.
  if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
    return false;

  // Callee contains any byval parameter is not supported, too.
  // Note: This is a quick work around, because in some cases, e.g.
  // caller's stack size > callee's stack size, we are still able to apply
  // sibling call optimization. For example, gcc is able to do SCO for caller1
  // in the following example, but not for caller2.
  //   __attribute__((noinline)) int callee(struct test v, struct test *b) {
  //     ...
  //   }
  //   void caller1(struct test a, struct test c, struct test *b) {
  //     callee(gTest, b); }
  //   void caller2(struct test *b) { callee(gTest, b); }
  if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
    return false;

  // If callee and caller use different calling conventions, we cannot pass
  // parameters on stack since offsets for the parameter area may be different.
  if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
    return false;

  // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
  // the caller and callee share the same TOC for TCO/SCO. If the caller and
  // callee potentially have different TOC bases then we cannot tail call since
  // we need to restore the TOC pointer after the call.
  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
  // We cannot guarantee this for indirect calls or calls to external functions.
  // When PC-Relative addressing is used, the concept of the TOC is no longer
  // applicable so this check is not required.
  // Check first for indirect calls.
  if (!Subtarget.isUsingPCRelativeCalls() &&
      !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
    return false;

  // Check if we share the TOC base.
  if (!Subtarget.isUsingPCRelativeCalls() &&
      !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
    return false;

  // TCO allows altering callee ABI, so we don't have to check further.
  if (CalleeCC == CallingConv::Fast && TailCallOpt)
    return true;

  if (DisableSCO) return false;

  // If callee use the same argument list that caller is using, then we can
  // apply SCO on this case. If it is not, then we need to check if callee needs
  // stack for passing arguments.
  // PC Relative tail calls may not have a CallBase.
  // If there is no CallBase we cannot verify if we have the same argument
  // list so assume that we don't have the same argument list.
  if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
      needStackSlotPassParameters(Subtarget, Outs))
    return false;
  else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
    return false;

  return true;
}

/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool PPCTargetLowering::IsEligibleForTailCallOptimization(
    const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
    CallingConv::ID CallerCC, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins) const {
  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
    return false;

  // Variable argument functions are not supported.
  if (isVarArg)
    return false;

  if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
    // Functions containing by val parameters are not supported.
    if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
      return false;

    // Non-PIC/GOT tail calls are supported.
    if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
      return true;

    // At the moment we can only do local tail calls (in same module, hidden
    // or protected) if we are generating PIC.
    if (CalleeGV)
      return CalleeGV->hasHiddenVisibility() ||
             CalleeGV->hasProtectedVisibility();
  }

  return false;
}

/// isCallCompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction.
static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return nullptr;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      SignExtend32<26>(Addr) != Addr)
    return nullptr;  // Top 6 bits have to be sext of immediate.

  return DAG
      .getConstant(
          (int)C->getZExtValue() >> 2, SDLoc(Op),
          DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
      .getNode();
}

namespace {

struct TailCallArgumentInfo {
  SDValue Arg;
  SDValue FrameIdxOp;
  int FrameIdx = 0;

  TailCallArgumentInfo() = default;
};

} // end anonymous namespace

/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static void StoreTailCallArgumentsToStackSlot(
    SelectionDAG &DAG, SDValue Chain,
    const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
    SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
    SDValue Arg = TailCallArgs[i].Arg;
    SDValue FIN = TailCallArgs[i].FrameIdxOp;
    int FI = TailCallArgs[i].FrameIdx;
    // Store relative to framepointer.
    MemOpChains.push_back(DAG.getStore(
        Chain, dl, Arg, FIN,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
  }
}

/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
/// the appropriate stack slot for the tail call optimized function call.
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
                                             SDValue OldRetAddr, SDValue OldFP,
                                             int SPDiff, const SDLoc &dl) {
  if (SPDiff) {
    // Calculate the new stack slot for the return address.
    MachineFunction &MF = DAG.getMachineFunction();
    const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
    const PPCFrameLowering *FL = Subtarget.getFrameLowering();
    bool isPPC64 = Subtarget.isPPC64();
    int SlotSize = isPPC64 ? 8 : 4;
    int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
    int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
                                                         NewRetAddrLoc, true);
    EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
    SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
    Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
                         MachinePointerInfo::getFixedStack(MF, NewRetAddr));
  }
  return Chain;
}

/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
/// the position of the argument.
static void
CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
                         SDValue Arg, int SPDiff, unsigned ArgOffset,
                         SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
  int Offset = ArgOffset + SPDiff;
  uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
  int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
  SDValue FIN = DAG.getFrameIndex(FI, VT);
  TailCallArgumentInfo Info;
  Info.Arg = Arg;
  Info.FrameIdxOp = FIN;
  Info.FrameIdx = FI;
  TailCallArguments.push_back(Info);
}

/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
/// stack slot. Returns the chain as result and the loaded frame pointers in
/// LROpOut/FPOpout. Used when tail calling.
SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
    SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
    SDValue &FPOpOut, const SDLoc &dl) const {
  if (SPDiff) {
    // Load the LR and FP stack slot for later adjusting.
    EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
    LROpOut = getReturnAddrFrameIndex(DAG);
    LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
    Chain = SDValue(LROpOut.getNode(), 1);
  }
  return Chain;
}

/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size".  Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
                                         SDValue Chain, ISD::ArgFlagsTy Flags,
                                         SelectionDAG &DAG, const SDLoc &dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
  return DAG.getMemcpy(
      Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false,
      /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
}

/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
/// tail calls.
static void LowerMemOpCallTo(
    SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
    SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
    bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
    SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
  if (!isTailCall) {
    if (isVector) {
      SDValue StackPtr;
      if (isPPC64)
        StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
      else
        StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                           DAG.getConstant(ArgOffset, dl, PtrVT));
    }
    MemOpChains.push_back(
        DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
    // Calculate and remember argument location.
  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
                                  TailCallArguments);
}
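
// Finish materializing a tail call's outgoing frame: spill the queued argument
// values into their final stack slots, store the saved return address, and
// close the call sequence so the tail-call node can be emitted.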
static void
PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
                const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
                SDValue FPOp,
                SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
  // might overwrite each other in case of tail call optimization.
  SmallVector<SDValue, 8> MemOpChains2;
  // Do not flag preceding copytoreg stuff together with the following stuff.
  InGlue = SDValue();
  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
                                    MemOpChains2, dl);
  if (!MemOpChains2.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);

  // Store the return address to the appropriate stack slot.
  Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);

  // Emit callseq_end just before tailcall node.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
  InGlue = Chain.getValue(1);
}

// Is this global address that of a function that can be called by name? (as
// opposed to something that must hold a descriptor for an indirect call).
static bool isFunctionGlobalAddress(const GlobalValue *GV) {
  if (GV) {
    if (GV->isThreadLocal())
      return false;

    return GV->getValueType()->isFunctionTy();
  }

  return false;
}
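
// Lower the result values of a call into the appropriate copies out of the
// physical result registers, applying the extension assertions required by
// the calling convention, and append them to InVals.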
SDValue PPCTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                    *DAG.getContext());

  CCRetInfo.AnalyzeCallResult(
      Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
               ? RetCC_PPC_Cold
               : RetCC_PPC);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Val;
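
    // Under SPE an f64 result comes back split across a pair of 32-bit GPRs;
    // reassemble the two halves, honouring the target endianness, before
    // handing the value back. Other results are plain register copies.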
    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InGlue);
      Chain = Lo.getValue(1);
      InGlue = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InGlue);
      Chain = Hi.getValue(1);
      InGlue = Hi.getValue(2);
      if (!Subtarget.isLittleEndian())
        std::swap(Lo, Hi);
      Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
    } else {
      Val = DAG.getCopyFromReg(Chain, dl,
                               VA.getLocReg(), VA.getLocVT(), InGlue);
      Chain = Val.getValue(1);
      InGlue = Val.getValue(2);
    }

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::ZExt:
      Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::SExt:
      Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}
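
// Decide whether this callee must be reached through an indirect call
// sequence (via the count register) rather than a direct branch-and-link.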
static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
                           const PPCSubtarget &Subtarget, bool isPatchPoint) {
  auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
  const GlobalValue *GV = G ? G->getGlobal() : nullptr;

  // PatchPoint calls are not indirect.
  if (isPatchPoint)
    return false;

  if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
    return false;

  // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not
  // because the immediate function pointer points to a descriptor instead of
  // a function entry point. The ELFv2 ABI cannot use a BLA because the function
  // pointer immediate points to the global entry point, while the BLA would
  // need to jump to the local entry point (see rL211174).
  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
      isBLACompatibleAddress(Callee, DAG))
    return false;

  return true;
}

// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
  return Subtarget.isAIXABI() ||
         (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
}
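
// Pick the PPCISD call opcode for a call site: tail calls become TC_RETURN,
// indirect calls use a BCTRL form (with a TOC reload where the ABI requires
// it), and strict-FP calls map onto the rounding-mode-aware variants.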
static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
                              const Function &Caller, const SDValue &Callee,
                              const PPCSubtarget &Subtarget,
                              const TargetMachine &TM,
                              bool IsStrictFPCall = false) {
  if (CFlags.IsTailCall)
    return PPCISD::TC_RETURN;

  unsigned RetOpc = 0;
  // This is a call through a function pointer.
  if (CFlags.IsIndirect) {
    // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
    // indirect calls. The save of the caller's TOC pointer to the stack will be
    // inserted into the DAG as part of call lowering. The restore of the TOC
    // pointer is modeled by using a pseudo instruction for the call opcode that
    // represents the 2 instruction sequence of an indirect branch and link,
    // immediately followed by a load of the TOC pointer from the stack save
    // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
    // as it is not saved or used.
    RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
                                                 : PPCISD::BCTRL;
  } else if (Subtarget.isUsingPCRelativeCalls()) {
    assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
    RetOpc = PPCISD::CALL_NOTOC;
  } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
    // The ABIs that maintain a TOC pointer across calls need to have a nop
    // immediately following the call instruction if the caller and callee may
    // have different TOC bases. At link time if the linker determines the calls
    // may not share a TOC base, the call is redirected to a trampoline inserted
    // by the linker. The trampoline will (among other things) save the callers
    // TOC pointer at an ABI designated offset in the linkage area and the
    // linker will rewrite the nop to be a load of the TOC pointer from the
    // linkage area into gpr2.
    auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
    const GlobalValue *GV = G ? G->getGlobal() : nullptr;
    RetOpc =
        callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
  } else
    RetOpc = PPCISD::CALL;
  if (IsStrictFPCall) {
    switch (RetOpc) {
    default:
      llvm_unreachable("Unknown call opcode");
    case PPCISD::BCTRL_LOAD_TOC:
      RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
      break;
    case PPCISD::BCTRL:
      RetOpc = PPCISD::BCTRL_RM;
      break;
    case PPCISD::CALL_NOTOC:
      RetOpc = PPCISD::CALL_NOTOC_RM;
      break;
    case PPCISD::CALL:
      RetOpc = PPCISD::CALL_RM;
      break;
    case PPCISD::CALL_NOP:
      RetOpc = PPCISD::CALL_NOP_RM;
      break;
    }
  }
  return RetOpc;
}
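
// Rewrite the callee operand into its target-specific form: a BLA-compatible
// immediate, an AIX entry-point MCSymbol, a TargetGlobalAddress, or a
// TargetExternalSymbol, attaching the PLT operand flag where required.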
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
                               const SDLoc &dl, const PPCSubtarget &Subtarget) {
  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
    if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
      return SDValue(Dest, 0);

  // Returns true if the callee is local, and false otherwise.
  auto isLocalCallee = [&]() {
    const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
    const GlobalValue *GV = G ? G->getGlobal() : nullptr;

    return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
           !isa_and_nonnull<GlobalIFunc>(GV);
  };

  // The PLT is only used in 32-bit ELF PIC mode.  Attempting to use the PLT in
  // a static relocation model causes some versions of GNU LD (2.17.50, at
  // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
  // built with secure-PLT.
  bool UsePlt =
      Subtarget.is32BitELFABI() && !isLocalCallee() &&
      Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;

  const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
    const TargetMachine &TM = Subtarget.getTargetMachine();
    const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
    MCSymbolXCOFF *S =
        cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));

    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
    return DAG.getMCSymbol(S, PtrVT);
  };

  auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
  const GlobalValue *GV = G ? G->getGlobal() : nullptr;
  if (isFunctionGlobalAddress(GV)) {
    const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();

    if (Subtarget.isAIXABI()) {
      assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
      return getAIXFuncEntryPointSymbolSDNode(GV);
    }
    return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
                                      UsePlt ? PPCII::MO_PLT : 0);
  }

  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    const char *SymName = S->getSymbol();
    if (Subtarget.isAIXABI()) {
      // If there exists a user-declared function whose name is the same as the
      // ExternalSymbol's, then we pick up the user-declared version.
      const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
      if (const Function *F =
              dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
        return getAIXFuncEntryPointSymbolSDNode(F);

      // On AIX, direct function calls reference the symbol for the function's
      // entry point, which is named by prepending a "." before the function's
      // C-linkage name. A Qualname is returned here because an external
      // function entry point is a csect with XTY_ER property.
      const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
        auto &Context = DAG.getMachineFunction().getContext();
        MCSectionXCOFF *Sec = Context.getXCOFFSection(
            (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
            XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
        return Sec->getQualNameSymbol();
      };

      SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
    }
    return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
                                       UsePlt ? PPCII::MO_PLT : 0);
  }

  // No transformation needed.
  assert(Callee.getNode() && "What no callee?");
  return Callee;
}
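
// Extract the output chain from a CALLSEQ_START node, skipping over the glue
// result when the node produces one.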
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
  assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
         "Expected a CALLSEQ_STARTSDNode.");

  // The last operand is the chain, except when the node has glue. If the node
  // has glue, then the last operand is the glue, and the chain is the second
  // to last operand.
  SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
  if (LastValue.getValueType() != MVT::Glue)
    return LastValue;

  return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
}

// Creates the node that moves a functions address into the count register
// to prepare for an indirect call instruction.
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
                                SDValue &Glue, SDValue &Chain,
                                const SDLoc &dl) {
  SDValue MTCTROps[] = {Chain, Callee, Glue};
  EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
  Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
                      ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
  // The glue is the second value produced.
  Glue = Chain.getValue(1);
}

static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
                                          SDValue &Glue, SDValue &Chain,
                                          SDValue CallSeqStart,
                                          const CallBase *CB, const SDLoc &dl,
                                          bool hasNest,
                                          const PPCSubtarget &Subtarget) {
  // Function pointers in the 64-bit SVR4 ABI do not point to the function
  // entry point, but to the function descriptor (the function entry point
  // address is part of the function descriptor though).
  // The function descriptor is a three doubleword structure with the
  // following fields: function entry point, TOC base address and
  // environment pointer.
  // Thus for a call through a function pointer, the following actions need
  // to be performed:
  //   1. Save the TOC of the caller in the TOC save area of its stack
  //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
  //   2. Load the address of the function entry point from the function
  //      descriptor.
  //   3. Load the TOC of the callee from the function descriptor into r2.
  //   4. Load the environment pointer from the function descriptor into
  //      r11.
  //   5. Branch to the function entry point address.
  //   6. On return of the callee, the TOC of the caller needs to be
  //      restored (this is done in FinishCall()).
  //
  // The loads are scheduled at the beginning of the call sequence, and the
  // register copies are flagged together to ensure that no other
  // operations can be scheduled in between. E.g. without flagging the
  // copies together, a TOC access in the caller could be scheduled between
  // the assignment of the callee TOC and the branch to the callee, which leads
  // to incorrect code.

  // Start by loading the function address from the descriptor.
  SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
  auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
                      ? (MachineMemOperand::MODereferenceable |
                         MachineMemOperand::MOInvariant)
                      : MachineMemOperand::MONone;

  MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);

  // Registers used in building the DAG.
  const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
  const MCRegister TOCReg = Subtarget.getTOCPointerRegister();

  // Offsets of descriptor members.
  const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
  const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();

  const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
  const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);

  // One load for the functions entry point address.
  SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
                                    Alignment, MMOFlags);

  // One for loading the TOC anchor for the module that contains the called
  // function.
  SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
  SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
  SDValue TOCPtr =
      DAG.getLoad(RegVT, dl, LDChain, AddTOC,
                  MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);

  // One for loading the environment pointer.
  SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
  SDValue LoadEnvPtr =
      DAG.getLoad(RegVT, dl, LDChain, AddPtr,
                  MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);

  // Then copy the newly loaded TOC anchor to the TOC pointer.
  SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
  Chain = TOCVal.getValue(0);
  Glue = TOCVal.getValue(1);

  // If the function call has an explicit 'nest' parameter, it takes the
  // place of the environment pointer.
  assert((!hasNest || !Subtarget.isAIXABI()) &&
         "Nest parameter is not supported on AIX.");
  if (!hasNest) {
    SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
    Chain = EnvVal.getValue(0);
    Glue = EnvVal.getValue(1);
  }

  // The rest of the indirect call sequence is the same as the non-descriptor
  // case.
  prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
}
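
// Assemble the operand list for the call node: the chain, the callee (for
// direct calls), the TOC-restore address where required, the tail-call stack
// adjustment, the argument registers, the register mask, and any glue.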
static void
buildCallOperands(SmallVectorImpl<SDValue> &Ops,
                  PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
                  SelectionDAG &DAG,
                  SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
                  SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
                  const PPCSubtarget &Subtarget) {
  const bool IsPPC64 = Subtarget.isPPC64();
  // MVT for a general purpose register.
  const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;

  // First operand is always the chain.
  Ops.push_back(Chain);

  // If it's a direct call pass the callee as the second operand.
  if (!CFlags.IsIndirect)
    Ops.push_back(Callee);
  else {
    assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");

    // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
    // on the stack (this would have been done in `LowerCall_64SVR4` or
    // `LowerCall_AIX`). The call instruction is a pseudo instruction that
    // represents both the indirect branch and a load that restores the TOC
    // pointer from the linkage area. The operand for the TOC restore is an add
    // of the TOC save offset to the stack pointer. This must be the second
    // operand: after the chain input but before any other variadic arguments.
    // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
    // saved or used.
    if (isTOCSaveRestoreRequired(Subtarget)) {
      const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();

      SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
      SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
      Ops.push_back(AddTOC);
    }

    // Add the register used for the environment pointer.
    if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
      Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
                                    RegVT));

    // Add CTR register as callee so a bctr can be emitted later.
    if (CFlags.IsTailCall)
      Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
  }

  // If this is a tail call add stack pointer delta.
  if (CFlags.IsTailCall)
    Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
  // no way to mark dependencies as implicit here.
  // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
  if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
      !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
    Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));

  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
  if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
    Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // If the glue is valid, it is the last operand.
  if (Glue.getNode())
    Ops.push_back(Glue);
}
SDValue PPCTargetLowering::FinishCall(
    CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
    SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
    unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
    SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {

  if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
      Subtarget.isAIXABI())
    setUsesTOCBasePtr(DAG);

  unsigned CallOpc =
      getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
                    Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);

  if (!CFlags.IsIndirect)
    Callee = transformCallee(Callee, DAG, dl, Subtarget);
  else if (Subtarget.usesFunctionDescriptors())
    prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
                                  dl, CFlags.HasNest, Subtarget);
  else
    prepareIndirectCall(DAG, Callee, Glue, Chain, dl);

  // Build the operand list for the call instruction.
  SmallVector<SDValue, 8> Ops;
  buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
                    SPDiff, Subtarget);

  if (CFlags.IsTailCall) {
    // Indirect tail calls when using PC Relative calls do not have the same
    // constraints.
    assert(((Callee.getOpcode() == ISD::Register &&
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
            Callee.getOpcode() == ISD::TargetExternalSymbol ||
            Callee.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(Callee) ||
            (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
           "Expecting a global address, external symbol, absolute value, "
           "register or an indirect tail call when PC Relative calls are "
           "used.");
    // PC Relative calls also use TC_RETURN as the way to mark tail calls.
    assert(CallOpc == PPCISD::TC_RETURN &&
           "Unexpected call opcode for a tail call.");
    DAG.getMachineFunction().getFrameInfo().setHasTailCall();
    SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
    DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
    return Ret;
  }

  std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
  Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
  Glue = Chain.getValue(1);

  // When performing tail call optimization the callee pops its arguments off
  // the stack. Account for this here so these bytes can be pushed back on in
  // PPCFrameLowering::eliminateCallFramePseudoInstr.
  int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
                         getTargetMachine().Options.GuaranteedTailCallOpt)
                            ? NumBytes
                            : 0;

  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
  Glue = Chain.getValue(1);

  return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
                         DAG, InVals);
}
bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
  CallingConv::ID CalleeCC = CB->getCallingConv();
  const Function *CallerFunc = CB->getCaller();
  CallingConv::ID CallerCC = CallerFunc->getCallingConv();
  const Function *CalleeFunc = CB->getCalledFunction();
  if (!CalleeFunc)
    return false;
  const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);

  SmallVector<ISD::OutputArg, 2> Outs;
  SmallVector<ISD::InputArg, 2> Ins;

  GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
                CalleeFunc->getAttributes(), Outs, *this,
                CalleeFunc->getDataLayout());

  return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
                          CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
                          false /*isCalleeExternalSymbol*/);
}
bool PPCTargetLowering::isEligibleForTCO(
    const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
    CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
    bool isCalleeExternalSymbol) const {
  if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
    return false;

  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
    return IsEligibleForTailCallOptimization_64SVR4(
        CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
        isCalleeExternalSymbol);

  return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
                                           isVarArg, Ins);
}
SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &dl = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool isVarArg = CLI.IsVarArg;
  bool isPatchPoint = CLI.IsPatchPoint;
  const CallBase *CB = CLI.CB;

  if (isTailCall) {
    MachineFunction &MF = DAG.getMachineFunction();
    CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
    auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
    const GlobalValue *GV = G ? G->getGlobal() : nullptr;
    bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);

    isTailCall =
        isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
                         &(MF.getFunction()), IsCalleeExternalSymbol);
    if (isTailCall) {
      ++NumTailCalls;
      if (!getTargetMachine().Options.GuaranteedTailCallOpt)
        ++NumSiblingCalls;

      // PC Relative calls no longer guarantee that the callee is a Global
      // Address Node. The callee could be an indirect tail call in which
      // case the SDValue for the callee could be a load (to load the address
      // of a function pointer) or it may be a register copy (to move the
      // address of the callee from a function parameter into a virtual
      // register). It may also be an ExternalSymbolSDNode (ex memcopy).
      assert((Subtarget.isUsingPCRelativeCalls() ||
              isa<GlobalAddressSDNode>(Callee)) &&
             "Callee should be an llvm::Function object.");

      LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
                        << "\nTCO callee: ");
      LLVM_DEBUG(Callee.dump());
    }
  }

  if (!isTailCall && CB && CB->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // When long calls (i.e. indirect calls) are always used, calls are always
  // made via function pointer. If we have a function name, first translate it
  // into a pointer.
  if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
      !isTailCall)
    Callee = LowerGlobalAddress(Callee, DAG);

  CallFlags CFlags(
      CallConv, isTailCall, isVarArg, isPatchPoint,
      isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
      // hasNest
      Subtarget.is64BitELFABI() &&
          any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
      CLI.NoMerge);

  if (Subtarget.isAIXABI())
    return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                         InVals, CB);

  assert(Subtarget.isSVR4ABI());
  if (Subtarget.isPPC64())
    return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                            InVals, CB);
  return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                          InVals, CB);
}
SDValue PPCTargetLowering::LowerCall_32SVR4(
    SDValue Chain, SDValue Callee, CallFlags CFlags,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    const CallBase *CB) const {
  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
  // of the 32-bit SVR4 ABI stack frame layout.

  const CallingConv::ID CallConv = CFlags.CallConv;
  const bool IsVarArg = CFlags.IsVarArg;
  const bool IsTailCall = CFlags.IsTailCall;

  assert((CallConv == CallingConv::C ||
          CallConv == CallingConv::Cold ||
          CallConv == CallingConv::Fast) && "Unknown calling convention!");

  const Align PtrAlign(4);

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamic
  // stack allocation and for restoring the caller's stack pointer in this
  // function's epilog. This is done because the tail-called function might
  // overwrite the value in this function's (MF) stack pointer stack slot
  // 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, parameter list area and the part of the local variable space which
  // contains copies of aggregates which are passed by value.

  // Assign locations to all of the outgoing arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
                       PtrAlign);
  if (useSoftFloat())
    CCInfo.PreAnalyzeCallOperands(Outs);

  if (IsVarArg) {
    // Handle fixed and variable vector arguments differently.
    // Fixed vector arguments go into registers as long as registers are
    // available. Variable vector arguments always go into memory.
    unsigned NumArgs = Outs.size();

    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ArgVT = Outs[i].VT;
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
      bool Result;

      if (Outs[i].IsFixed) {
        Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
                               CCInfo);
      } else {
        Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
                                      ArgFlags, CCInfo);
      }

      if (Result) {
#ifndef NDEBUG
        errs() << "Call operand #" << i << " has unhandled type "
               << ArgVT << "\n";
#endif
        llvm_unreachable(nullptr);
      }
    }
  } else {
    // All arguments are treated the same.
    CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
  }
  CCInfo.clearWasPPCF128();

  // Assign locations to all of the outgoing aggregate by value arguments.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);

  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);

  // Size of the linkage area, parameter list area and the part of the local
  // variable space where copies of aggregates which are passed by value are
  // stored.
  unsigned NumBytes = CCByValInfo.getStackSize();

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
  SmallVector<SDValue, 8> MemOpChains;

  bool seenFloatArg = false;
  // Walk the register/memloc assignments, inserting copies/loads.
  // i - Tracks the index into the list of registers allocated for the call.
  // RealArgIdx - Tracks the index into the list of actual function arguments.
  // j - Tracks the index into the list of byval arguments.
  for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
       i != e;
       ++i, ++RealArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[RealArgIdx];
    ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;

    if (Flags.isByVal()) {
      // Argument is an aggregate which is passed by value, thus we need to
      // create a copy of it in the local variable space of the current stack
      // frame (which is the stack frame of the caller) and pass the address of
      // this copy to the callee.
      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
      CCValAssign &ByValVA = ByValArgLocs[j++];
      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");

      // Memory reserved in the local variable space of the caller's stack
      // frame.
      unsigned LocMemOffset = ByValVA.getLocMemOffset();

      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
      PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                           StackPtr, PtrOff);

      // Create a copy of the argument in the local area of the current
      // stack frame.
      SDValue MemcpyCall =
          CreateCopyOfByValArgument(Arg, PtrOff,
                                    CallSeqStart.getNode()->getOperand(0),
                                    Flags, DAG, dl);

      // This must go outside the CALLSEQ_START..END.
      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
                                                     SDLoc(MemcpyCall));
      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                             NewCallSeqStart.getNode());
      Chain = CallSeqStart = NewCallSeqStart;

      // Pass the address of the aggregate copy on the stack either in a
      // physical register or in the parameter list area of the current stack
      // frame to the callee.
      Arg = PtrOff;
    }

    // When useCRBits() is true, there can be i1 arguments.
    // It is because getRegisterType(MVT::i1) => MVT::i1,
    // and for other integer types getRegisterType() => MVT::i32.
    // Extend i1 and ensure the callee will get i32.
    if (Arg.getValueType() == MVT::i1)
      Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
                        dl, MVT::i32, Arg);

    if (VA.isRegLoc()) {
      seenFloatArg |= VA.getLocVT().isFloatingPoint();
      // Put argument in a physical register.
      if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
        bool IsLE = Subtarget.isLittleEndian();
        SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                                   DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
        SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                           DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
        RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
                                            SVal.getValue(0)));
      } else
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      // Put argument in the parameter list area of the current stack frame.
      assert(VA.isMemLoc());
      unsigned LocMemOffset = VA.getLocMemOffset();

      if (!IsTailCall) {
        SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
        PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                             StackPtr, PtrOff);

        MemOpChains.push_back(
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
      } else {
        // Calculate and remember argument location.
        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
                                 TailCallArguments);
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InGlue;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InGlue);
    InGlue = Chain.getValue(1);
  }
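  // Brief note on the CR6 convention used below (32-bit SVR4): the caller of
  // a varargs routine is expected to set CR bit 6 when at least one
  // floating-point argument was passed in registers, and clear it otherwise,
  // so the callee's va_start prologue knows whether it must spill the FPR
  // argument registers. The CR6SET/CR6UNSET nodes model exactly that.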
  // Set CR bit 6 to true if this is a vararg call with floating args passed
  // in registers.
  if (IsVarArg) {
    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, InGlue };

    Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
                        VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));

    InGlue = Chain.getValue(1);
  }

  if (IsTailCall)
    PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
// Copy an argument into memory, being careful to do this outside the
// call sequence for the call to which the argument belongs.
SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
    SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
    SelectionDAG &DAG, const SDLoc &dl) const {
  SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
                        CallSeqStart.getNode()->getOperand(0),
                        Flags, DAG, dl);
  // The MEMCPY must go outside the CALLSEQ_START..END.
  int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
                                                 SDLoc(MemcpyCall));
  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                         NewCallSeqStart.getNode());
  return NewCallSeqStart;
}
SDValue PPCTargetLowering::LowerCall_64SVR4(
    SDValue Chain, SDValue Callee, CallFlags CFlags,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    const CallBase *CB) const {
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  unsigned NumOps = Outs.size();
  bool IsSibCall = false;
  bool IsFastCall = CFlags.CallConv == CallingConv::Fast;

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  unsigned PtrByteSize = 8;

  MachineFunction &MF = DAG.getMachineFunction();

  if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
    IsSibCall = true;

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamic
  // stack allocation and for restoring the caller's stack pointer in this
  // function's epilog. This is done because the tail-called function might
  // overwrite the value in this function's (MF) stack pointer stack slot
  // 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  assert(!(IsFastCall && CFlags.IsVarArg) &&
         "fastcc not supported on varargs functions");

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
  // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
  // area is 32 bytes reserved space for [SP][CR][LR][TOC].
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned NumBytes = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned NumGPRs = std::size(GPR);
  const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
  const unsigned NumVRs = std::size(VR);

  // On ELFv2, we can avoid allocating the parameter area if all the arguments
  // can be passed to the callee in registers.
  // For the fast calling convention, there is another check below.
  // Note: We should keep consistent with LowerFormalArguments_64SVR4().
  bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
  if (!HasParameterArea) {
    unsigned ParamAreaSize = NumGPRs * PtrByteSize;
    unsigned AvailableFPRs = NumFPRs;
    unsigned AvailableVRs = NumVRs;
    unsigned NumBytesTmp = NumBytes;
    for (unsigned i = 0; i != NumOps; ++i) {
      if (Outs[i].Flags.isNest()) continue;
      if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
                                 PtrByteSize, LinkageSize, ParamAreaSize,
                                 NumBytesTmp, AvailableFPRs, AvailableVRs))
        HasParameterArea = true;
    }
  }
6298 unsigned NumGPRsUsed
= 0, NumFPRsUsed
= 0, NumVRsUsed
= 0;
6300 // Avoid allocating parameter area for fastcc functions if all the arguments
6301 // can be passed in the registers.
6303 HasParameterArea
= false;
6305 // Add up all the space actually used.
6306 for (unsigned i
= 0; i
!= NumOps
; ++i
) {
6307 ISD::ArgFlagsTy Flags
= Outs
[i
].Flags
;
6308 EVT ArgVT
= Outs
[i
].VT
;
6309 EVT OrigVT
= Outs
[i
].ArgVT
;
6315 if (Flags
.isByVal()) {
6316 NumGPRsUsed
+= (Flags
.getByValSize()+7)/8;
6317 if (NumGPRsUsed
> NumGPRs
)
6318 HasParameterArea
= true;
6320 switch (ArgVT
.getSimpleVT().SimpleTy
) {
6321 default: llvm_unreachable("Unexpected ValueType for argument!");
6325 if (++NumGPRsUsed
<= NumGPRs
)
6335 if (++NumVRsUsed
<= NumVRs
)
6339 if (++NumVRsUsed
<= NumVRs
)
6344 if (++NumFPRsUsed
<= NumFPRs
)
6348 HasParameterArea
= true;
6352 /* Respect alignment of argument on the stack. */
6354 CalculateStackSlotAlignment(ArgVT
, OrigVT
, Flags
, PtrByteSize
);
6355 NumBytes
= alignTo(NumBytes
, Alignement
);
6357 NumBytes
+= CalculateStackSlotSize(ArgVT
, Flags
, PtrByteSize
);
6358 if (Flags
.isInConsecutiveRegsLast())
6359 NumBytes
= ((NumBytes
+ PtrByteSize
- 1)/PtrByteSize
) * PtrByteSize
;
6362 unsigned NumBytesActuallyUsed
= NumBytes
;
6364 // In the old ELFv1 ABI,
6365 // the prolog code of the callee may store up to 8 GPR argument registers to
6366 // the stack, allowing va_start to index over them in memory if its varargs.
6367 // Because we cannot tell if this is needed on the caller side, we have to
6368 // conservatively assume that it is needed. As such, make sure we have at
6369 // least enough stack space for the caller to store the 8 GPRs.
6370 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6371 // really requires memory operands, e.g. a vararg function.
6372 if (HasParameterArea
)
6373 NumBytes
= std::max(NumBytes
, LinkageSize
+ 8 * PtrByteSize
);
6375 NumBytes
= LinkageSize
;
6377 // Tail call needs the stack to be aligned.
6378 if (getTargetMachine().Options
.GuaranteedTailCallOpt
&& IsFastCall
)
6379 NumBytes
= EnsureStackAlignment(Subtarget
.getFrameLowering(), NumBytes
);
6383 // Calculate by how many bytes the stack has to be adjusted in case of tail
6384 // call optimization.
6386 SPDiff
= CalculateTailCallSPDiff(DAG
, CFlags
.IsTailCall
, NumBytes
);
6388 // To protect arguments on the stack from being clobbered in a tail call,
6389 // force all the loads to happen before doing any other lowering.
6390 if (CFlags
.IsTailCall
)
6391 Chain
= DAG
.getStackArgumentTokenFactor(Chain
);
6393 // Adjust the stack pointer for the new arguments...
6394 // These operations are automatically eliminated by the prolog/epilog pass
6396 Chain
= DAG
.getCALLSEQ_START(Chain
, NumBytes
, 0, dl
);
6397 SDValue CallSeqStart
= Chain
;
6399 // Load the return address and frame pointer so it can be move somewhere else
6402 Chain
= EmitTailCallLoadFPAndRetAddr(DAG
, SPDiff
, Chain
, LROp
, FPOp
, dl
);
6404 // Set up a copy of the stack pointer for use loading and storing any
6405 // arguments that may not fit in the registers available for argument
6407 SDValue StackPtr
= DAG
.getRegister(PPC::X1
, MVT::i64
);
6409 // Figure out which arguments are going to go in registers, and which in
6410 // memory. Also, if this is a vararg function, floating point operations
6411 // must be stored to our stack, and loaded into integer regs as well, if
6412 // any integer regs are available for argument passing.
6413 unsigned ArgOffset
= LinkageSize
;
6415 SmallVector
<std::pair
<unsigned, SDValue
>, 8> RegsToPass
;
6416 SmallVector
<TailCallArgumentInfo
, 8> TailCallArguments
;
6418 SmallVector
<SDValue
, 8> MemOpChains
;
6419 for (unsigned i
= 0; i
!= NumOps
; ++i
) {
6420 SDValue Arg
= OutVals
[i
];
6421 ISD::ArgFlagsTy Flags
= Outs
[i
].Flags
;
6422 EVT ArgVT
= Outs
[i
].VT
;
6423 EVT OrigVT
= Outs
[i
].ArgVT
;
6425 // PtrOff will be used to store the current argument to the stack if a
6426 // register cannot be found for it.
6429 // We re-align the argument offset for each argument, except when using the
6430 // fast calling convention, when we need to make sure we do that only when
6431 // we'll actually use a stack slot.
6432 auto ComputePtrOff
= [&]() {
6433 /* Respect alignment of argument on the stack. */
6435 CalculateStackSlotAlignment(ArgVT
, OrigVT
, Flags
, PtrByteSize
);
6436 ArgOffset
= alignTo(ArgOffset
, Alignment
);
6438 PtrOff
= DAG
.getConstant(ArgOffset
, dl
, StackPtr
.getValueType());
6440 PtrOff
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, StackPtr
, PtrOff
);
6446 /* Compute GPR index associated with argument offset. */
6447 GPR_idx
= (ArgOffset
- LinkageSize
) / PtrByteSize
;
6448 GPR_idx
= std::min(GPR_idx
, NumGPRs
);
6451 // Promote integers to 64-bit values.
6452 if (Arg
.getValueType() == MVT::i32
|| Arg
.getValueType() == MVT::i1
) {
6453 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6454 unsigned ExtOp
= Flags
.isSExt() ? ISD::SIGN_EXTEND
: ISD::ZERO_EXTEND
;
6455 Arg
= DAG
.getNode(ExtOp
, dl
, MVT::i64
, Arg
);
6458 // FIXME memcpy is used way more than necessary. Correctness first.
6459 // Note: "by value" is code for passing a structure by value, not
6461 if (Flags
.isByVal()) {
6462 // Note: Size includes alignment padding, so
6463 // struct x { short a; char b; }
6464 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6465 // These are the proper values we need for right-justifying the
6466 // aggregate in a parameter register.
6467 unsigned Size
= Flags
.getByValSize();
6469 // An empty aggregate parameter takes up no storage and no
6477 // All aggregates smaller than 8 bytes must be passed right-justified.
6478 if (Size
==1 || Size
==2 || Size
==4) {
6479 EVT VT
= (Size
==1) ? MVT::i8
: ((Size
==2) ? MVT::i16
: MVT::i32
);
6480 if (GPR_idx
!= NumGPRs
) {
6481 SDValue Load
= DAG
.getExtLoad(ISD::EXTLOAD
, dl
, PtrVT
, Chain
, Arg
,
6482 MachinePointerInfo(), VT
);
6483 MemOpChains
.push_back(Load
.getValue(1));
6484 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Load
));
6486 ArgOffset
+= PtrByteSize
;
6491 if (GPR_idx
== NumGPRs
&& Size
< 8) {
6492 SDValue AddPtr
= PtrOff
;
6493 if (!isLittleEndian
) {
6494 SDValue Const
= DAG
.getConstant(PtrByteSize
- Size
, dl
,
6495 PtrOff
.getValueType());
6496 AddPtr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, PtrOff
, Const
);
6498 Chain
= CallSeqStart
= createMemcpyOutsideCallSeq(Arg
, AddPtr
,
6501 ArgOffset
+= PtrByteSize
;
6504 // Copy the object to parameter save area if it can not be entirely passed
6506 // FIXME: we only need to copy the parts which need to be passed in
6507 // parameter save area. For the parts passed by registers, we don't need
6508 // to copy them to the stack although we need to allocate space for them
6509 // in parameter save area.
6510 if ((NumGPRs
- GPR_idx
) * PtrByteSize
< Size
)
6511 Chain
= CallSeqStart
= createMemcpyOutsideCallSeq(Arg
, PtrOff
,
6515 // When a register is available, pass a small aggregate right-justified.
6516 if (Size
< 8 && GPR_idx
!= NumGPRs
) {
6517 // The easiest way to get this right-justified in a register
6518 // is to copy the structure into the rightmost portion of a
6519 // local variable slot, then load the whole slot into the
6521 // FIXME: The memcpy seems to produce pretty awful code for
6522 // small aggregates, particularly for packed ones.
6523 // FIXME: It would be preferable to use the slot in the
6524 // parameter save area instead of a new local variable.
6525 SDValue AddPtr
= PtrOff
;
6526 if (!isLittleEndian
) {
6527 SDValue Const
= DAG
.getConstant(8 - Size
, dl
, PtrOff
.getValueType());
6528 AddPtr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, PtrOff
, Const
);
6530 Chain
= CallSeqStart
= createMemcpyOutsideCallSeq(Arg
, AddPtr
,
6534 // Load the slot into the register.
6536 DAG
.getLoad(PtrVT
, dl
, Chain
, PtrOff
, MachinePointerInfo());
6537 MemOpChains
.push_back(Load
.getValue(1));
6538 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Load
));
6540 // Done with this argument.
6541 ArgOffset
+= PtrByteSize
;
6545 // For aggregates larger than PtrByteSize, copy the pieces of the
6546 // object that fit into registers from the parameter save area.
6547 for (unsigned j
=0; j
<Size
; j
+=PtrByteSize
) {
6548 SDValue Const
= DAG
.getConstant(j
, dl
, PtrOff
.getValueType());
6549 SDValue AddArg
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, Arg
, Const
);
6550 if (GPR_idx
!= NumGPRs
) {
6551 unsigned LoadSizeInBits
= std::min(PtrByteSize
, (Size
- j
)) * 8;
6552 EVT ObjType
= EVT::getIntegerVT(*DAG
.getContext(), LoadSizeInBits
);
6553 SDValue Load
= DAG
.getExtLoad(ISD::EXTLOAD
, dl
, PtrVT
, Chain
, AddArg
,
6554 MachinePointerInfo(), ObjType
);
6556 MemOpChains
.push_back(Load
.getValue(1));
6557 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Load
));
6558 ArgOffset
+= PtrByteSize
;
6560 ArgOffset
+= ((Size
- j
+ PtrByteSize
-1)/PtrByteSize
)*PtrByteSize
;
6567 switch (Arg
.getSimpleValueType().SimpleTy
) {
6568 default: llvm_unreachable("Unexpected ValueType for argument!");
6572 if (Flags
.isNest()) {
6573 // The 'nest' parameter, if any, is passed in R11.
6574 RegsToPass
.push_back(std::make_pair(PPC::X11
, Arg
));
6578 // These can be scalar arguments or elements of an integer array type
6579 // passed directly. Clang may use those instead of "byval" aggregate
6580 // types to avoid forcing arguments to memory unnecessarily.
6581 if (GPR_idx
!= NumGPRs
) {
6582 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Arg
));
6587 assert(HasParameterArea
&&
6588 "Parameter area must exist to pass an argument in memory.");
6589 LowerMemOpCallTo(DAG
, MF
, Chain
, Arg
, PtrOff
, SPDiff
, ArgOffset
,
6590 true, CFlags
.IsTailCall
, false, MemOpChains
,
6591 TailCallArguments
, dl
);
6593 ArgOffset
+= PtrByteSize
;
6596 ArgOffset
+= PtrByteSize
;
6600 // These can be scalar arguments or elements of a float array type
6601 // passed directly. The latter are used to implement ELFv2 homogenous
6602 // float aggregates.
6604 // Named arguments go into FPRs first, and once they overflow, the
6605 // remaining arguments go into GPRs and then the parameter save area.
6606 // Unnamed arguments for vararg functions always go to GPRs and
6607 // then the parameter save area. For now, put all arguments to vararg
6608 // routines always in both locations (FPR *and* GPR or stack slot).
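      // Illustrative example: a homogeneous f32 aggregate with more elements
      // than available FPRs spills its remaining elements to GPRs below,
      // pairing two consecutive floats into one 64-bit GPR (via BUILD_PAIR),
      // which is also why ArgOffset advances by only 4 bytes per in-array
      // element further down.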
      bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
      bool NeededLoad = false;

      // First load the argument into the next available FPR.
      if (FPR_idx != NumFPRs)
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

      // Next, load the argument into GPR or stack slot if needed.
      if (!NeedGPROrStack)
        ;
      else if (GPR_idx != NumGPRs && !IsFastCall) {
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
        // once we support fp <-> gpr moves.

        // In the non-vararg case, this can only ever happen in the
        // presence of f32 array types, since otherwise we never run
        // out of FPRs before running out of GPRs.
        SDValue ArgVal;

        // Double values are always passed in a single GPR.
        if (Arg.getValueType() != MVT::f32) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);

        // Non-array float values are extended and passed in a GPR.
        } else if (!Flags.isInConsecutiveRegs()) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);

        // If we have an array of floats, we collect every odd element
        // together with its predecessor into one GPR.
        } else if (ArgOffset % PtrByteSize != 0) {
          SDValue Lo, Hi;
          Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
          Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          if (!isLittleEndian)
            std::swap(Lo, Hi);
          ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);

        // The final element, if even, goes into the first half of a GPR.
        } else if (Flags.isInConsecutiveRegsLast()) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
          if (!isLittleEndian)
            ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, dl, MVT::i32));

        // Non-final even elements are skipped; they will be handled
        // together with the subsequent argument on the next go-around.
        } else
          ArgVal = SDValue();

        if (ArgVal.getNode())
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
      } else {
        if (IsFastCall)
          ComputePtrOff();

        // Single-precision floating-point values are mapped to the
        // second (rightmost) word of the stack doubleword.
        if (Arg.getValueType() == MVT::f32 &&
            !isLittleEndian && !Flags.isInConsecutiveRegs()) {
          SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
          PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
        }

        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, CFlags.IsTailCall, false, MemOpChains,
                         TailCallArguments, dl);

        NeededLoad = true;
      }
      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array. Otherwise, each float takes 8 bytes.
      if (!IsFastCall || NeededLoad) {
        ArgOffset += (Arg.getValueType() == MVT::f32 &&
                      Flags.isInConsecutiveRegs()) ? 4 : 8;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1) / PtrByteSize) * PtrByteSize;
      }
      break;
    }
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
    case MVT::f128:
      // These can be scalar arguments or elements of a vector array type
      // passed directly. The latter are used to implement ELFv2 homogeneous
      // vector aggregates.

      // For a varargs call, named arguments go into VRs or on the stack as
      // usual; unnamed arguments always go to the stack or the corresponding
      // GPRs when within range. For now, we always put the value in both
      // locations (or even all three).
      if (CFlags.IsVarArg) {
        assert(HasParameterArea &&
               "Parameter area must exist if we have a varargs call.");
        // We could elide this store in the case where the object fits
        // entirely in R registers. Maybe later.
        SDValue Store =
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          SDValue Load =
              DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
        }
        ArgOffset += 16;
        for (unsigned i = 0; i < 16; i += PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, dl, PtrVT));
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs Altivec params go into VRs or on the stack.
      if (VR_idx != NumVRs) {
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      } else {
        if (IsFastCall)
          ComputePtrOff();

        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, CFlags.IsTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        if (IsFastCall)
          ArgOffset += 16;
      }

      if (!IsFastCall)
        ArgOffset += 16;
      break;
    }
  }

  assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
         "mismatch in size of parameter area");
  (void)NumBytesActuallyUsed;

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Check if this is an indirect call (MTCTR/BCTRL).
  // See prepareDescriptorIndirectCall and buildCallOperands for more
  // information about calls through function pointers in the 64-bit SVR4 ABI.
  if (CFlags.IsIndirect) {
    // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
    // caller in the TOC save area.
    if (isTOCSaveRestoreRequired(Subtarget)) {
      assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
      // Load r2 into a virtual register and store it to the TOC save area.
      setUsesTOCBasePtr(DAG);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
      // TOC save area offset.
      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
      SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
      SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
      Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
                           MachinePointerInfo::getStack(
                               DAG.getMachineFunction(), TOCSaveOffset));
    }
    // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
    // This does not mean the MTCTR instruction must use R12; it's easier
    // to model this as an extra parameter, so do that.
    if (isELFv2ABI && !CFlags.IsPatchPoint)
      RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InGlue;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InGlue);
    InGlue = Chain.getValue(1);
  }

  if (CFlags.IsTailCall && !IsSibCall)
    PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
// Returns true when the shadow of a general purpose argument register
// in the parameter save area is aligned to at least 'RequiredAlign'.
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
  assert(RequiredAlign.value() <= 16 &&
         "Required alignment greater than stack alignment.");
  switch (Reg) {
  default:
    report_fatal_error("called on invalid register.");
  case PPC::R5:
  case PPC::R9:
  case PPC::X3:
  case PPC::X5:
  case PPC::X7:
  case PPC::X9:
    // These registers are 16 byte aligned which is the most strict alignment
    // we can support.
    return true;
  case PPC::R3:
  case PPC::R7:
  case PPC::X4:
  case PPC::X6:
  case PPC::X8:
  case PPC::X10:
    // The shadow of these registers in the PSA is 8 byte aligned.
    return RequiredAlign <= 8;
  case PPC::R4:
  case PPC::R6:
  case PPC::R8:
  case PPC::R10:
    return RequiredAlign <= 4;
  }
}
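// Worked example (illustrative; offsets follow from the usual AIX linkage
// areas of 24 bytes in 32-bit mode and 48 bytes in 64-bit mode): X3 shadows
// PSA offset 48, X4 offset 56, X5 offset 64, and so on, so the even Xn
// registers only reach 8-byte alignment, while R4/R6/R8/R10 shadow offsets
// 28, 36, 44 and 52, which are only 4-byte aligned.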
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
                   CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                   CCState &S) {
  AIXCCState &State = static_cast<AIXCCState &>(S);
  const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
      State.getMachineFunction().getSubtarget());
  const bool IsPPC64 = Subtarget.isPPC64();
  const unsigned PtrSize = IsPPC64 ? 8 : 4;
  const Align PtrAlign(PtrSize);
  const Align StackAlign(16);
  const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;

  if (ValVT == MVT::f128)
    report_fatal_error("f128 is unimplemented on AIX.");

  if (ArgFlags.isNest())
    report_fatal_error("Nest arguments are unimplemented.");

  static const MCPhysReg GPR_32[] = {// 32-bit registers.
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10};
  static const MCPhysReg GPR_64[] = {// 64-bit registers.
      PPC::X3, PPC::X4, PPC::X5, PPC::X6,
      PPC::X7, PPC::X8, PPC::X9, PPC::X10};

  static const MCPhysReg VR[] = {// Vector registers.
      PPC::V2, PPC::V3, PPC::V4, PPC::V5,
      PPC::V6, PPC::V7, PPC::V8, PPC::V9,
      PPC::V10, PPC::V11, PPC::V12, PPC::V13};

  const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;

  if (ArgFlags.isByVal()) {
    const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
    if (ByValAlign > StackAlign)
      report_fatal_error("Pass-by-value arguments with alignment greater than "
                         "16 are not supported.");

    const unsigned ByValSize = ArgFlags.getByValSize();
    const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;

    // An empty aggregate parameter takes up no storage and no registers,
    // but needs a MemLoc for a stack slot for the formal arguments side.
    if (ByValSize == 0) {
      State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
                                       State.getStackSize(), RegVT, LocInfo));
      return false;
    }

    // Shadow allocate any registers that are not properly aligned.
    unsigned NextReg = State.getFirstUnallocated(GPRs);
    while (NextReg != GPRs.size() &&
           !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
      // Shadow allocate the next register since its alignment is not strict
      // enough.
      unsigned Reg = State.AllocateReg(GPRs);
      // Allocate the stack space shadowed by said register.
      State.AllocateStack(PtrSize, PtrAlign);
      assert(Reg && "Allocating register unexpectedly failed.");
      (void)Reg;
      NextReg = State.getFirstUnallocated(GPRs);
    }

    const unsigned StackSize = alignTo(ByValSize, ObjAlign);
    unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
    for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
      if (unsigned Reg = State.AllocateReg(GPRs))
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
      else {
        State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
                                         Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
                                         LocInfo));
        break;
      }
    }
    return false;
  }

  // Arguments always reserve parameter save area.
  switch (ValVT.SimpleTy) {
  default:
    report_fatal_error("Unhandled value type for argument.");
  case MVT::i64:
    // i64 arguments should have been split to i32 for PPC32.
    assert(IsPPC64 && "PPC32 should have split i64 values.");
    [[fallthrough]];
  case MVT::i1:
  case MVT::i32: {
    const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
    // AIX integer arguments are always passed in register width.
    if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
      LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
                                  : CCValAssign::LocInfo::ZExt;
    if (unsigned Reg = State.AllocateReg(GPRs))
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
    else
      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));

    return false;
  }
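  // Example for the integer handling above (illustrative): a signext i8
  // argument on AIX64 is recorded with LocInfo SExt and a 64-bit RegVT, so
  // the caller extends it to a full X register (or an 8-byte PSA slot)
  // before the call.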
  case MVT::f32:
  case MVT::f64: {
    // The parameter save area (PSA) is reserved even if the float passes in
    // an FPR.
    const unsigned StoreSize = LocVT.getStoreSize();
    // Floats are always 4-byte aligned in the PSA on AIX.
    // This includes f64 in 64-bit mode for ABI compatibility.
    const unsigned Offset =
        State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
    unsigned FReg = State.AllocateReg(FPR);
    if (FReg)
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));

    // Reserve and initialize GPRs or initialize the PSA as required.
    for (unsigned I = 0; I < StoreSize; I += PtrSize) {
      if (unsigned Reg = State.AllocateReg(GPRs)) {
        assert(FReg && "An FPR should be available when a GPR is reserved.");
        if (State.isVarArg()) {
          // Successfully reserved GPRs are only initialized for vararg calls.
          // Custom handling is required for:
          //   f64 in PPC32 needs to be split into 2 GPRs.
          //   f32 in PPC64 needs to occupy only the lower 32 bits of a 64-bit
          //   GPR.
          State.addLoc(
              CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
        }
      } else {
        // If there are insufficient GPRs, the PSA needs to be initialized.
        // Initialization occurs even if an FPR was initialized, for
        // compatibility with the AIX XL compiler. The full memory for the
        // argument will be initialized even if a prior word is saved in a GPR.
        // A custom MemLoc is used when the argument also passes in an FPR so
        // that the callee handling can skip over it easily.
        State.addLoc(
            FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
                                             LocInfo)
                 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
        break;
      }
    }

    return false;
  }
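  // To make the floating-point handling above concrete (illustrative only):
  // a double passed to a vararg callee on AIX64 is given an FPR, shadows one
  // GPR and 8 bytes of the PSA, and because the call is vararg the shadowed
  // GPR (or the PSA slot when no GPR remains) is also initialized with the
  // bits of the double so va_arg can retrieve it from the register save area.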
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
  case MVT::v2i64:
  case MVT::v2f64:
  case MVT::v1i128: {
    const unsigned VecSize = 16;
    const Align VecAlign(VecSize);

    if (!State.isVarArg()) {
      // If there are vector registers remaining we don't consume any stack
      // space.
      if (unsigned VReg = State.AllocateReg(VR)) {
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
        return false;
      }
      // Vectors passed on the stack do not shadow GPRs or FPRs even though
      // they might be allocated in the portion of the PSA that is shadowed by
      // the GPRs.
      const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
      return false;
    }

    unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
    // Burn any underaligned registers and their shadowed stack space until
    // we reach the required alignment.
    while (NextRegIndex != GPRs.size() &&
           !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
      // Shadow allocate the register and its stack shadow.
      unsigned Reg = State.AllocateReg(GPRs);
      State.AllocateStack(PtrSize, PtrAlign);
      assert(Reg && "Allocating register unexpectedly failed.");
      (void)Reg;
      NextRegIndex = State.getFirstUnallocated(GPRs);
    }

    // Vectors that are passed as fixed arguments are handled differently.
    // They are passed in VRs if any are available (unlike arguments passed
    // through ellipses) and shadow GPRs (unlike arguments to non-vaarg
    // functions).
    if (State.isFixed(ValNo)) {
      if (unsigned VReg = State.AllocateReg(VR)) {
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
        // Shadow allocate GPRs and stack space even though we pass in a VR.
        for (unsigned I = 0; I != VecSize; I += PtrSize)
          State.AllocateReg(GPRs);
        State.AllocateStack(VecSize, VecAlign);
        return false;
      }
      // No vector registers remain so pass on the stack.
      const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
      return false;
    }

    // If all GPRs are consumed then we pass the argument fully on the stack.
    if (NextRegIndex == GPRs.size()) {
      const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
      return false;
    }

    // Corner case for 32-bit codegen. We have 2 registers to pass the first
    // half of the argument, and then need to pass the remaining half on the
    // stack.
    if (GPRs[NextRegIndex] == PPC::R9) {
      const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
      State.addLoc(
          CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));

      const unsigned FirstReg = State.AllocateReg(PPC::R9);
      const unsigned SecondReg = State.AllocateReg(PPC::R10);
      assert(FirstReg && SecondReg &&
             "Allocating R9 or R10 unexpectedly failed.");
      State.addLoc(
          CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
      State.addLoc(
          CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
      return false;
    }
7066 // already consumed any underaligned registers. Start with the custom
7067 // MemLoc and then the custom RegLocs.
7068 const unsigned Offset
= State
.AllocateStack(VecSize
, VecAlign
);
7070 CCValAssign::getCustomMem(ValNo
, ValVT
, Offset
, LocVT
, LocInfo
));
7071 for (unsigned I
= 0; I
!= VecSize
; I
+= PtrSize
) {
7072 const unsigned Reg
= State
.AllocateReg(GPRs
);
7073 assert(Reg
&& "Failed to allocated register for vararg vector argument");
7075 CCValAssign::getCustomReg(ValNo
, ValVT
, Reg
, RegVT
, LocInfo
));
7083 // So far, this function is only used by LowerFormalArguments_AIX()
7084 static const TargetRegisterClass
*getRegClassForSVT(MVT::SimpleValueType SVT
,
7088 assert((IsPPC64
|| SVT
!= MVT::i64
) &&
7089 "i64 should have been split for 32-bit codegen.");
7093 report_fatal_error("Unexpected value type for formal argument");
7097 return IsPPC64
? &PPC::G8RCRegClass
: &PPC::GPRCRegClass
;
7099 return HasP8Vector
? &PPC::VSSRCRegClass
: &PPC::F4RCRegClass
;
7101 return HasVSX
? &PPC::VSFRCRegClass
: &PPC::F8RCRegClass
;
7109 return &PPC::VRRCRegClass
;
7113 static SDValue
truncateScalarIntegerArg(ISD::ArgFlagsTy Flags
, EVT ValVT
,
7114 SelectionDAG
&DAG
, SDValue ArgValue
,
7115 MVT LocVT
, const SDLoc
&dl
) {
7116 assert(ValVT
.isScalarInteger() && LocVT
.isScalarInteger());
7117 assert(ValVT
.getFixedSizeInBits() < LocVT
.getFixedSizeInBits());
7120 ArgValue
= DAG
.getNode(ISD::AssertSext
, dl
, LocVT
, ArgValue
,
7121 DAG
.getValueType(ValVT
));
7122 else if (Flags
.isZExt())
7123 ArgValue
= DAG
.getNode(ISD::AssertZext
, dl
, LocVT
, ArgValue
,
7124 DAG
.getValueType(ValVT
));
7126 return DAG
.getNode(ISD::TRUNCATE
, dl
, ValVT
, ArgValue
);
7129 static unsigned mapArgRegToOffsetAIX(unsigned Reg
, const PPCFrameLowering
*FL
) {
7130 const unsigned LASize
= FL
->getLinkageSize();
7132 if (PPC::GPRCRegClass
.contains(Reg
)) {
7133 assert(Reg
>= PPC::R3
&& Reg
<= PPC::R10
&&
7134 "Reg must be a valid argument register!");
7135 return LASize
+ 4 * (Reg
- PPC::R3
);
7138 if (PPC::G8RCRegClass
.contains(Reg
)) {
7139 assert(Reg
>= PPC::X3
&& Reg
<= PPC::X10
&&
7140 "Reg must be a valid argument register!");
7141 return LASize
+ 8 * (Reg
- PPC::X3
);
7144 llvm_unreachable("Only general purpose registers expected.");
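// For illustration (values assume the usual AIX linkage areas of 24 and 48
// bytes): mapArgRegToOffsetAIX(PPC::R4, FL) yields 24 + 4 * 1 = 28, and
// mapArgRegToOffsetAIX(PPC::X5, FL) yields 48 + 8 * 2 = 64, matching the
// shadow slots assumed by isGPRShadowAligned above.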
// AIX ABI Stack Frame Layout:
//
//   Low Memory  +--------------------------------------------+
//   SP    +---> | Back chain                                 | ---+
//   |           +--------------------------------------------+    |
//   |           | Saved Condition Register                   |    |
//   |           +--------------------------------------------+    |
//   |           | Saved Linkage Register                     |    |
//   |           +--------------------------------------------+    | Linkage Area
//   |           | Reserved for compilers                     |    |
//   |           +--------------------------------------------+    |
//   |           | Reserved for binders                       |    |
//   |           +--------------------------------------------+    |
//   |           | Saved TOC pointer                          | ---+
//   |           +--------------------------------------------+
//   |           | Parameter save area                        |
//   |           +--------------------------------------------+
//   |           | Alloca space                               |
//   |           +--------------------------------------------+
//   |           | Local variable space                       |
//   |           +--------------------------------------------+
//   |           | Float/int conversion temporary             |
//   |           +--------------------------------------------+
//   |           | Save area for AltiVec registers            |
//   |           +--------------------------------------------+
//   |           | AltiVec alignment padding                  |
//   |           +--------------------------------------------+
//   |           | Save area for VRSAVE register              |
//   |           +--------------------------------------------+
//   |           | Save area for General Purpose registers    |
//   |           +--------------------------------------------+
//   |           | Save area for Floating Point registers     |
//   |           +--------------------------------------------+
//   +---------> | Back chain                                 |
//   High Memory +--------------------------------------------+
//
// Specifications:
//   AIX 7.2 Assembler Language Reference
//   Subroutine linkage convention
SDValue PPCTargetLowering::LowerFormalArguments_AIX(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
          CallConv == CallingConv::Fast) &&
         "Unexpected calling convention!");

  if (getTargetMachine().Options.GuaranteedTailCallOpt)
    report_fatal_error("Tail call support is unimplemented on AIX.");

  if (useSoftFloat())
    report_fatal_error("Soft float support is unimplemented on AIX.");

  const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();

  const bool IsPPC64 = Subtarget.isPPC64();
  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());

  const EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Reserve space for the linkage area on the stack.
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
  CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);

  SmallVector<SDValue, 8> MemOps;

  for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
    CCValAssign &VA = ArgLocs[I++];
    MVT LocVT = VA.getLocVT();
    MVT ValVT = VA.getValVT();
    ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
    // For compatibility with the AIX XL compiler, the float args in the
    // parameter save area are initialized even if the argument is available
    // in register. The caller is required to initialize both the register
    // and memory, however, the callee can choose to expect it in either.
    // The memloc is dismissed here because the argument is retrieved from
    // the register.
    if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
      continue;

    auto HandleMemLoc = [&]() {
      const unsigned LocSize = LocVT.getStoreSize();
      const unsigned ValSize = ValVT.getStoreSize();
      assert((ValSize <= LocSize) &&
             "Object size is larger than size of MemLoc");
      int CurArgOffset = VA.getLocMemOffset();
      // Objects are right-justified because AIX is big-endian.
      if (LocSize > ValSize)
        CurArgOffset += LocSize - ValSize;
      // Potential tail calls could cause overwriting of argument stack slots.
      const bool IsImmutable =
          !(getTargetMachine().Options.GuaranteedTailCallOpt &&
            (CallConv == CallingConv::Fast));
      int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      SDValue ArgValue =
          DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
      InVals.push_back(ArgValue);
    };
7256 // Vector arguments to VaArg functions are passed both on the stack, and
7257 // in any available GPRs. Load the value from the stack and add the GPRs
7259 if (VA
.isMemLoc() && VA
.needsCustom()) {
7260 assert(ValVT
.isVector() && "Unexpected Custom MemLoc type.");
7261 assert(isVarArg
&& "Only use custom memloc for vararg.");
7262 // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
7263 // matching custom RegLocs.
7264 const unsigned OriginalValNo
= VA
.getValNo();
7265 (void)OriginalValNo
;
7267 auto HandleCustomVecRegLoc
= [&]() {
7268 assert(I
!= End
&& ArgLocs
[I
].isRegLoc() && ArgLocs
[I
].needsCustom() &&
7269 "Missing custom RegLoc.");
7271 assert(VA
.getValVT().isVector() &&
7272 "Unexpected Val type for custom RegLoc.");
7273 assert(VA
.getValNo() == OriginalValNo
&&
7274 "ValNo mismatch between custom MemLoc and RegLoc.");
7275 MVT::SimpleValueType SVT
= VA
.getLocVT().SimpleTy
;
7276 MF
.addLiveIn(VA
.getLocReg(),
7277 getRegClassForSVT(SVT
, IsPPC64
, Subtarget
.hasP8Vector(),
7278 Subtarget
.hasVSX()));
7282 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7283 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7285 HandleCustomVecRegLoc();
7286 HandleCustomVecRegLoc();
7288 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7289 // we passed the vector in R5, R6, R7 and R8.
7290 if (I
!= End
&& ArgLocs
[I
].isRegLoc() && ArgLocs
[I
].needsCustom()) {
7292 "Only 2 custom RegLocs expected for 64-bit codegen.");
7293 HandleCustomVecRegLoc();
7294 HandleCustomVecRegLoc();
7300 if (VA
.isRegLoc()) {
7301 if (VA
.getValVT().isScalarInteger())
7302 FuncInfo
->appendParameterType(PPCFunctionInfo::FixedType
);
7303 else if (VA
.getValVT().isFloatingPoint() && !VA
.getValVT().isVector()) {
7304 switch (VA
.getValVT().SimpleTy
) {
7306 report_fatal_error("Unhandled value type for argument.");
7308 FuncInfo
->appendParameterType(PPCFunctionInfo::ShortFloatingPoint
);
7311 FuncInfo
->appendParameterType(PPCFunctionInfo::LongFloatingPoint
);
7314 } else if (VA
.getValVT().isVector()) {
7315 switch (VA
.getValVT().SimpleTy
) {
7317 report_fatal_error("Unhandled value type for argument.");
7319 FuncInfo
->appendParameterType(PPCFunctionInfo::VectorChar
);
7322 FuncInfo
->appendParameterType(PPCFunctionInfo::VectorShort
);
7327 FuncInfo
->appendParameterType(PPCFunctionInfo::VectorInt
);
7331 FuncInfo
->appendParameterType(PPCFunctionInfo::VectorFloat
);
7337 if (Flags
.isByVal() && VA
.isMemLoc()) {
7338 const unsigned Size
=
7339 alignTo(Flags
.getByValSize() ? Flags
.getByValSize() : PtrByteSize
,
7341 const int FI
= MF
.getFrameInfo().CreateFixedObject(
7342 Size
, VA
.getLocMemOffset(), /* IsImmutable */ false,
7343 /* IsAliased */ true);
7344 SDValue FIN
= DAG
.getFrameIndex(FI
, PtrVT
);
7345 InVals
.push_back(FIN
);
7350 if (Flags
.isByVal()) {
7351 assert(VA
.isRegLoc() && "MemLocs should already be handled.");
7353 const MCPhysReg ArgReg
= VA
.getLocReg();
7354 const PPCFrameLowering
*FL
= Subtarget
.getFrameLowering();
7356 const unsigned StackSize
= alignTo(Flags
.getByValSize(), PtrByteSize
);
7357 const int FI
= MF
.getFrameInfo().CreateFixedObject(
7358 StackSize
, mapArgRegToOffsetAIX(ArgReg
, FL
), /* IsImmutable */ false,
7359 /* IsAliased */ true);
7360 SDValue FIN
= DAG
.getFrameIndex(FI
, PtrVT
);
7361 InVals
.push_back(FIN
);
7363 // Add live ins for all the RegLocs for the same ByVal.
7364 const TargetRegisterClass
*RegClass
=
7365 IsPPC64
? &PPC::G8RCRegClass
: &PPC::GPRCRegClass
;
7367 auto HandleRegLoc
= [&, RegClass
, LocVT
](const MCPhysReg PhysReg
,
7369 const Register VReg
= MF
.addLiveIn(PhysReg
, RegClass
);
7370 // Since the callers side has left justified the aggregate in the
7371 // register, we can simply store the entire register into the stack
7373 SDValue CopyFrom
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, LocVT
);
7374 // The store to the fixedstack object is needed becuase accessing a
7375 // field of the ByVal will use a gep and load. Ideally we will optimize
7376 // to extracting the value from the register directly, and elide the
7377 // stores when the arguments address is not taken, but that will need to
7379 SDValue Store
= DAG
.getStore(
7380 CopyFrom
.getValue(1), dl
, CopyFrom
,
7381 DAG
.getObjectPtrOffset(dl
, FIN
, TypeSize::getFixed(Offset
)),
7382 MachinePointerInfo::getFixedStack(MF
, FI
, Offset
));
7384 MemOps
.push_back(Store
);
7387 unsigned Offset
= 0;
7388 HandleRegLoc(VA
.getLocReg(), Offset
);
7389 Offset
+= PtrByteSize
;
7390 for (; Offset
!= StackSize
&& ArgLocs
[I
].isRegLoc();
7391 Offset
+= PtrByteSize
) {
7392 assert(ArgLocs
[I
].getValNo() == VA
.getValNo() &&
7393 "RegLocs should be for ByVal argument.");
7395 const CCValAssign RL
= ArgLocs
[I
++];
7396 HandleRegLoc(RL
.getLocReg(), Offset
);
7397 FuncInfo
->appendParameterType(PPCFunctionInfo::FixedType
);
7400 if (Offset
!= StackSize
) {
7401 assert(ArgLocs
[I
].getValNo() == VA
.getValNo() &&
7402 "Expected MemLoc for remaining bytes.");
7403 assert(ArgLocs
[I
].isMemLoc() && "Expected MemLoc for remaining bytes.");
7404 // Consume the MemLoc.The InVal has already been emitted, so nothing
7405 // more needs to be done.
7412 if (VA
.isRegLoc() && !VA
.needsCustom()) {
7413 MVT::SimpleValueType SVT
= ValVT
.SimpleTy
;
7415 MF
.addLiveIn(VA
.getLocReg(),
7416 getRegClassForSVT(SVT
, IsPPC64
, Subtarget
.hasP8Vector(),
7417 Subtarget
.hasVSX()));
7418 SDValue ArgValue
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, LocVT
);
7419 if (ValVT
.isScalarInteger() &&
7420 (ValVT
.getFixedSizeInBits() < LocVT
.getFixedSizeInBits())) {
7422 truncateScalarIntegerArg(Flags
, ValVT
, DAG
, ArgValue
, LocVT
, dl
);
7424 InVals
.push_back(ArgValue
);
7427 if (VA
.isMemLoc()) {
7433 // On AIX a minimum of 8 words is saved to the parameter save area.
7434 const unsigned MinParameterSaveArea
= 8 * PtrByteSize
;
7435 // Area that is at least reserved in the caller of this function.
7436 unsigned CallerReservedArea
= std::max
<unsigned>(
7437 CCInfo
.getStackSize(), LinkageSize
+ MinParameterSaveArea
);
7439 // Set the size that is at least reserved in caller of this function. Tail
7440 // call optimized function's reserved stack space needs to be aligned so
7441 // that taking the difference between two stack areas will result in an
7443 CallerReservedArea
=
7444 EnsureStackAlignment(Subtarget
.getFrameLowering(), CallerReservedArea
);
7445 FuncInfo
->setMinReservedArea(CallerReservedArea
);
7448 FuncInfo
->setVarArgsFrameIndex(
7449 MFI
.CreateFixedObject(PtrByteSize
, CCInfo
.getStackSize(), true));
7450 SDValue FIN
= DAG
.getFrameIndex(FuncInfo
->getVarArgsFrameIndex(), PtrVT
);
7452 static const MCPhysReg GPR_32
[] = {PPC::R3
, PPC::R4
, PPC::R5
, PPC::R6
,
7453 PPC::R7
, PPC::R8
, PPC::R9
, PPC::R10
};
7455 static const MCPhysReg GPR_64
[] = {PPC::X3
, PPC::X4
, PPC::X5
, PPC::X6
,
7456 PPC::X7
, PPC::X8
, PPC::X9
, PPC::X10
};
7457 const unsigned NumGPArgRegs
= std::size(IsPPC64
? GPR_64
: GPR_32
);
7459 // The fixed integer arguments of a variadic function are stored to the
7460 // VarArgsFrameIndex on the stack so that they may be loaded by
7461 // dereferencing the result of va_next.
7462 for (unsigned GPRIndex
=
7463 (CCInfo
.getStackSize() - LinkageSize
) / PtrByteSize
;
7464 GPRIndex
< NumGPArgRegs
; ++GPRIndex
) {
7466 const Register VReg
=
7467 IsPPC64
? MF
.addLiveIn(GPR_64
[GPRIndex
], &PPC::G8RCRegClass
)
7468 : MF
.addLiveIn(GPR_32
[GPRIndex
], &PPC::GPRCRegClass
);
7470 SDValue Val
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, PtrVT
);
7472 DAG
.getStore(Val
.getValue(1), dl
, Val
, FIN
, MachinePointerInfo());
7473 MemOps
.push_back(Store
);
7474 // Increment the address for the next argument to store.
7475 SDValue PtrOff
= DAG
.getConstant(PtrByteSize
, dl
, PtrVT
);
7476 FIN
= DAG
.getNode(ISD::ADD
, dl
, PtrOff
.getValueType(), FIN
, PtrOff
);
7480 if (!MemOps
.empty())
7481 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, MemOps
);
7486 SDValue
PPCTargetLowering::LowerCall_AIX(
7487 SDValue Chain
, SDValue Callee
, CallFlags CFlags
,
7488 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
7489 const SmallVectorImpl
<SDValue
> &OutVals
,
7490 const SmallVectorImpl
<ISD::InputArg
> &Ins
, const SDLoc
&dl
,
7491 SelectionDAG
&DAG
, SmallVectorImpl
<SDValue
> &InVals
,
7492 const CallBase
*CB
) const {
7493 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7494 // AIX ABI stack frame layout.
7496 assert((CFlags
.CallConv
== CallingConv::C
||
7497 CFlags
.CallConv
== CallingConv::Cold
||
7498 CFlags
.CallConv
== CallingConv::Fast
) &&
7499 "Unexpected calling convention!");
7501 if (CFlags
.IsPatchPoint
)
7502 report_fatal_error("This call type is unimplemented on AIX.");
7504 const PPCSubtarget
&Subtarget
= DAG
.getSubtarget
<PPCSubtarget
>();
7506 MachineFunction
&MF
= DAG
.getMachineFunction();
7507 SmallVector
<CCValAssign
, 16> ArgLocs
;
7508 AIXCCState
CCInfo(CFlags
.CallConv
, CFlags
.IsVarArg
, MF
, ArgLocs
,
7511 // Reserve space for the linkage save area (LSA) on the stack.
7512 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7513 // [SP][CR][LR][2 x reserved][TOC].
7514 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7515 const unsigned LinkageSize
= Subtarget
.getFrameLowering()->getLinkageSize();
7516 const bool IsPPC64
= Subtarget
.isPPC64();
7517 const EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
7518 const unsigned PtrByteSize
= IsPPC64
? 8 : 4;
7519 CCInfo
.AllocateStack(LinkageSize
, Align(PtrByteSize
));
7520 CCInfo
.AnalyzeCallOperands(Outs
, CC_AIX
);
7522 // The prolog code of the callee may store up to 8 GPR argument registers to
7523 // the stack, allowing va_start to index over them in memory if the callee
7525 // Because we cannot tell if this is needed on the caller side, we have to
7526 // conservatively assume that it is needed. As such, make sure we have at
7527 // least enough stack space for the caller to store the 8 GPRs.
7528 const unsigned MinParameterSaveAreaSize
= 8 * PtrByteSize
;
7529 const unsigned NumBytes
= std::max
<unsigned>(
7530 LinkageSize
+ MinParameterSaveAreaSize
, CCInfo
.getStackSize());
7532 // Adjust the stack pointer for the new arguments...
7533 // These operations are automatically eliminated by the prolog/epilog pass.
7534 Chain
= DAG
.getCALLSEQ_START(Chain
, NumBytes
, 0, dl
);
7535 SDValue CallSeqStart
= Chain
;
7537 SmallVector
<std::pair
<unsigned, SDValue
>, 8> RegsToPass
;
7538 SmallVector
<SDValue
, 8> MemOpChains
;
7540 // Set up a copy of the stack pointer for loading and storing any
7541 // arguments that may not fit in the registers available for argument
7543 const SDValue StackPtr
= IsPPC64
? DAG
.getRegister(PPC::X1
, MVT::i64
)
7544 : DAG
.getRegister(PPC::R1
, MVT::i32
);
7546 for (unsigned I
= 0, E
= ArgLocs
.size(); I
!= E
;) {
7547 const unsigned ValNo
= ArgLocs
[I
].getValNo();
7548 SDValue Arg
= OutVals
[ValNo
];
7549 ISD::ArgFlagsTy Flags
= Outs
[ValNo
].Flags
;
7551 if (Flags
.isByVal()) {
7552 const unsigned ByValSize
= Flags
.getByValSize();
7554 // Nothing to do for zero-sized ByVals on the caller side.
7560 auto GetLoad
= [&](EVT VT
, unsigned LoadOffset
) {
7561 return DAG
.getExtLoad(ISD::ZEXTLOAD
, dl
, PtrVT
, Chain
,
7563 ? DAG
.getObjectPtrOffset(
7564 dl
, Arg
, TypeSize::getFixed(LoadOffset
))
7566 MachinePointerInfo(), VT
);
7569 unsigned LoadOffset
= 0;
7571 // Initialize registers, which are fully occupied by the by-val argument.
7572 while (LoadOffset
+ PtrByteSize
<= ByValSize
&& ArgLocs
[I
].isRegLoc()) {
7573 SDValue Load
= GetLoad(PtrVT
, LoadOffset
);
7574 MemOpChains
.push_back(Load
.getValue(1));
7575 LoadOffset
+= PtrByteSize
;
7576 const CCValAssign
&ByValVA
= ArgLocs
[I
++];
7577 assert(ByValVA
.getValNo() == ValNo
&&
7578 "Unexpected location for pass-by-value argument.");
7579 RegsToPass
.push_back(std::make_pair(ByValVA
.getLocReg(), Load
));
7582 if (LoadOffset
== ByValSize
)
7585 // There must be one more loc to handle the remainder.
7586 assert(ArgLocs
[I
].getValNo() == ValNo
&&
7587 "Expected additional location for by-value argument.");
7589 if (ArgLocs
[I
].isMemLoc()) {
7590 assert(LoadOffset
< ByValSize
&& "Unexpected memloc for by-val arg.");
7591 const CCValAssign
&ByValVA
= ArgLocs
[I
++];
7592 ISD::ArgFlagsTy MemcpyFlags
= Flags
;
7593 // Only memcpy the bytes that don't pass in register.
7594 MemcpyFlags
.setByValSize(ByValSize
- LoadOffset
);
7595 Chain
= CallSeqStart
= createMemcpyOutsideCallSeq(
7596 (LoadOffset
!= 0) ? DAG
.getObjectPtrOffset(
7597 dl
, Arg
, TypeSize::getFixed(LoadOffset
))
7599 DAG
.getObjectPtrOffset(
7600 dl
, StackPtr
, TypeSize::getFixed(ByValVA
.getLocMemOffset())),
7601 CallSeqStart
, MemcpyFlags
, DAG
, dl
);
7605 // Initialize the final register residue.
7606 // Any residue that occupies the final by-val arg register must be
7607 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7608 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7609 // 2 and 1 byte loads.
7610 const unsigned ResidueBytes
= ByValSize
% PtrByteSize
;
7611 assert(ResidueBytes
!= 0 && LoadOffset
+ PtrByteSize
> ByValSize
&&
7612 "Unexpected register residue for by-value argument.");
7614 for (unsigned Bytes
= 0; Bytes
!= ResidueBytes
;) {
7615 const unsigned N
= llvm::bit_floor(ResidueBytes
- Bytes
);
7618 : ((N
== 2) ? MVT::i16
: (N
== 4 ? MVT::i32
: MVT::i64
));
7619 SDValue Load
= GetLoad(VT
, LoadOffset
);
7620 MemOpChains
.push_back(Load
.getValue(1));
7624 // By-val arguments are passed left-justfied in register.
7625 // Every load here needs to be shifted, otherwise a full register load
7626 // should have been used.
7627 assert(PtrVT
.getSimpleVT().getSizeInBits() > (Bytes
* 8) &&
7628 "Unexpected load emitted during handling of pass-by-value "
7630 unsigned NumSHLBits
= PtrVT
.getSimpleVT().getSizeInBits() - (Bytes
* 8);
7632 getShiftAmountTy(Load
->getValueType(0), DAG
.getDataLayout());
7633 SDValue SHLAmt
= DAG
.getConstant(NumSHLBits
, dl
, ShiftAmountTy
);
7634 SDValue ShiftedLoad
=
7635 DAG
.getNode(ISD::SHL
, dl
, Load
.getValueType(), Load
, SHLAmt
);
7636 ResidueVal
= ResidueVal
? DAG
.getNode(ISD::OR
, dl
, PtrVT
, ResidueVal
,
7641 const CCValAssign
&ByValVA
= ArgLocs
[I
++];
7642 RegsToPass
.push_back(std::make_pair(ByValVA
.getLocReg(), ResidueVal
));
7646 CCValAssign
&VA
= ArgLocs
[I
++];
7647 const MVT LocVT
= VA
.getLocVT();
7648 const MVT ValVT
= VA
.getValVT();
7650 switch (VA
.getLocInfo()) {
7652 report_fatal_error("Unexpected argument extension type.");
7653 case CCValAssign::Full
:
7655 case CCValAssign::ZExt
:
7656 Arg
= DAG
.getNode(ISD::ZERO_EXTEND
, dl
, VA
.getLocVT(), Arg
);
7658 case CCValAssign::SExt
:
7659 Arg
= DAG
.getNode(ISD::SIGN_EXTEND
, dl
, VA
.getLocVT(), Arg
);
7663 if (VA
.isRegLoc() && !VA
.needsCustom()) {
7664 RegsToPass
.push_back(std::make_pair(VA
.getLocReg(), Arg
));
7668 // Vector arguments passed to VarArg functions need custom handling when
7669 // they are passed (at least partially) in GPRs.
7670 if (VA
.isMemLoc() && VA
.needsCustom() && ValVT
.isVector()) {
7671 assert(CFlags
.IsVarArg
&& "Custom MemLocs only used for Vector args.");
7672 // Store value to its stack slot.
7674 DAG
.getConstant(VA
.getLocMemOffset(), dl
, StackPtr
.getValueType());
7675 PtrOff
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, StackPtr
, PtrOff
);
7677 DAG
.getStore(Chain
, dl
, Arg
, PtrOff
, MachinePointerInfo());
7678 MemOpChains
.push_back(Store
);
7679 const unsigned OriginalValNo
= VA
.getValNo();
7680 // Then load the GPRs from the stack
7681 unsigned LoadOffset
= 0;
7682 auto HandleCustomVecRegLoc
= [&]() {
7683 assert(I
!= E
&& "Unexpected end of CCvalAssigns.");
7684 assert(ArgLocs
[I
].isRegLoc() && ArgLocs
[I
].needsCustom() &&
7685 "Expected custom RegLoc.");
7686 CCValAssign RegVA
= ArgLocs
[I
++];
7687 assert(RegVA
.getValNo() == OriginalValNo
&&
7688 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7689 SDValue Add
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, PtrOff
,
7690 DAG
.getConstant(LoadOffset
, dl
, PtrVT
));
7691 SDValue Load
= DAG
.getLoad(PtrVT
, dl
, Store
, Add
, MachinePointerInfo());
7692 MemOpChains
.push_back(Load
.getValue(1));
7693 RegsToPass
.push_back(std::make_pair(RegVA
.getLocReg(), Load
));
7694 LoadOffset
+= PtrByteSize
;
7697 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7698 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7700 HandleCustomVecRegLoc();
7701 HandleCustomVecRegLoc();
7703 if (I
!= E
&& ArgLocs
[I
].isRegLoc() && ArgLocs
[I
].needsCustom() &&
7704 ArgLocs
[I
].getValNo() == OriginalValNo
) {
7706 "Only 2 custom RegLocs expected for 64-bit codegen.");
7707 HandleCustomVecRegLoc();
7708 HandleCustomVecRegLoc();
7714 if (VA
.isMemLoc()) {
7716 DAG
.getConstant(VA
.getLocMemOffset(), dl
, StackPtr
.getValueType());
7717 PtrOff
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, StackPtr
, PtrOff
);
7718 MemOpChains
.push_back(
7719 DAG
.getStore(Chain
, dl
, Arg
, PtrOff
, MachinePointerInfo()));
7724 if (!ValVT
.isFloatingPoint())
7726 "Unexpected register handling for calling convention.");
7728 // Custom handling is used for GPR initializations for vararg float
7730 assert(VA
.isRegLoc() && VA
.needsCustom() && CFlags
.IsVarArg
&&
7731 LocVT
.isInteger() &&
7732 "Custom register handling only expected for VarArg.");
7735 DAG
.getBitcast(MVT::getIntegerVT(ValVT
.getSizeInBits()), Arg
);
7737 if (Arg
.getValueType().getStoreSize() == LocVT
.getStoreSize())
7738 // f32 in 32-bit GPR
7739 // f64 in 64-bit GPR
7740 RegsToPass
.push_back(std::make_pair(VA
.getLocReg(), ArgAsInt
));
7741 else if (Arg
.getValueType().getFixedSizeInBits() <
7742 LocVT
.getFixedSizeInBits())
7743 // f32 in 64-bit GPR.
7744 RegsToPass
.push_back(std::make_pair(
7745 VA
.getLocReg(), DAG
.getZExtOrTrunc(ArgAsInt
, dl
, LocVT
)));
7747 // f64 in two 32-bit GPRs
7748 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7749 assert(Arg
.getValueType() == MVT::f64
&& CFlags
.IsVarArg
&& !IsPPC64
&&
7750 "Unexpected custom register for argument!");
7751 CCValAssign
&GPR1
= VA
;
7752 SDValue MSWAsI64
= DAG
.getNode(ISD::SRL
, dl
, MVT::i64
, ArgAsInt
,
7753 DAG
.getConstant(32, dl
, MVT::i8
));
7754 RegsToPass
.push_back(std::make_pair(
7755 GPR1
.getLocReg(), DAG
.getZExtOrTrunc(MSWAsI64
, dl
, MVT::i32
)));
7758 // If only 1 GPR was available, there will only be one custom GPR and
7759 // the argument will also pass in memory.
7760 CCValAssign
&PeekArg
= ArgLocs
[I
];
7761 if (PeekArg
.isRegLoc() && PeekArg
.getValNo() == PeekArg
.getValNo()) {
7762 assert(PeekArg
.needsCustom() && "A second custom GPR is expected.");
7763 CCValAssign
&GPR2
= ArgLocs
[I
++];
7764 RegsToPass
.push_back(std::make_pair(
7765 GPR2
.getLocReg(), DAG
.getZExtOrTrunc(ArgAsInt
, dl
, MVT::i32
)));
7771 if (!MemOpChains
.empty())
7772 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, MemOpChains
);
7774 // For indirect calls, we need to save the TOC base to the stack for
7775 // restoration after the call.
7776 if (CFlags
.IsIndirect
) {
7777 assert(!CFlags
.IsTailCall
&& "Indirect tail-calls not supported.");
7778 const MCRegister TOCBaseReg
= Subtarget
.getTOCPointerRegister();
7779 const MCRegister StackPtrReg
= Subtarget
.getStackPointerRegister();
7780 const MVT PtrVT
= Subtarget
.isPPC64() ? MVT::i64
: MVT::i32
;
7781 const unsigned TOCSaveOffset
=
7782 Subtarget
.getFrameLowering()->getTOCSaveOffset();
7784 setUsesTOCBasePtr(DAG
);
7785 SDValue Val
= DAG
.getCopyFromReg(Chain
, dl
, TOCBaseReg
, PtrVT
);
7786 SDValue PtrOff
= DAG
.getIntPtrConstant(TOCSaveOffset
, dl
);
7787 SDValue StackPtr
= DAG
.getRegister(StackPtrReg
, PtrVT
);
7788 SDValue AddPtr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, StackPtr
, PtrOff
);
7789 Chain
= DAG
.getStore(
7790 Val
.getValue(1), dl
, Val
, AddPtr
,
7791 MachinePointerInfo::getStack(DAG
.getMachineFunction(), TOCSaveOffset
));
7794 // Build a sequence of copy-to-reg nodes chained together with token chain
7795 // and flag operands which copy the outgoing args into the appropriate regs.
7797 for (auto Reg
: RegsToPass
) {
7798 Chain
= DAG
.getCopyToReg(Chain
, dl
, Reg
.first
, Reg
.second
, InGlue
);
7799 InGlue
= Chain
.getValue(1);
7802 const int SPDiff
= 0;
7803 return FinishCall(CFlags
, dl
, DAG
, RegsToPass
, InGlue
, Chain
, CallSeqStart
,
7804 Callee
, SPDiff
, NumBytes
, Ins
, InVals
, CB
);
7808 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv
,
7809 MachineFunction
&MF
, bool isVarArg
,
7810 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
7811 LLVMContext
&Context
) const {
7812 SmallVector
<CCValAssign
, 16> RVLocs
;
7813 CCState
CCInfo(CallConv
, isVarArg
, MF
, RVLocs
, Context
);
7814 return CCInfo
.CheckReturn(
7815 Outs
, (Subtarget
.isSVR4ABI() && CallConv
== CallingConv::Cold
)
7821 PPCTargetLowering::LowerReturn(SDValue Chain
, CallingConv::ID CallConv
,
7823 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
7824 const SmallVectorImpl
<SDValue
> &OutVals
,
7825 const SDLoc
&dl
, SelectionDAG
&DAG
) const {
7826 SmallVector
<CCValAssign
, 16> RVLocs
;
7827 CCState
CCInfo(CallConv
, isVarArg
, DAG
.getMachineFunction(), RVLocs
,
7829 CCInfo
.AnalyzeReturn(Outs
,
7830 (Subtarget
.isSVR4ABI() && CallConv
== CallingConv::Cold
)
7835 SmallVector
<SDValue
, 4> RetOps(1, Chain
);
7837 // Copy the result values into the output registers.
7838 for (unsigned i
= 0, RealResIdx
= 0; i
!= RVLocs
.size(); ++i
, ++RealResIdx
) {
7839 CCValAssign
&VA
= RVLocs
[i
];
7840 assert(VA
.isRegLoc() && "Can only return in registers!");
7842 SDValue Arg
= OutVals
[RealResIdx
];
7844 switch (VA
.getLocInfo()) {
7845 default: llvm_unreachable("Unknown loc info!");
7846 case CCValAssign::Full
: break;
7847 case CCValAssign::AExt
:
7848 Arg
= DAG
.getNode(ISD::ANY_EXTEND
, dl
, VA
.getLocVT(), Arg
);
7850 case CCValAssign::ZExt
:
7851 Arg
= DAG
.getNode(ISD::ZERO_EXTEND
, dl
, VA
.getLocVT(), Arg
);
7853 case CCValAssign::SExt
:
7854 Arg
= DAG
.getNode(ISD::SIGN_EXTEND
, dl
, VA
.getLocVT(), Arg
);
7857 if (Subtarget
.hasSPE() && VA
.getLocVT() == MVT::f64
) {
7858 bool isLittleEndian
= Subtarget
.isLittleEndian();
7859 // Legalize ret f64 -> ret 2 x i32.
7861 DAG
.getNode(PPCISD::EXTRACT_SPE
, dl
, MVT::i32
, Arg
,
7862 DAG
.getIntPtrConstant(isLittleEndian
? 0 : 1, dl
));
7863 Chain
= DAG
.getCopyToReg(Chain
, dl
, VA
.getLocReg(), SVal
, Glue
);
7864 RetOps
.push_back(DAG
.getRegister(VA
.getLocReg(), VA
.getLocVT()));
7865 SVal
= DAG
.getNode(PPCISD::EXTRACT_SPE
, dl
, MVT::i32
, Arg
,
7866 DAG
.getIntPtrConstant(isLittleEndian
? 1 : 0, dl
));
7867 Glue
= Chain
.getValue(1);
7868 VA
= RVLocs
[++i
]; // skip ahead to next loc
7869 Chain
= DAG
.getCopyToReg(Chain
, dl
, VA
.getLocReg(), SVal
, Glue
);
7871 Chain
= DAG
.getCopyToReg(Chain
, dl
, VA
.getLocReg(), Arg
, Glue
);
7872 Glue
= Chain
.getValue(1);
7873 RetOps
.push_back(DAG
.getRegister(VA
.getLocReg(), VA
.getLocVT()));
7876 RetOps
[0] = Chain
; // Update chain.
7878 // Add the glue if we have it.
7880 RetOps
.push_back(Glue
);
7882 return DAG
.getNode(PPCISD::RET_GLUE
, dl
, MVT::Other
, RetOps
);
7886 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op
,
7887 SelectionDAG
&DAG
) const {
7890 // Get the correct type for integers.
7891 EVT IntVT
= Op
.getValueType();
7894 SDValue Chain
= Op
.getOperand(0);
7895 SDValue FPSIdx
= getFramePointerFrameIndex(DAG
);
7896 // Build a DYNAREAOFFSET node.
7897 SDValue Ops
[2] = {Chain
, FPSIdx
};
7898 SDVTList VTs
= DAG
.getVTList(IntVT
);
7899 return DAG
.getNode(PPCISD::DYNAREAOFFSET
, dl
, VTs
, Ops
);
7902 SDValue
PPCTargetLowering::LowerSTACKRESTORE(SDValue Op
,
7903 SelectionDAG
&DAG
) const {
7904 // When we pop the dynamic allocation we need to restore the SP link.
7907 // Get the correct type for pointers.
7908 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
7910 // Construct the stack pointer operand.
7911 bool isPPC64
= Subtarget
.isPPC64();
7912 unsigned SP
= isPPC64
? PPC::X1
: PPC::R1
;
7913 SDValue StackPtr
= DAG
.getRegister(SP
, PtrVT
);
7915 // Get the operands for the STACKRESTORE.
7916 SDValue Chain
= Op
.getOperand(0);
7917 SDValue SaveSP
= Op
.getOperand(1);
7919 // Load the old link SP.
7920 SDValue LoadLinkSP
=
7921 DAG
.getLoad(PtrVT
, dl
, Chain
, StackPtr
, MachinePointerInfo());
7923 // Restore the stack pointer.
7924 Chain
= DAG
.getCopyToReg(LoadLinkSP
.getValue(1), dl
, SP
, SaveSP
);
7926 // Store the old link SP.
7927 return DAG
.getStore(Chain
, dl
, LoadLinkSP
, StackPtr
, MachinePointerInfo());
7930 SDValue
PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG
&DAG
) const {
7931 MachineFunction
&MF
= DAG
.getMachineFunction();
7932 bool isPPC64
= Subtarget
.isPPC64();
7933 EVT PtrVT
= getPointerTy(MF
.getDataLayout());
7935 // Get current frame pointer save index. The users of this index will be
7936 // primarily DYNALLOC instructions.
7937 PPCFunctionInfo
*FI
= MF
.getInfo
<PPCFunctionInfo
>();
7938 int RASI
= FI
->getReturnAddrSaveIndex();
7940 // If the frame pointer save index hasn't been defined yet.
7942 // Find out what the fix offset of the frame pointer save area.
7943 int LROffset
= Subtarget
.getFrameLowering()->getReturnSaveOffset();
7944 // Allocate the frame index for frame pointer save area.
7945 RASI
= MF
.getFrameInfo().CreateFixedObject(isPPC64
? 8 : 4, LROffset
, false);
7947 FI
->setReturnAddrSaveIndex(RASI
);
7949 return DAG
.getFrameIndex(RASI
, PtrVT
);
7953 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG
& DAG
) const {
7954 MachineFunction
&MF
= DAG
.getMachineFunction();
7955 bool isPPC64
= Subtarget
.isPPC64();
7956 EVT PtrVT
= getPointerTy(MF
.getDataLayout());
7958 // Get current frame pointer save index. The users of this index will be
7959 // primarily DYNALLOC instructions.
7960 PPCFunctionInfo
*FI
= MF
.getInfo
<PPCFunctionInfo
>();
7961 int FPSI
= FI
->getFramePointerSaveIndex();
7963 // If the frame pointer save index hasn't been defined yet.
7965 // Find out what the fix offset of the frame pointer save area.
7966 int FPOffset
= Subtarget
.getFrameLowering()->getFramePointerSaveOffset();
7967 // Allocate the frame index for frame pointer save area.
7968 FPSI
= MF
.getFrameInfo().CreateFixedObject(isPPC64
? 8 : 4, FPOffset
, true);
7970 FI
->setFramePointerSaveIndex(FPSI
);
7972 return DAG
.getFrameIndex(FPSI
, PtrVT
);
7975 SDValue
PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op
,
7976 SelectionDAG
&DAG
) const {
7977 MachineFunction
&MF
= DAG
.getMachineFunction();
7979 SDValue Chain
= Op
.getOperand(0);
7980 SDValue Size
= Op
.getOperand(1);
7983 // Get the correct type for pointers.
7984 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
7986 SDValue NegSize
= DAG
.getNode(ISD::SUB
, dl
, PtrVT
,
7987 DAG
.getConstant(0, dl
, PtrVT
), Size
);
7988 // Construct a node for the frame pointer save index.
7989 SDValue FPSIdx
= getFramePointerFrameIndex(DAG
);
7990 SDValue Ops
[3] = { Chain
, NegSize
, FPSIdx
};
7991 SDVTList VTs
= DAG
.getVTList(PtrVT
, MVT::Other
);
7992 if (hasInlineStackProbe(MF
))
7993 return DAG
.getNode(PPCISD::PROBED_ALLOCA
, dl
, VTs
, Ops
);
7994 return DAG
.getNode(PPCISD::DYNALLOC
, dl
, VTs
, Ops
);
7997 SDValue
PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op
,
7998 SelectionDAG
&DAG
) const {
7999 MachineFunction
&MF
= DAG
.getMachineFunction();
8001 bool isPPC64
= Subtarget
.isPPC64();
8002 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
8004 int FI
= MF
.getFrameInfo().CreateFixedObject(isPPC64
? 8 : 4, 0, false);
8005 return DAG
.getFrameIndex(FI
, PtrVT
);
8008 SDValue
PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op
,
8009 SelectionDAG
&DAG
) const {
8011 return DAG
.getNode(PPCISD::EH_SJLJ_SETJMP
, DL
,
8012 DAG
.getVTList(MVT::i32
, MVT::Other
),
8013 Op
.getOperand(0), Op
.getOperand(1));
8016 SDValue
PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op
,
8017 SelectionDAG
&DAG
) const {
8019 return DAG
.getNode(PPCISD::EH_SJLJ_LONGJMP
, DL
, MVT::Other
,
8020 Op
.getOperand(0), Op
.getOperand(1));
8023 SDValue
PPCTargetLowering::LowerLOAD(SDValue Op
, SelectionDAG
&DAG
) const {
8024 if (Op
.getValueType().isVector())
8025 return LowerVectorLoad(Op
, DAG
);
8027 assert(Op
.getValueType() == MVT::i1
&&
8028 "Custom lowering only for i1 loads");
8030 // First, load 8 bits into 32 bits, then truncate to 1 bit.
8033 LoadSDNode
*LD
= cast
<LoadSDNode
>(Op
);
8035 SDValue Chain
= LD
->getChain();
8036 SDValue BasePtr
= LD
->getBasePtr();
8037 MachineMemOperand
*MMO
= LD
->getMemOperand();
8040 DAG
.getExtLoad(ISD::EXTLOAD
, dl
, getPointerTy(DAG
.getDataLayout()), Chain
,
8041 BasePtr
, MVT::i8
, MMO
);
8042 SDValue Result
= DAG
.getNode(ISD::TRUNCATE
, dl
, MVT::i1
, NewLD
);
8044 SDValue Ops
[] = { Result
, SDValue(NewLD
.getNode(), 1) };
8045 return DAG
.getMergeValues(Ops
, dl
);
8048 SDValue
PPCTargetLowering::LowerSTORE(SDValue Op
, SelectionDAG
&DAG
) const {
8049 if (Op
.getOperand(1).getValueType().isVector())
8050 return LowerVectorStore(Op
, DAG
);
8052 assert(Op
.getOperand(1).getValueType() == MVT::i1
&&
8053 "Custom lowering only for i1 stores");
8055 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
8058 StoreSDNode
*ST
= cast
<StoreSDNode
>(Op
);
8060 SDValue Chain
= ST
->getChain();
8061 SDValue BasePtr
= ST
->getBasePtr();
8062 SDValue Value
= ST
->getValue();
8063 MachineMemOperand
*MMO
= ST
->getMemOperand();
8065 Value
= DAG
.getNode(ISD::ZERO_EXTEND
, dl
, getPointerTy(DAG
.getDataLayout()),
8067 return DAG
.getTruncStore(Chain
, dl
, Value
, BasePtr
, MVT::i8
, MMO
);
8070 // FIXME: Remove this once the ANDI glue bug is fixed:
8071 SDValue
PPCTargetLowering::LowerTRUNCATE(SDValue Op
, SelectionDAG
&DAG
) const {
8072 assert(Op
.getValueType() == MVT::i1
&&
8073 "Custom lowering only for i1 results");
8076 return DAG
.getNode(PPCISD::ANDI_rec_1_GT_BIT
, DL
, MVT::i1
, Op
.getOperand(0));
8079 SDValue
PPCTargetLowering::LowerTRUNCATEVector(SDValue Op
,
8080 SelectionDAG
&DAG
) const {
8082 // Implements a vector truncate that fits in a vector register as a shuffle.
8083 // We want to legalize vector truncates down to where the source fits in
8084 // a vector register (and target is therefore smaller than vector register
8085 // size). At that point legalization will try to custom lower the sub-legal
8086 // result and get here - where we can contain the truncate as a single target
8089 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8090 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8092 // We will implement it for big-endian ordering as this (where x denotes
8094 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8095 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8097 // The same operation in little-endian ordering will be:
8098 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8099 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
8101 EVT TrgVT
= Op
.getValueType();
8102 assert(TrgVT
.isVector() && "Vector type expected.");
8103 unsigned TrgNumElts
= TrgVT
.getVectorNumElements();
8104 EVT EltVT
= TrgVT
.getVectorElementType();
8105 if (!isOperationCustom(Op
.getOpcode(), TrgVT
) ||
8106 TrgVT
.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts
) ||
8107 !llvm::has_single_bit
<uint32_t>(EltVT
.getSizeInBits()))
8110 SDValue N1
= Op
.getOperand(0);
8111 EVT SrcVT
= N1
.getValueType();
8112 unsigned SrcSize
= SrcVT
.getSizeInBits();
8113 if (SrcSize
> 256 || !isPowerOf2_32(SrcVT
.getVectorNumElements()) ||
8114 !llvm::has_single_bit
<uint32_t>(
8115 SrcVT
.getVectorElementType().getSizeInBits()))
8117 if (SrcSize
== 256 && SrcVT
.getVectorNumElements() < 2)
8120 unsigned WideNumElts
= 128 / EltVT
.getSizeInBits();
8121 EVT WideVT
= EVT::getVectorVT(*DAG
.getContext(), EltVT
, WideNumElts
);
8125 if (SrcSize
== 256) {
8126 EVT VecIdxTy
= getVectorIdxTy(DAG
.getDataLayout());
8128 N1
.getValueType().getHalfNumVectorElementsVT(*DAG
.getContext());
8129 unsigned SplitNumElts
= SplitVT
.getVectorNumElements();
8130 Op1
= DAG
.getNode(ISD::EXTRACT_SUBVECTOR
, DL
, SplitVT
, N1
,
8131 DAG
.getConstant(0, DL
, VecIdxTy
));
8132 Op2
= DAG
.getNode(ISD::EXTRACT_SUBVECTOR
, DL
, SplitVT
, N1
,
8133 DAG
.getConstant(SplitNumElts
, DL
, VecIdxTy
));
8136 Op1
= SrcSize
== 128 ? N1
: widenVec(DAG
, N1
, DL
);
8137 Op2
= DAG
.getUNDEF(WideVT
);
8140 // First list the elements we want to keep.
8141 unsigned SizeMult
= SrcSize
/ TrgVT
.getSizeInBits();
8142 SmallVector
<int, 16> ShuffV
;
8143 if (Subtarget
.isLittleEndian())
8144 for (unsigned i
= 0; i
< TrgNumElts
; ++i
)
8145 ShuffV
.push_back(i
* SizeMult
);
8147 for (unsigned i
= 1; i
<= TrgNumElts
; ++i
)
8148 ShuffV
.push_back(i
* SizeMult
- 1);
8150 // Populate the remaining elements with undefs.
8151 for (unsigned i
= TrgNumElts
; i
< WideNumElts
; ++i
)
8152 // ShuffV.push_back(i + WideNumElts);
8153 ShuffV
.push_back(WideNumElts
+ 1);
8155 Op1
= DAG
.getNode(ISD::BITCAST
, DL
, WideVT
, Op1
);
8156 Op2
= DAG
.getNode(ISD::BITCAST
, DL
, WideVT
, Op2
);
8157 return DAG
.getVectorShuffle(WideVT
, DL
, Op1
, Op2
, ShuffV
);
8160 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
8162 SDValue
PPCTargetLowering::LowerSELECT_CC(SDValue Op
, SelectionDAG
&DAG
) const {
8163 ISD::CondCode CC
= cast
<CondCodeSDNode
>(Op
.getOperand(4))->get();
8164 EVT ResVT
= Op
.getValueType();
8165 EVT CmpVT
= Op
.getOperand(0).getValueType();
8166 SDValue LHS
= Op
.getOperand(0), RHS
= Op
.getOperand(1);
8167 SDValue TV
= Op
.getOperand(2), FV
= Op
.getOperand(3);
8170 // Without power9-vector, we don't have native instruction for f128 comparison.
8171 // Following transformation to libcall is needed for setcc:
8172 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8173 if (!Subtarget
.hasP9Vector() && CmpVT
== MVT::f128
) {
8174 SDValue Z
= DAG
.getSetCC(
8175 dl
, getSetCCResultType(DAG
.getDataLayout(), *DAG
.getContext(), CmpVT
),
8177 SDValue Zero
= DAG
.getConstant(0, dl
, Z
.getValueType());
8178 return DAG
.getSelectCC(dl
, Z
, Zero
, TV
, FV
, ISD::SETNE
);
8181 // Not FP, or using SPE? Not a fsel.
8182 if (!CmpVT
.isFloatingPoint() || !TV
.getValueType().isFloatingPoint() ||
8186 SDNodeFlags Flags
= Op
.getNode()->getFlags();
8188 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8189 // presence of infinities.
8190 if (Subtarget
.hasP9Vector() && LHS
== TV
&& RHS
== FV
) {
8196 return DAG
.getNode(PPCISD::XSMAXC
, dl
, Op
.getValueType(), LHS
, RHS
);
8199 return DAG
.getNode(PPCISD::XSMINC
, dl
, Op
.getValueType(), LHS
, RHS
);
8203 // We might be able to do better than this under some circumstances, but in
8204 // general, fsel-based lowering of select is a finite-math-only optimization.
8205 // For more information, see section F.3 of the 2.06 ISA specification.
8207 if ((!DAG
.getTarget().Options
.NoInfsFPMath
&& !Flags
.hasNoInfs()) ||
8208 (!DAG
.getTarget().Options
.NoNaNsFPMath
&& !Flags
.hasNoNaNs()) ||
8212 // If the RHS of the comparison is a 0.0, we don't need to do the
8213 // subtraction at all.
8215 if (isFloatingPointZero(RHS
))
8217 default: break; // SETUO etc aren't handled by fsel.
8222 if (LHS
.getValueType() == MVT::f32
) // Comparison is always 64-bits
8223 LHS
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, LHS
);
8224 Sel1
= DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
, LHS
, TV
, FV
);
8225 if (Sel1
.getValueType() == MVT::f32
) // Comparison is always 64-bits
8226 Sel1
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Sel1
);
8227 return DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
,
8228 DAG
.getNode(ISD::FNEG
, dl
, MVT::f64
, LHS
), Sel1
, FV
);
8231 std::swap(TV
, FV
); // fsel is natively setge, swap operands for setlt
8235 if (LHS
.getValueType() == MVT::f32
) // Comparison is always 64-bits
8236 LHS
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, LHS
);
8237 return DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
, LHS
, TV
, FV
);
8240 std::swap(TV
, FV
); // fsel is natively setge, swap operands for setlt
8244 if (LHS
.getValueType() == MVT::f32
) // Comparison is always 64-bits
8245 LHS
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, LHS
);
8246 return DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
,
8247 DAG
.getNode(ISD::FNEG
, dl
, MVT::f64
, LHS
), TV
, FV
);
8252 default: break; // SETUO etc aren't handled by fsel.
8257 Cmp
= DAG
.getNode(ISD::FSUB
, dl
, CmpVT
, LHS
, RHS
, Flags
);
8258 if (Cmp
.getValueType() == MVT::f32
) // Comparison is always 64-bits
8259 Cmp
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Cmp
);
8260 Sel1
= DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
, Cmp
, TV
, FV
);
8261 if (Sel1
.getValueType() == MVT::f32
) // Comparison is always 64-bits
8262 Sel1
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Sel1
);
8263 return DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
,
8264 DAG
.getNode(ISD::FNEG
, dl
, MVT::f64
, Cmp
), Sel1
, FV
);
8267 Cmp
= DAG
.getNode(ISD::FSUB
, dl
, CmpVT
, LHS
, RHS
, Flags
);
8268 if (Cmp
.getValueType() == MVT::f32
) // Comparison is always 64-bits
8269 Cmp
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Cmp
);
8270 return DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
, Cmp
, FV
, TV
);
8273 Cmp
= DAG
.getNode(ISD::FSUB
, dl
, CmpVT
, LHS
, RHS
, Flags
);
8274 if (Cmp
.getValueType() == MVT::f32
) // Comparison is always 64-bits
8275 Cmp
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Cmp
);
8276 return DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
, Cmp
, TV
, FV
);
8279 Cmp
= DAG
.getNode(ISD::FSUB
, dl
, CmpVT
, RHS
, LHS
, Flags
);
8280 if (Cmp
.getValueType() == MVT::f32
) // Comparison is always 64-bits
8281 Cmp
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Cmp
);
8282 return DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
, Cmp
, FV
, TV
);
8285 Cmp
= DAG
.getNode(ISD::FSUB
, dl
, CmpVT
, RHS
, LHS
, Flags
);
8286 if (Cmp
.getValueType() == MVT::f32
) // Comparison is always 64-bits
8287 Cmp
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Cmp
);
8288 return DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
, Cmp
, TV
, FV
);
8293 static unsigned getPPCStrictOpcode(unsigned Opc
) {
8296 llvm_unreachable("No strict version of this opcode!");
8297 case PPCISD::FCTIDZ
:
8298 return PPCISD::STRICT_FCTIDZ
;
8299 case PPCISD::FCTIWZ
:
8300 return PPCISD::STRICT_FCTIWZ
;
8301 case PPCISD::FCTIDUZ
:
8302 return PPCISD::STRICT_FCTIDUZ
;
8303 case PPCISD::FCTIWUZ
:
8304 return PPCISD::STRICT_FCTIWUZ
;
8306 return PPCISD::STRICT_FCFID
;
8307 case PPCISD::FCFIDU
:
8308 return PPCISD::STRICT_FCFIDU
;
8309 case PPCISD::FCFIDS
:
8310 return PPCISD::STRICT_FCFIDS
;
8311 case PPCISD::FCFIDUS
:
8312 return PPCISD::STRICT_FCFIDUS
;
8316 static SDValue
convertFPToInt(SDValue Op
, SelectionDAG
&DAG
,
8317 const PPCSubtarget
&Subtarget
) {
8319 bool IsStrict
= Op
->isStrictFPOpcode();
8320 bool IsSigned
= Op
.getOpcode() == ISD::FP_TO_SINT
||
8321 Op
.getOpcode() == ISD::STRICT_FP_TO_SINT
;
8323 // TODO: Any other flags to propagate?
8325 Flags
.setNoFPExcept(Op
->getFlags().hasNoFPExcept());
8327 // For strict nodes, source is the second operand.
8328 SDValue Src
= Op
.getOperand(IsStrict
? 1 : 0);
8329 SDValue Chain
= IsStrict
? Op
.getOperand(0) : SDValue();
8330 MVT DestTy
= Op
.getSimpleValueType();
8331 assert(Src
.getValueType().isFloatingPoint() &&
8332 (DestTy
== MVT::i8
|| DestTy
== MVT::i16
|| DestTy
== MVT::i32
||
8333 DestTy
== MVT::i64
) &&
8334 "Invalid FP_TO_INT types");
8335 if (Src
.getValueType() == MVT::f32
) {
8338 DAG
.getNode(ISD::STRICT_FP_EXTEND
, dl
,
8339 DAG
.getVTList(MVT::f64
, MVT::Other
), {Chain
, Src
}, Flags
);
8340 Chain
= Src
.getValue(1);
8342 Src
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Src
);
8344 if ((DestTy
== MVT::i8
|| DestTy
== MVT::i16
) && Subtarget
.hasP9Vector())
8345 DestTy
= Subtarget
.isPPC64() ? MVT::i64
: MVT::i32
;
8346 unsigned Opc
= ISD::DELETED_NODE
;
8347 switch (DestTy
.SimpleTy
) {
8348 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8350 Opc
= IsSigned
? PPCISD::FCTIWZ
8351 : (Subtarget
.hasFPCVT() ? PPCISD::FCTIWUZ
: PPCISD::FCTIDZ
);
8354 assert((IsSigned
|| Subtarget
.hasFPCVT()) &&
8355 "i64 FP_TO_UINT is supported only with FPCVT");
8356 Opc
= IsSigned
? PPCISD::FCTIDZ
: PPCISD::FCTIDUZ
;
8358 EVT ConvTy
= Src
.getValueType() == MVT::f128
? MVT::f128
: MVT::f64
;
8361 Opc
= getPPCStrictOpcode(Opc
);
8362 Conv
= DAG
.getNode(Opc
, dl
, DAG
.getVTList(ConvTy
, MVT::Other
), {Chain
, Src
},
8365 Conv
= DAG
.getNode(Opc
, dl
, ConvTy
, Src
);
8370 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op
, ReuseLoadInfo
&RLI
,
8372 const SDLoc
&dl
) const {
8373 SDValue Tmp
= convertFPToInt(Op
, DAG
, Subtarget
);
8374 bool IsSigned
= Op
.getOpcode() == ISD::FP_TO_SINT
||
8375 Op
.getOpcode() == ISD::STRICT_FP_TO_SINT
;
8376 bool IsStrict
= Op
->isStrictFPOpcode();
8378 // Convert the FP value to an int value through memory.
8379 bool i32Stack
= Op
.getValueType() == MVT::i32
&& Subtarget
.hasSTFIWX() &&
8380 (IsSigned
|| Subtarget
.hasFPCVT());
8381 SDValue FIPtr
= DAG
.CreateStackTemporary(i32Stack
? MVT::i32
: MVT::f64
);
8382 int FI
= cast
<FrameIndexSDNode
>(FIPtr
)->getIndex();
8383 MachinePointerInfo MPI
=
8384 MachinePointerInfo::getFixedStack(DAG
.getMachineFunction(), FI
);
8386 // Emit a store to the stack slot.
8387 SDValue Chain
= IsStrict
? Tmp
.getValue(1) : DAG
.getEntryNode();
8388 Align
Alignment(DAG
.getEVTAlign(Tmp
.getValueType()));
8390 MachineFunction
&MF
= DAG
.getMachineFunction();
8391 Alignment
= Align(4);
8392 MachineMemOperand
*MMO
=
8393 MF
.getMachineMemOperand(MPI
, MachineMemOperand::MOStore
, 4, Alignment
);
8394 SDValue Ops
[] = { Chain
, Tmp
, FIPtr
};
8395 Chain
= DAG
.getMemIntrinsicNode(PPCISD::STFIWX
, dl
,
8396 DAG
.getVTList(MVT::Other
), Ops
, MVT::i32
, MMO
);
8398 Chain
= DAG
.getStore(Chain
, dl
, Tmp
, FIPtr
, MPI
, Alignment
);
8400 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8401 // add in a bias on big endian.
8402 if (Op
.getValueType() == MVT::i32
&& !i32Stack
) {
8403 FIPtr
= DAG
.getNode(ISD::ADD
, dl
, FIPtr
.getValueType(), FIPtr
,
8404 DAG
.getConstant(4, dl
, FIPtr
.getValueType()));
8405 MPI
= MPI
.getWithOffset(Subtarget
.isLittleEndian() ? 0 : 4);
8411 RLI
.Alignment
= Alignment
;
8414 /// Custom lowers floating point to integer conversions to use
8415 /// the direct move instructions available in ISA 2.07 to avoid the
8416 /// need for load/store combinations.
8417 SDValue
PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op
,
8419 const SDLoc
&dl
) const {
8420 SDValue Conv
= convertFPToInt(Op
, DAG
, Subtarget
);
8421 SDValue Mov
= DAG
.getNode(PPCISD::MFVSR
, dl
, Op
.getValueType(), Conv
);
8422 if (Op
->isStrictFPOpcode())
8423 return DAG
.getMergeValues({Mov
, Conv
.getValue(1)}, dl
);
8428 SDValue
PPCTargetLowering::LowerFP_TO_INT(SDValue Op
, SelectionDAG
&DAG
,
8429 const SDLoc
&dl
) const {
8430 bool IsStrict
= Op
->isStrictFPOpcode();
8431 bool IsSigned
= Op
.getOpcode() == ISD::FP_TO_SINT
||
8432 Op
.getOpcode() == ISD::STRICT_FP_TO_SINT
;
8433 SDValue Src
= Op
.getOperand(IsStrict
? 1 : 0);
8434 EVT SrcVT
= Src
.getValueType();
8435 EVT DstVT
= Op
.getValueType();
8437 // FP to INT conversions are legal for f128.
8438 if (SrcVT
== MVT::f128
)
8439 return Subtarget
.hasP9Vector() ? Op
: SDValue();
8441 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8442 // PPC (the libcall is not available).
8443 if (SrcVT
== MVT::ppcf128
) {
8444 if (DstVT
== MVT::i32
) {
8445 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8446 // set other fast-math flags to FP operations in both strict and
8447 // non-strict cases. (FP_TO_SINT, FSUB)
8449 Flags
.setNoFPExcept(Op
->getFlags().hasNoFPExcept());
8453 std::tie(Lo
, Hi
) = DAG
.SplitScalar(Src
, dl
, MVT::f64
, MVT::f64
);
8455 // Add the two halves of the long double in round-to-zero mode, and use
8456 // a smaller FP_TO_SINT.
8458 SDValue Res
= DAG
.getNode(PPCISD::STRICT_FADDRTZ
, dl
,
8459 DAG
.getVTList(MVT::f64
, MVT::Other
),
8460 {Op
.getOperand(0), Lo
, Hi
}, Flags
);
8461 return DAG
.getNode(ISD::STRICT_FP_TO_SINT
, dl
,
8462 DAG
.getVTList(MVT::i32
, MVT::Other
),
8463 {Res
.getValue(1), Res
}, Flags
);
8465 SDValue Res
= DAG
.getNode(PPCISD::FADDRTZ
, dl
, MVT::f64
, Lo
, Hi
);
8466 return DAG
.getNode(ISD::FP_TO_SINT
, dl
, MVT::i32
, Res
);
8469 const uint64_t TwoE31
[] = {0x41e0000000000000LL
, 0};
8470 APFloat APF
= APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31
));
8471 SDValue Cst
= DAG
.getConstantFP(APF
, dl
, SrcVT
);
8472 SDValue SignMask
= DAG
.getConstant(0x80000000, dl
, DstVT
);
8474 // Sel = Src < 0x80000000
8475 // FltOfs = select Sel, 0.0, 0x80000000
8476 // IntOfs = select Sel, 0, 0x80000000
8477 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8478 SDValue Chain
= Op
.getOperand(0);
8480 getSetCCResultType(DAG
.getDataLayout(), *DAG
.getContext(), SrcVT
);
8482 getSetCCResultType(DAG
.getDataLayout(), *DAG
.getContext(), DstVT
);
8483 SDValue Sel
= DAG
.getSetCC(dl
, SetCCVT
, Src
, Cst
, ISD::SETLT
,
8485 Chain
= Sel
.getValue(1);
8487 SDValue FltOfs
= DAG
.getSelect(
8488 dl
, SrcVT
, Sel
, DAG
.getConstantFP(0.0, dl
, SrcVT
), Cst
);
8489 Sel
= DAG
.getBoolExtOrTrunc(Sel
, dl
, DstSetCCVT
, DstVT
);
8491 SDValue Val
= DAG
.getNode(ISD::STRICT_FSUB
, dl
,
8492 DAG
.getVTList(SrcVT
, MVT::Other
),
8493 {Chain
, Src
, FltOfs
}, Flags
);
8494 Chain
= Val
.getValue(1);
8495 SDValue SInt
= DAG
.getNode(ISD::STRICT_FP_TO_SINT
, dl
,
8496 DAG
.getVTList(DstVT
, MVT::Other
),
8497 {Chain
, Val
}, Flags
);
8498 Chain
= SInt
.getValue(1);
8499 SDValue IntOfs
= DAG
.getSelect(
8500 dl
, DstVT
, Sel
, DAG
.getConstant(0, dl
, DstVT
), SignMask
);
8501 SDValue Result
= DAG
.getNode(ISD::XOR
, dl
, DstVT
, SInt
, IntOfs
);
8502 return DAG
.getMergeValues({Result
, Chain
}, dl
);
8504 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8505 // FIXME: generated code sucks.
8506 SDValue True
= DAG
.getNode(ISD::FSUB
, dl
, MVT::ppcf128
, Src
, Cst
);
8507 True
= DAG
.getNode(ISD::FP_TO_SINT
, dl
, MVT::i32
, True
);
8508 True
= DAG
.getNode(ISD::ADD
, dl
, MVT::i32
, True
, SignMask
);
8509 SDValue False
= DAG
.getNode(ISD::FP_TO_SINT
, dl
, MVT::i32
, Src
);
8510 return DAG
.getSelectCC(dl
, Src
, Cst
, True
, False
, ISD::SETGE
);
8518 if (Subtarget
.hasDirectMove() && Subtarget
.isPPC64())
8519 return LowerFP_TO_INTDirectMove(Op
, DAG
, dl
);
8522 LowerFP_TO_INTForReuse(Op
, RLI
, DAG
, dl
);
8524 return DAG
.getLoad(Op
.getValueType(), dl
, RLI
.Chain
, RLI
.Ptr
, RLI
.MPI
,
8525 RLI
.Alignment
, RLI
.MMOFlags(), RLI
.AAInfo
, RLI
.Ranges
);
8528 // We're trying to insert a regular store, S, and then a load, L. If the
8529 // incoming value, O, is a load, we might just be able to have our load use the
8530 // address used by O. However, we don't know if anything else will store to
8531 // that address before we can load from it. To prevent this situation, we need
8532 // to insert our load, L, into the chain as a peer of O. To do this, we give L
8533 // the same chain operand as O, we create a token factor from the chain results
8534 // of O and L, and we replace all uses of O's chain result with that token
8535 // factor (see spliceIntoChain below for this last part).
8536 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op
, EVT MemVT
,
8539 ISD::LoadExtType ET
) const {
8540 // Conservatively skip reusing for constrained FP nodes.
8541 if (Op
->isStrictFPOpcode())
8545 bool ValidFPToUint
= Op
.getOpcode() == ISD::FP_TO_UINT
&&
8546 (Subtarget
.hasFPCVT() || Op
.getValueType() == MVT::i32
);
8547 if (ET
== ISD::NON_EXTLOAD
&&
8548 (ValidFPToUint
|| Op
.getOpcode() == ISD::FP_TO_SINT
) &&
8549 isOperationLegalOrCustom(Op
.getOpcode(),
8550 Op
.getOperand(0).getValueType())) {
8552 LowerFP_TO_INTForReuse(Op
, RLI
, DAG
, dl
);
8556 LoadSDNode
*LD
= dyn_cast
<LoadSDNode
>(Op
);
8557 if (!LD
|| LD
->getExtensionType() != ET
|| LD
->isVolatile() ||
8558 LD
->isNonTemporal())
8560 if (LD
->getMemoryVT() != MemVT
)
8563 // If the result of the load is an illegal type, then we can't build a
8564 // valid chain for reuse since the legalised loads and token factor node that
8565 // ties the legalised loads together uses a different output chain then the
8567 if (!isTypeLegal(LD
->getValueType(0)))
8570 RLI
.Ptr
= LD
->getBasePtr();
8571 if (LD
->isIndexed() && !LD
->getOffset().isUndef()) {
8572 assert(LD
->getAddressingMode() == ISD::PRE_INC
&&
8573 "Non-pre-inc AM on PPC?");
8574 RLI
.Ptr
= DAG
.getNode(ISD::ADD
, dl
, RLI
.Ptr
.getValueType(), RLI
.Ptr
,
8578 RLI
.Chain
= LD
->getChain();
8579 RLI
.MPI
= LD
->getPointerInfo();
8580 RLI
.IsDereferenceable
= LD
->isDereferenceable();
8581 RLI
.IsInvariant
= LD
->isInvariant();
8582 RLI
.Alignment
= LD
->getAlign();
8583 RLI
.AAInfo
= LD
->getAAInfo();
8584 RLI
.Ranges
= LD
->getRanges();
8586 RLI
.ResChain
= SDValue(LD
, LD
->isIndexed() ? 2 : 1);
8590 // Given the head of the old chain, ResChain, insert a token factor containing
8591 // it and NewResChain, and make users of ResChain now be users of that token
8593 // TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8594 void PPCTargetLowering::spliceIntoChain(SDValue ResChain
,
8595 SDValue NewResChain
,
8596 SelectionDAG
&DAG
) const {
8600 SDLoc
dl(NewResChain
);
8602 SDValue TF
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
,
8603 NewResChain
, DAG
.getUNDEF(MVT::Other
));
8604 assert(TF
.getNode() != NewResChain
.getNode() &&
8605 "A new TF really is required here");
8607 DAG
.ReplaceAllUsesOfValueWith(ResChain
, TF
);
8608 DAG
.UpdateNodeOperands(TF
.getNode(), ResChain
, NewResChain
);
8611 /// Analyze profitability of direct move
8612 /// prefer float load to int load plus direct move
8613 /// when there is no integer use of int load
8614 bool PPCTargetLowering::directMoveIsProfitable(const SDValue
&Op
) const {
8615 SDNode
*Origin
= Op
.getOperand(Op
->isStrictFPOpcode() ? 1 : 0).getNode();
8616 if (Origin
->getOpcode() != ISD::LOAD
)
8619 // If there is no LXSIBZX/LXSIHZX, like Power8,
8620 // prefer direct move if the memory size is 1 or 2 bytes.
8621 MachineMemOperand
*MMO
= cast
<LoadSDNode
>(Origin
)->getMemOperand();
8622 if (!Subtarget
.hasP9Vector() &&
8623 (!MMO
->getSize().hasValue() || MMO
->getSize().getValue() <= 2))
8626 for (SDNode::use_iterator UI
= Origin
->use_begin(),
8627 UE
= Origin
->use_end();
8630 // Only look at the users of the loaded value.
8631 if (UI
.getUse().get().getResNo() != 0)
8634 if (UI
->getOpcode() != ISD::SINT_TO_FP
&&
8635 UI
->getOpcode() != ISD::UINT_TO_FP
&&
8636 UI
->getOpcode() != ISD::STRICT_SINT_TO_FP
&&
8637 UI
->getOpcode() != ISD::STRICT_UINT_TO_FP
)
8644 static SDValue
convertIntToFP(SDValue Op
, SDValue Src
, SelectionDAG
&DAG
,
8645 const PPCSubtarget
&Subtarget
,
8646 SDValue Chain
= SDValue()) {
8647 bool IsSigned
= Op
.getOpcode() == ISD::SINT_TO_FP
||
8648 Op
.getOpcode() == ISD::STRICT_SINT_TO_FP
;
8651 // TODO: Any other flags to propagate?
8653 Flags
.setNoFPExcept(Op
->getFlags().hasNoFPExcept());
8655 // If we have FCFIDS, then use it when converting to single-precision.
8656 // Otherwise, convert to double-precision and then round.
8657 bool IsSingle
= Op
.getValueType() == MVT::f32
&& Subtarget
.hasFPCVT();
8658 unsigned ConvOpc
= IsSingle
? (IsSigned
? PPCISD::FCFIDS
: PPCISD::FCFIDUS
)
8659 : (IsSigned
? PPCISD::FCFID
: PPCISD::FCFIDU
);
8660 EVT ConvTy
= IsSingle
? MVT::f32
: MVT::f64
;
8661 if (Op
->isStrictFPOpcode()) {
8663 Chain
= Op
.getOperand(0);
8664 return DAG
.getNode(getPPCStrictOpcode(ConvOpc
), dl
,
8665 DAG
.getVTList(ConvTy
, MVT::Other
), {Chain
, Src
}, Flags
);
8667 return DAG
.getNode(ConvOpc
, dl
, ConvTy
, Src
);
8670 /// Custom lowers integer to floating point conversions to use
8671 /// the direct move instructions available in ISA 2.07 to avoid the
8672 /// need for load/store combinations.
8673 SDValue
PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op
,
8675 const SDLoc
&dl
) const {
8676 assert((Op
.getValueType() == MVT::f32
||
8677 Op
.getValueType() == MVT::f64
) &&
8678 "Invalid floating point type as target of conversion");
8679 assert(Subtarget
.hasFPCVT() &&
8680 "Int to FP conversions with direct moves require FPCVT");
8681 SDValue Src
= Op
.getOperand(Op
->isStrictFPOpcode() ? 1 : 0);
8682 bool WordInt
= Src
.getSimpleValueType().SimpleTy
== MVT::i32
;
8683 bool Signed
= Op
.getOpcode() == ISD::SINT_TO_FP
||
8684 Op
.getOpcode() == ISD::STRICT_SINT_TO_FP
;
8685 unsigned MovOpc
= (WordInt
&& !Signed
) ? PPCISD::MTVSRZ
: PPCISD::MTVSRA
;
8686 SDValue Mov
= DAG
.getNode(MovOpc
, dl
, MVT::f64
, Src
);
8687 return convertIntToFP(Op
, Mov
, DAG
, Subtarget
);
8690 static SDValue
widenVec(SelectionDAG
&DAG
, SDValue Vec
, const SDLoc
&dl
) {
8692 EVT VecVT
= Vec
.getValueType();
8693 assert(VecVT
.isVector() && "Expected a vector type.");
8694 assert(VecVT
.getSizeInBits() < 128 && "Vector is already full width.");
8696 EVT EltVT
= VecVT
.getVectorElementType();
8697 unsigned WideNumElts
= 128 / EltVT
.getSizeInBits();
8698 EVT WideVT
= EVT::getVectorVT(*DAG
.getContext(), EltVT
, WideNumElts
);
8700 unsigned NumConcat
= WideNumElts
/ VecVT
.getVectorNumElements();
8701 SmallVector
<SDValue
, 16> Ops(NumConcat
);
8703 SDValue UndefVec
= DAG
.getUNDEF(VecVT
);
8704 for (unsigned i
= 1; i
< NumConcat
; ++i
)
8707 return DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, WideVT
, Ops
);
8710 SDValue
PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op
, SelectionDAG
&DAG
,
8711 const SDLoc
&dl
) const {
8712 bool IsStrict
= Op
->isStrictFPOpcode();
8713 unsigned Opc
= Op
.getOpcode();
8714 SDValue Src
= Op
.getOperand(IsStrict
? 1 : 0);
8715 assert((Opc
== ISD::UINT_TO_FP
|| Opc
== ISD::SINT_TO_FP
||
8716 Opc
== ISD::STRICT_UINT_TO_FP
|| Opc
== ISD::STRICT_SINT_TO_FP
) &&
8717 "Unexpected conversion type");
8718 assert((Op
.getValueType() == MVT::v2f64
|| Op
.getValueType() == MVT::v4f32
) &&
8719 "Supports conversions to v2f64/v4f32 only.");
8721 // TODO: Any other flags to propagate?
8723 Flags
.setNoFPExcept(Op
->getFlags().hasNoFPExcept());
8725 bool SignedConv
= Opc
== ISD::SINT_TO_FP
|| Opc
== ISD::STRICT_SINT_TO_FP
;
8726 bool FourEltRes
= Op
.getValueType() == MVT::v4f32
;
8728 SDValue Wide
= widenVec(DAG
, Src
, dl
);
8729 EVT WideVT
= Wide
.getValueType();
8730 unsigned WideNumElts
= WideVT
.getVectorNumElements();
8731 MVT IntermediateVT
= FourEltRes
? MVT::v4i32
: MVT::v2i64
;
8733 SmallVector
<int, 16> ShuffV
;
8734 for (unsigned i
= 0; i
< WideNumElts
; ++i
)
8735 ShuffV
.push_back(i
+ WideNumElts
);
8737 int Stride
= FourEltRes
? WideNumElts
/ 4 : WideNumElts
/ 2;
8738 int SaveElts
= FourEltRes
? 4 : 2;
8739 if (Subtarget
.isLittleEndian())
8740 for (int i
= 0; i
< SaveElts
; i
++)
8741 ShuffV
[i
* Stride
] = i
;
8743 for (int i
= 1; i
<= SaveElts
; i
++)
8744 ShuffV
[i
* Stride
- 1] = i
- 1;
8746 SDValue ShuffleSrc2
=
8747 SignedConv
? DAG
.getUNDEF(WideVT
) : DAG
.getConstant(0, dl
, WideVT
);
8748 SDValue Arrange
= DAG
.getVectorShuffle(WideVT
, dl
, Wide
, ShuffleSrc2
, ShuffV
);
8752 Arrange
= DAG
.getBitcast(IntermediateVT
, Arrange
);
8753 EVT ExtVT
= Src
.getValueType();
8754 if (Subtarget
.hasP9Altivec())
8755 ExtVT
= EVT::getVectorVT(*DAG
.getContext(), WideVT
.getVectorElementType(),
8756 IntermediateVT
.getVectorNumElements());
8758 Extend
= DAG
.getNode(ISD::SIGN_EXTEND_INREG
, dl
, IntermediateVT
, Arrange
,
8759 DAG
.getValueType(ExtVT
));
8761 Extend
= DAG
.getNode(ISD::BITCAST
, dl
, IntermediateVT
, Arrange
);
8764 return DAG
.getNode(Opc
, dl
, DAG
.getVTList(Op
.getValueType(), MVT::Other
),
8765 {Op
.getOperand(0), Extend
}, Flags
);
8767 return DAG
.getNode(Opc
, dl
, Op
.getValueType(), Extend
);
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
                  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  EVT InVT = Src.getValueType();
  EVT OutVT = Op.getValueType();
  if (OutVT.isVector() && OutVT.isFloatingPoint() &&
      isOperationCustom(Op.getOpcode(), InVT))
    return LowerINT_TO_FPVector(Op, DAG, dl);

  // Conversions to f128 are legal.
  if (Op.getValueType() == MVT::f128)
    return Subtarget.hasP9Vector() ? Op : SDValue();

  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();

  if (Src.getValueType() == MVT::i1) {
    SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
                              DAG.getConstantFP(1.0, dl, Op.getValueType()),
                              DAG.getConstantFP(0.0, dl, Op.getValueType()));
    if (IsStrict)
      return DAG.getMergeValues({Sel, Chain}, dl);
    return Sel;
  }

  // If we have direct moves, we can do all the conversion, skip the store/load
  // however, without FPCVT we can't do most conversions.
  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
      Subtarget.isPPC64() && Subtarget.hasFPCVT())
    return LowerINT_TO_FPDirectMove(Op, DAG, dl);

  assert((IsSigned || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  if (Src.getValueType() == MVT::i64) {
    SDValue SINT = Src;
    // When converting to single-precision, we actually need to convert
    // to double-precision first and then round to single-precision.
    // To avoid double-rounding effects during that operation, we have
    // to prepare the input operand.  Bits that might be truncated when
    // converting to double-precision are replaced by a bit that won't
    // be lost at this stage, but is below the single-precision rounding
    // position.
    //
    // However, if -enable-unsafe-fp-math is in effect, accept double
    // rounding to avoid the extra overhead.
    if (Op.getValueType() == MVT::f32 &&
        !Subtarget.hasFPCVT() &&
        !DAG.getTarget().Options.UnsafeFPMath) {

      // Twiddle input to make sure the low 11 bits are zero.  (If this
      // is the case, we are guaranteed the value will fit into the 53 bit
      // mantissa of an IEEE double-precision value without rounding.)
      // If any of those low 11 bits were not zero originally, make sure
      // bit 12 (value 2048) is set instead, so that the final rounding
      // to single-precision gets the correct result.
      SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                                  SINT, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
                          Round, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
      Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                          Round, DAG.getConstant(-2048, dl, MVT::i64));
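
      // Worked example of the twiddle above: if the low 11 bits of SINT are
      // already zero (e.g. SINT = 0x...1000), then (SINT & 2047) = 0,
      // + 2047 = 2047, OR SINT keeps SINT's bits, and the final AND with
      // ~2047 restores SINT unchanged.  If instead SINT = 0x...1001, then
      // (SINT & 2047) = 1, + 2047 = 2048, so bit 11 (value 2048) is ORed in
      // and the final AND clears the low 11 bits, leaving that "sticky" bit
      // to steer the later rounding to single precision.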
      // However, we cannot use that value unconditionally: if the magnitude
      // of the input value is small, the bit-twiddling we did above might
      // end up visibly changing the output.  Fortunately, in that case, we
      // don't need to twiddle bits since the original input will convert
      // exactly to double-precision floating-point already.  Therefore,
      // construct a conditional to use the original value if the top 11
      // bits are all sign-bit copies, and use the rounded value computed
      // above otherwise.
      SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
                                 SINT, DAG.getConstant(53, dl, MVT::i32));
      Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
                         Cond, DAG.getConstant(1, dl, MVT::i64));
      Cond = DAG.getSetCC(
          dl,
          getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
          Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);

      SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
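
      // The guard works because the arithmetic shift by 53 replicates bits
      // 63..53 of SINT into the result: if those top 11 bits are all copies
      // of the sign bit, the shifted value is 0 or -1, adding 1 yields 1 or
      // 0, and the unsigned "> 1" test fails, so the original SINT is kept
      // (its magnitude fits in 53 bits and converts to double exactly).
      // Any other bit pattern selects the rounded value instead.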
    }

    ReuseLoadInfo RLI;
    SDValue Bits;

    MachineFunction &MF = DAG.getMachineFunction();
    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
      Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                         RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasLFIWAX() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
      MachineMemOperand *MMO =
          MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasFPCVT() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
      MachineMemOperand *MMO =
          MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (((Subtarget.hasLFIWAX() &&
                 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
                (Subtarget.hasFPCVT() &&
                 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
               SINT.getOperand(0).getValueType() == MVT::i32) {
      MachineFrameInfo &MFI = MF.getFrameInfo();
      EVT PtrVT = getPointerTy(DAG.getDataLayout());

      int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
                                   MachinePointerInfo::getFixedStack(
                                       DAG.getMachineFunction(), FrameIdx));
      Chain = Store;

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Chain;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
      RLI.Alignment = Align(4);

      MachineMemOperand *MMO =
          MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
                                         PPCISD::LFIWZX : PPCISD::LFIWAX,
                                     dl, DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      Chain = Bits.getValue(1);
    } else
      Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);

    SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
    if (IsStrict)
      Chain = FP.getValue(1);

    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      if (IsStrict)
        FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
                         DAG.getVTList(MVT::f32, MVT::Other),
                         {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
      else
        FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                         DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
    }
    return FP;
  }
  assert(Src.getValueType() == MVT::i32 &&
         "Unhandled INT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
  // then lfd it and fcfid it.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDValue Ld;
  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
    ReuseLoadInfo RLI;
    bool ReusingLoad;
    if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
      int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
                                   MachinePointerInfo::getFixedStack(
                                       DAG.getMachineFunction(), FrameIdx));
      Chain = Store;

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Chain;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
      RLI.Alignment = Align(4);
    }

    MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
    SDValue Ops[] = { RLI.Chain, RLI.Ptr };
    Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
                                 DAG.getVTList(MVT::f64, MVT::Other), Ops,
                                 MVT::i32, MMO);
    Chain = Ld.getValue(1);
    if (ReusingLoad)
      spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
  } else {
    assert(Subtarget.isPPC64() &&
           "i32->FP without LFIWAX supported only on PPC64");

    int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);

    // STD the extended value into the stack slot.
    SDValue Store = DAG.getStore(
        Chain, dl, Ext64, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
    Chain = Store;

    // Load the value as a double.
    Ld = DAG.getLoad(
        MVT::f64, dl, Chain, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
    Chain = Ld.getValue(1);
  }

  // FCFID it and return it.
  SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
  if (IsStrict)
    Chain = FP.getValue(1);
  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
    if (IsStrict)
      FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
                       DAG.getVTList(MVT::f32, MVT::Other),
                       {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
    else
      FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                       DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
  }
  return FP;
}
SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding mode is in bits 30:31 of FPSR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  GET_ROUNDING, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */
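
  // For example, FPSCR RN = 0b01 (round toward zero) maps as
  //   (1 & 0x3) ^ ((~1 & 0x3) >> 1) = 1 ^ (2 >> 1) = 0,
  // which is GET_ROUNDING's encoding for round-toward-zero.  Checking all
  // four inputs: 00 -> 1 (nearest), 01 -> 0 (zero), 10 -> 2 (+inf),
  // 11 -> 3 (-inf).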
  MachineFunction &MF = DAG.getMachineFunction();
  EVT VT = Op.getValueType();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Save FP Control Word to register
  SDValue Chain = Op.getOperand(0);
  SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
  Chain = MFFS.getValue(1);

  SDValue CWD;
  if (isTypeLegal(MVT::i64)) {
    CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
                      DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
  } else {
    // Save FP register to stack slot
    int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
    SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
    Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());

    // Load FP Control Word from low 32 bits of stack slot.
    assert(hasBigEndianPartOrdering(MVT::i64, MF.getDataLayout()) &&
           "Stack slot adjustment is valid only on big endian subtargets!");
    SDValue Four = DAG.getConstant(4, dl, PtrVT);
    SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
    CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
    Chain = CWD.getValue(1);
  }

  // Transform as necessary
  SDValue CWD1 =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                CWD, DAG.getConstant(3, dl, MVT::i32));
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i32,
                DAG.getNode(ISD::AND, dl, MVT::i32,
                            DAG.getNode(ISD::XOR, dl, MVT::i32,
                                        CWD, DAG.getConstant(3, dl, MVT::i32)),
                            DAG.getConstant(3, dl, MVT::i32)),
                DAG.getConstant(1, dl, MVT::i32));

  SDValue RetVal =
    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);

  RetVal =
      DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
                  dl, VT, RetVal);

  return DAG.getMergeValues({RetVal, Chain}, dl);
}
SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  SDLoc dl(Op);
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SHL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
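  // Concretely, PPCISD::SHL/SRL produce 0 for shift amounts at or above the
  // element width, which the expansion below exploits.  E.g. with
  // BitWidth = 64 and Amt = 70: Hi << 70 and Lo >> (64 - 70) both become 0,
  // so OutHi reduces to Lo << (70 - 64) = Lo << 6, and OutLo = Lo << 70 = 0,
  // which is exactly the result of shifting the 128-bit pair left by 70.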
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}
SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}
SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRA!");

  // Expand into a bunch of logical ops, followed by a select_cc.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
                                  Tmp4, Tmp6, ISD::SETLE);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}
SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();

  bool IsFSHL = Op.getOpcode() == ISD::FSHL;
  SDValue X = Op.getOperand(0);
  SDValue Y = Op.getOperand(1);
  SDValue Z = Op.getOperand(2);
  EVT AmtVT = Z.getValueType();

  // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
  // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
  // This is simpler than TargetLowering::expandFunnelShift because we can rely
  // on PowerPC shift by BW being well defined.
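  // For example, with Z == 0 the masked amount is 0 and SubZ == BW, so fshl
  // yields (X << 0) | (Y >> BW) = X and fshr yields (X << BW) | (Y >> 0) = Y,
  // matching the funnel-shift definition without a special case, precisely
  // because a PPC shift by BW produces 0.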
  Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
                  DAG.getConstant(BitWidth - 1, dl, AmtVT));
  SDValue SubZ =
      DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);

  X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
  Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
  return DAG.getNode(ISD::OR, dl, VT, X, Y);
}
//===----------------------------------------------------------------------===//
// Vector related lowering.
//

/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
/// element size of SplatSize. Cast the result to VT.
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
                                      SelectionDAG &DAG, const SDLoc &dl) {
  static const MVT VTys[] = { // canonical VT to use for each size.
    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
  };

  EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];

  // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
  if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
    SplatSize = 1;
    Val = 0xFF;
  }

  EVT CanonicalVT = VTys[SplatSize-1];

  // Build a canonical splat for this value.
  return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
}
/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
                                const SDLoc &dl, EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), Op);
}

/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
                                SelectionDAG &DAG, const SDLoc &dl,
                                EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
}

/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
                                SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
                                EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
}

/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
/// amount.  The result has the specified value type.
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
                           SelectionDAG &DAG, const SDLoc &dl) {
  // Force LHS/RHS to be the right type.
  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);

  int Ops[16];
  for (unsigned i = 0; i != 16; ++i)
    Ops[i] = i + Amt;
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
/// Do we have an efficient pattern in a .td file for this node?
///
/// \param V - pointer to the BuildVectorSDNode being matched
/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
///
/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
/// the opposite is true (expansion is beneficial) are:
/// - The node builds a vector out of integers that are not 32 or 64-bits
/// - The node builds a vector out of constants
/// - The node is a "load-and-splat"
/// In all other cases, we will choose to keep the BUILD_VECTOR.
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
                                            bool HasDirectMove,
                                            bool HasP8Vector) {
  EVT VecVT = V->getValueType(0);
  bool RightType = VecVT == MVT::v2f64 ||
    (HasP8Vector && VecVT == MVT::v4f32) ||
    (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
  if (!RightType)
    return false;

  bool IsSplat = true;
  bool IsLoad = false;
  SDValue Op0 = V->getOperand(0);

  // This function is called in a block that confirms the node is not a
  // constant splat. So a constant BUILD_VECTOR here means the vector is built
  // out of different constants.
  if (V->isConstant())
    return false;
  for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
    if (V->getOperand(i).isUndef())
      return false;
    // We want to expand nodes that represent load-and-splat even if the
    // loaded value is a floating point truncation or conversion to int.
    if (V->getOperand(i).getOpcode() == ISD::LOAD ||
        (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
        (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
        (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
      IsLoad = true;
    // If the operands are different or the input is not a load and has more
    // uses than just this BV node, then it isn't a splat.
    if (V->getOperand(i) != Op0 ||
        (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
      IsSplat = false;
  }
  return !(IsSplat && IsLoad);
}
// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue Op0 = Op->getOperand(0);

  if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
      (Op.getValueType() != MVT::f128))
    return SDValue();

  SDValue Lo = Op0.getOperand(0);
  SDValue Hi = Op0.getOperand(1);
  if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
    return SDValue();

  if (!Subtarget.isLittleEndian())
    std::swap(Lo, Hi);

  return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi);
}

static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
  const SDValue *InputLoad = &Op;
  while (InputLoad->getOpcode() == ISD::BITCAST)
    InputLoad = &InputLoad->getOperand(0);
  if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
      InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
    IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
    InputLoad = &InputLoad->getOperand(0);
  }
  if (InputLoad->getOpcode() != ISD::LOAD)
    return nullptr;
  LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
  return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
}
// Convert the argument APFloat to a single precision APFloat if there is no
// loss in information during the conversion to single precision APFloat and
// the resulting number is not a denormal number. Return true if successful.
bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
  APFloat APFloatToConvert = ArgAPFloat;
  bool LosesInfo = true;
  APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
                           &LosesInfo);
  bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
  if (Success)
    ArgAPFloat = APFloatToConvert;
  return Success;
}

// Bitcast the argument APInt to a double and convert it to a single precision
// APFloat, bitcast the APFloat to an APInt and assign it to the original
// argument if there is no loss in information during the conversion from
// double to single precision APFloat and the resulting number is not a
// denormal number. Return true if successful.
bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
  double DpValue = ArgAPInt.bitsToDouble();
  APFloat APFloatDp(DpValue);
  bool Success = convertToNonDenormSingle(APFloatDp);
  if (Success)
    ArgAPInt = APFloatDp.bitcastToAPInt();
  return Success;
}

// Nondestructive check for convertToNonDenormSingle.
bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
  // Only convert if it loses info, since XXSPLTIDP should
  // handle the other case.
  APFloat APFloatToConvert = ArgAPFloat;
  bool LosesInfo = true;
  APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
                           &LosesInfo);

  return (!LosesInfo && !APFloatToConvert.isDenormal());
}

static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
                             unsigned &Opcode) {
  LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
  if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
    return false;

  EVT Ty = Op->getValueType(0);
  // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
  // as we cannot handle extending loads for these types.
  if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
      ISD::isNON_EXTLoad(InputNode))
    return true;

  EVT MemVT = InputNode->getMemoryVT();
  // For v8i16 and v16i8 types, extending loads can be handled as long as the
  // memory VT is the same vector element VT type.
  // The loads feeding into the v8i16 and v16i8 types will be extending because
  // scalar i8/i16 are not legal types.
  if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
      (MemVT == Ty.getVectorElementType()))
    return true;

  if (Ty == MVT::v2i64) {
    // Check the extend type, when the input type is i32, and the output vector
    // type is v2i64.
    if (MemVT == MVT::i32) {
      if (ISD::isZEXTLoad(InputNode))
        Opcode = PPCISD::ZEXT_LD_SPLAT;
      if (ISD::isSEXTLoad(InputNode))
        Opcode = PPCISD::SEXT_LD_SPLAT;
    }
    return true;
  }
  return false;
}
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");

  // Check if this is a splat of a constant value.
  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  bool BVNIsConstantSplat =
      BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                           HasAnyUndefs, 0, !Subtarget.isLittleEndian());

  // If it is a splat of a double, check if we can shrink it to a 32 bit
  // non-denormal float which when converted back to double gives us the same
  // double. This is to exploit the XXSPLTIDP instruction.
  // If we lose precision, we use XXSPLTI32DX.
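  // For example, a v2f64 splat of 1.0 (0x3FF0000000000000) converts exactly to
  // the single-precision pattern 0x3F800000, so it can be materialized with a
  // single XXSPLTIDP.  A splat of 0.1 is not exactly representable in single
  // precision, so it falls through to the two XXSPLTI32DX writes below (one
  // for each 32-bit half of the 64-bit pattern).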
  if (BVNIsConstantSplat && (SplatBitSize == 64) &&
      Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
    // Check the type first to short-circuit so we don't modify APSplatBits if
    // this block isn't executed.
    if ((Op->getValueType(0) == MVT::v2f64) &&
        convertToNonDenormSingle(APSplatBits)) {
      SDValue SplatNode = DAG.getNode(
          PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
          DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
      return DAG.getBitcast(Op.getValueType(), SplatNode);
    }
    // We may lose precision, so we have to use XXSPLTI32DX.

    uint32_t Hi =
        (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
    uint32_t Lo =
        (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
    SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);

    if (!Hi || !Lo)
      // If either load is 0, then we should generate XXLXOR to set to 0.
      SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);

    if (Hi)
      SplatNode = DAG.getNode(
          PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
          DAG.getTargetConstant(0, dl, MVT::i32),
          DAG.getTargetConstant(Hi, dl, MVT::i32));

    if (Lo)
      SplatNode =
          DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
                      DAG.getTargetConstant(1, dl, MVT::i32),
                      DAG.getTargetConstant(Lo, dl, MVT::i32));

    return DAG.getBitcast(Op.getValueType(), SplatNode);
  }

  if (!BVNIsConstantSplat || SplatBitSize > 32) {
    unsigned NewOpcode = PPCISD::LD_SPLAT;

    // Handle load-and-splat patterns as we have instructions that will do this
    // in one go.
    if (DAG.isSplatValue(Op, true) &&
        isValidSplatLoad(Subtarget, Op, NewOpcode)) {
      const SDValue *InputLoad = &Op.getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);

      // If the input load is an extending load, it will be an i32 -> i64
      // extending load and isValidSplatLoad() will update NewOpcode.
      unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
      unsigned ElementSize =
          MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);

      assert(((ElementSize == 2 * MemorySize)
                  ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
                     NewOpcode == PPCISD::SEXT_LD_SPLAT)
                  : (NewOpcode == PPCISD::LD_SPLAT)) &&
             "Unmatched element size and opcode!\n");

      // Checking for a single use of this load, we have to check for vector
      // width (128 bits) / ElementSize uses (since each operand of the
      // BUILD_VECTOR is a separate use of the value).
      unsigned NumUsesOfInputLD = 128 / ElementSize;
      for (SDValue BVInOp : Op->ops())
        if (BVInOp.isUndef())
          NumUsesOfInputLD--;

      // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
      // Below cases should also happen for "lfiwzx/lfiwax + LE target + index
      // 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
      // 15", but function IsValidSplatLoad() now will only return true when
      // the data at index 0 is not nullptr. So we will not get into trouble
      // for those cases.
      //
      // case 1 - lfiwzx/lfiwax
      // 1.1: load result is i32 and is sign/zero extend to i64;
      // 1.2: build a v2i64 vector type with above loaded value;
      // 1.3: the vector has only one value at index 0, others are all undef;
      // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
      if (NumUsesOfInputLD == 1 &&
          (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
           !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
           Subtarget.hasLFIWAX()))
        return SDValue();

      // case 2 - lxvr[hb]x
      // 2.1: load result is at most i16;
      // 2.2: build a vector with above loaded value;
      // 2.3: the vector has only one value at index 0, others are all undef;
      // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
      if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
          Subtarget.isISA3_1() && ElementSize <= 16)
        return SDValue();

      assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
      if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
          Subtarget.hasVSX()) {
        SDValue Ops[] = {
          LD->getChain(),    // Chain
          LD->getBasePtr(),  // Ptr
          DAG.getValueType(Op.getValueType()) // VT
        };
        SDValue LdSplt = DAG.getMemIntrinsicNode(
            NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
            LD->getMemoryVT(), LD->getMemOperand());
        // Replace all uses of the output chain of the original load with the
        // output chain of the new load.
        DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
                                      LdSplt.getValue(1));
        return LdSplt;
      }
    }

    // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
    // 32-bits can be lowered to VSX instructions under certain conditions.
    // Without VSX, there is no pattern more efficient than expanding the node.
    if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
        haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
                                        Subtarget.hasP8Vector()))
      return Op;
    return SDValue();
  }

  uint64_t SplatBits = APSplatBits.getZExtValue();
  uint64_t SplatUndef = APSplatUndef.getZExtValue();
  unsigned SplatSize = SplatBitSize / 8;

  // First, handle single instruction cases.

  // All zeros?
  if (SplatBits == 0) {
    // Canonicalize all zero vectors to be v4i32.
    if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
      SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
      Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
    }
    return Op;
  }

  // We have XXSPLTIW for constant splats four bytes wide.
  // Given vector length is a multiple of 4, 2-byte splats can be replaced
  // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
  // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
  // turned into a 4-byte splat of 0xABABABAB.
  if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
    return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
                                  Op.getValueType(), DAG, dl);

  if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
    return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
                                  dl);

  // We have XXSPLTIB for constant splats one byte wide.
  if (Subtarget.hasP9Vector() && SplatSize == 1)
    return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
                                  dl);

  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
  int32_t SextVal = (int32_t(SplatBits << (32-SplatBitSize)) >>
                     (32-SplatBitSize));
  if (SextVal >= -16 && SextVal <= 15)
    return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
                                  dl);

  // Two instruction sequences.

  // If this value is in the range [-32,30] and is even, use:
  //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
  // If this value is in the range [17,31] and is odd, use:
  //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
  // If this value is in the range [-31,-17] and is odd, use:
  //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
  // Note the last two are three-instruction sequences.
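  // For example, a splat of 28 (even, in [-32,30]) can be built as
  // vspltis[bhw] 14 followed by a vaddu[bhw]m of that splat with itself,
  // avoiding a constant-pool load; the PPCISD::VADD_SPLAT pseudo below defers
  // that expansion until after constant folding.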
  if (SextVal >= -32 && SextVal <= 31) {
    // To avoid having these optimizations undone by constant folding,
    // we convert to a pseudo that will be expanded later into one of
    // the above forms.
    SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
    EVT VT = (SplatSize == 1 ? MVT::v16i8 :
              (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
    SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
    SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
    if (VT == Op.getValueType())
      return RetVal;
    else
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
  }

  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
  // for fneg/fabs.
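  // vspltisw -1 produces 0xFFFFFFFF in every word; vslw of that value by
  // itself shifts each word left by (0xFFFFFFFF & 31) = 31, giving
  // 0x8000_0000 per word, and the XOR with the all-ones vector below then
  // flips it to 0x7FFF_FFFF.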
  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
    // Make -1 and vspltisw -1:
    SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);

    // Make the VSLW intrinsic, computing 0x8000_0000.
    SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
                                   OnesV, DAG, dl);

    // xor by OnesV to invert it.
    Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
  }

  // Check to see if this is a wide variety of vsplti*, binop self cases.
  static const signed char SplatCsts[] = {
    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
  };

  for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
    // cases which are ambiguous (e.g. formation of 0x8000_0000).
    int i = SplatCsts[idx];

    // Figure out what shift amount will be used by altivec if shifted by i in
    // this splat size.
    unsigned TypeShiftAmt = i & (SplatBitSize-1);

    // vsplti + shl self.
    if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
      SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
        Intrinsic::ppc_altivec_vslw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + srl self.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
        Intrinsic::ppc_altivec_vsrw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + rol self.
    if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                         ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
      SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
        Intrinsic::ppc_altivec_vrlw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // t = vsplti c, result = vsldoi t, t, 1
    if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
      SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 2
    if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
      SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 3
    if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
      SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
  }

  return SDValue();
}
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
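/// Each PFEntry packs an operator in bits [29:26] and two 13-bit operand IDs
/// in bits [25:13] and [12:0]; the IDs index back into PerfectShuffleTable,
/// so the emission below recurses on both operands before applying the
/// operation selected by OpNum.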
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
                                      SDValue RHS, SelectionDAG &DAG,
                                      const SDLoc &dl) {
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);

  enum {
    OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
    OP_VMRGHW,
    OP_VMRGLW,
    OP_VSPLTISW0,
    OP_VSPLTISW1,
    OP_VSPLTISW2,
    OP_VSPLTISW3,
    OP_VSLDOI4,
    OP_VSLDOI8,
    OP_VSLDOI12
  };

  if (OpNum == OP_COPY) {
    if (LHSID == (1*9+2)*9+3) return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
    return RHS;
  }

  SDValue OpLHS, OpRHS;
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);

  int ShufIdxs[16];
  switch (OpNum) {
  default: llvm_unreachable("Unknown i32 permute!");
  case OP_VMRGHW:
    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
    break;
  case OP_VMRGLW:
    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
    break;
  case OP_VSPLTISW0:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+0;
    break;
  case OP_VSPLTISW1:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+4;
    break;
  case OP_VSPLTISW2:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+8;
    break;
  case OP_VSPLTISW3:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+12;
    break;
  case OP_VSLDOI4:
    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI8:
    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI12:
    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
  }
  EVT VT = OpLHS.getValueType();
  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
/// SDValue.
SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned BytesInVector = 16;
  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the byte we want at element 7.
  unsigned LittleEndianShifts[] = {8,  7,  6,  5,  4,  3,  2,  1,
                                   0, 15, 14, 13, 12, 11, 10, 9};
  unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
                                1, 2,  3,  4,  5,  6,  7,  8};

  ArrayRef<int> Mask = N->getMask();
  int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa.
  // Possible permutations inserting an element from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   ...
  //   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
  // Inserting from V1 into V2 will be similar, except mask range will be
  // [16,31].

  bool FoundCandidate = false;
  // If both vector operands for the shuffle are the same vector, the mask
  // will contain only elements from the first one and the second one will be
  // undef.
  unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
  // Go through the mask of half-words to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < BytesInVector; ++i) {
    unsigned CurrentElement = Mask[i];
    // If 2nd operand is undefined, we should only look for element 7 in the
    // mask.
    if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
      continue;

    bool OtherElementsInOrder = true;
    // Examine the other elements in the Mask to see if they're in original
    // order.
    for (unsigned j = 0; j < BytesInVector; ++j) {
      if (j == i)
        continue;
      // If CurrentElement is from V1 [0,15], then we expect the rest of the
      // Mask to be from V2 [16,31] and vice versa.  Unless the 2nd operand is
      // undefined, in which case we always assume we're picking from the 1st
      // operand.
      int MaskOffset =
          (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
      if (Mask[j] != OriginalOrder[j] + MaskOffset) {
        OtherElementsInOrder = false;
        break;
      }
    }
    // If other elements are in original order, we record the number of shifts
    // we need to get the element we want into element 7.  Also record which
    // byte in the vector we should insert into.
    if (OtherElementsInOrder) {
      // If 2nd operand is undefined, we assume no shifts and no swapping.
      if (V2.isUndef()) {
        ShiftElts = 0;
        Swap = false;
      } else {
        // Only need the last 4-bits for shifts because operands will be
        // swapped if CurrentElement is >= 2^4.
        ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
                         : BigEndianShifts[CurrentElement & 0xF];
        Swap = CurrentElement < BytesInVector;
      }
      InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
      FoundCandidate = true;
      break;
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTB,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  if (V2.isUndef())
    V2 = V1;
  if (ShiftElts) {
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
                       DAG.getConstant(InsertAtByte, dl, MVT::i32));
  }
  return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
                     DAG.getConstant(InsertAtByte, dl, MVT::i32));
}
/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
/// SDValue.
SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned NumHalfWords = 8;
  const unsigned BytesInVector = NumHalfWords * 2;
  // Check that the shuffle is on half-words.
  if (!isNByteElemShuffleMask(N, 2, 1))
    return SDValue();

  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the half-word we want at element 3.
  unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
  unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};

  uint32_t Mask = 0;
  uint32_t OriginalOrderLow = 0x1234567;
  uint32_t OriginalOrderHigh = 0x89ABCDEF;
  // Now we look at mask elements 0,2,4,6,8,10,12,14.  Pack the mask into a
  // 32-bit space, only need 4-bit nibbles per element.
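  // For example, the identity half-word order {0,1,2,3,4,5,6,7} packs to
  // 0x01234567 (== OriginalOrderLow) and {8,...,15} packs to 0x89ABCDEF
  // (== OriginalOrderHigh), which is what the candidate search below compares
  // against.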
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
  }

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa.  Possible permutations inserting an element
  // from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7
  //   0, X, 2, 3, 4, 5, 6, 7
  //   0, 1, X, 3, 4, 5, 6, 7
  //   0, 1, 2, X, 4, 5, 6, 7
  //   0, 1, 2, 3, X, 5, 6, 7
  //   0, 1, 2, 3, 4, X, 6, 7
  //   0, 1, 2, 3, 4, 5, X, 7
  //   0, 1, 2, 3, 4, 5, 6, X
  // Inserting from V1 into V2 will be similar, except mask range will be
  // [8,15].

  bool FoundCandidate = false;
  // Go through the mask of half-words to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
    uint32_t MaskOtherElts = ~(0xF << MaskShift);
    uint32_t TargetOrder = 0x0;

    // If both vector operands for the shuffle are the same vector, the mask
    // will contain only elements from the first one and the second one will be
    // undef.
    if (V2.isUndef()) {
      ShiftElts = 0;
      unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
      TargetOrder = OriginalOrderLow;
      Swap = false;
      // Skip if not the correct element or mask of other elements don't equal
      // to our expected order.
      if (MaskOneElt == VINSERTHSrcElem &&
          (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        FoundCandidate = true;
        break;
      }
    } else { // If both operands are defined.
      // Target order is [8,15] if the current mask is between [0,7].
      TargetOrder =
          (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
      // Skip if mask of other elements don't equal our expected order.
      if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        // We only need the last 3 bits for the number of shifts.
        ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
                         : BigEndianShifts[MaskOneElt & 0x7];
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        Swap = MaskOneElt < NumHalfWords;
        FoundCandidate = true;
        break;
      }
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTH,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  if (V2.isUndef())
    V2 = V1;
  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
  if (ShiftElts) {
    // Double ShiftElts because we're left shifting on v16i8 type.
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
    SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
  }
  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                            DAG.getConstant(InsertAtByte, dl, MVT::i32));
  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
/// return the default SDValue.
SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
                                              SelectionDAG &DAG) const {
  // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
  // to v16i8. Peek through the bitcasts to get the actual operands.
  SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
  SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));

  auto ShuffleMask = SVN->getMask();
  SDValue VecShuffle(SVN, 0);
  SDLoc DL(SVN);

  // Check that we have a four byte shuffle.
  if (!isNByteElemShuffleMask(SVN, 4, 1))
    return SDValue();

  // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
  if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
    std::swap(LHS, RHS);
    VecShuffle = peekThroughBitcasts(DAG.getCommutedVectorShuffle(*SVN));
    ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
    if (!CommutedSV)
      return SDValue();
    ShuffleMask = CommutedSV->getMask();
  }

  // Ensure that the RHS is a vector of constants.
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
  if (!BVN)
    return SDValue();

  // Check if RHS is a splat of 4-bytes (or smaller).
  APInt APSplatValue, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
                            HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
      SplatBitSize > 32)
    return SDValue();

  // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
  // The instruction splats a constant C into two words of the source vector
  // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C },
  // so we check that the shuffle mask is the equivalent of
  // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
  // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
  // within each word are consecutive, so we only need to check the first byte.
  SDValue Index;
  bool IsLE = Subtarget.isLittleEndian();
  if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
      (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
       ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
    Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
  else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
           (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
            ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
    Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
  else
    return SDValue();

  // If the splat is narrower than 32-bits, we need to get the 32-bit value
  // for XXSPLTI32DX.
  unsigned SplatVal = APSplatValue.getZExtValue();
  for (; SplatBitSize < 32; SplatBitSize <<= 1)
    SplatVal |= (SplatVal << SplatBitSize);

  SDValue SplatNode = DAG.getNode(
      PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
      Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
  return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
}
/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
/// i.e (or (shl x, C1), (srl x, 128-C1)).
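/// For example, ROTL(v1i128 x, 16) rotates by two whole bytes, so the lowering
/// builds the byte mask {2,3,...,15,0,1} and emits a single vector_shuffle; a
/// rotate by 12 is not byte-aligned and takes the i128 shl/srl/or path instead.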
SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
  assert(Op.getValueType() == MVT::v1i128 &&
         "Only set v1i128 as custom, other type shouldn't reach here!");
  SDLoc dl(Op);
  SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
  SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
  unsigned SHLAmt = N1.getConstantOperandVal(0);
  if (SHLAmt % 8 == 0) {
    std::array<int, 16> Mask;
    std::iota(Mask.begin(), Mask.end(), 0);
    std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
    if (SDValue Shuffle =
            DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
                                 DAG.getUNDEF(MVT::v16i8), Mask))
      return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
  }
  SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
  SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
                              DAG.getConstant(SHLAmt, dl, MVT::i32));
  SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
                              DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
  SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
  return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
}
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
/// is a shuffle we can handle in a single instruction, return it.  Otherwise,
/// return the code it can be lowered into.  Worst case, it can always be
/// lowered into a vperm.
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);

  // Any nodes that were combined in the target-independent combiner prior
  // to vector legalization will not be sent to the target combine. Try to
  // combine it here.
  if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
    if (!isa<ShuffleVectorSDNode>(NewShuffle))
      return NewShuffle;
    Op = NewShuffle;
    SVOp = cast<ShuffleVectorSDNode>(Op);
    V1 = Op.getOperand(0);
    V2 = Op.getOperand(1);
  }
  EVT VT = Op.getValueType();
  bool isLittleEndian = Subtarget.isLittleEndian();

  unsigned ShiftElts, InsertAtByte;
  bool Swap = false;

  // If this is a load-and-splat, we can do that with a single instruction
  // in some cases. However if the load has multiple uses, we don't want to
  // combine it because that will just produce multiple loads.
  bool IsPermutedLoad = false;
  const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
  if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
      (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
      InputLoad->hasOneUse()) {
    bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
    int SplatIdx =
        PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);

    // The splat index for permuted loads will be in the left half of the
    // vector which is strictly wider than the loaded value by 8 bytes. So we
    // need to adjust the splat index to point to the correct address in memory.
    if (IsPermutedLoad) {
      assert((isLittleEndian || IsFourByte) &&
             "Unexpected size for permuted load on big endian target");
      SplatIdx += IsFourByte ? 2 : 1;
      assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
             "Splat of a value outside of the loaded memory");
    }

    LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
    // For 4-byte load-and-splat, we need Power9.
    if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
      uint64_t Offset = 0;
      if (IsFourByte)
        Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
      else
        Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;

      // If the width of the load is the same as the width of the splat,
      // loading with an offset would load the wrong memory.
      if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
        Offset = 0;

      SDValue BasePtr = LD->getBasePtr();
      if (Offset != 0)
        BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
                              BasePtr, DAG.getIntPtrConstant(Offset, dl));
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        BasePtr,           // BasePtr
        DAG.getValueType(Op.getValueType()) // VT
      };
      SDVTList VTL =
          DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
      SDValue LdSplt =
          DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
                                  Ops, LD->getMemoryVT(), LD->getMemOperand());
      DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
      if (LdSplt.getValueType() != SVOp->getValueType(0))
        LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
      return LdSplt;
    }
  }

  // All v2i64 and v2f64 shuffles are legal
  if (VT == MVT::v2i64 || VT == MVT::v2f64)
    return Op;

  if (Subtarget.hasP9Vector() &&
      PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
                           isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
    if (ShiftElts) {
      SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
                                DAG.getConstant(ShiftElts, dl, MVT::i32));
      SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
                                DAG.getConstant(InsertAtByte, dl, MVT::i32));
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
    }
    SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
  }

  if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
    SDValue SplatInsertNode;
    if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
      return SplatInsertNode;
  }

  if (Subtarget.hasP9Altivec()) {
    SDValue NewISDNode;
    if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
      return NewISDNode;

    if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
      return NewISDNode;
  }

  if (Subtarget.hasVSX() &&
      PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
    SDValue Conv2 =
        DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);

    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
  }

  if (Subtarget.hasVSX() &&
      PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
    SDValue Conv2 =
        DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);

    SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
                                 DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
  }

  if (Subtarget.hasP9Vector()) {
    if (PPC::isXXBRHShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
      SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
    } else if (PPC::isXXBRWShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
      SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
    } else if (PPC::isXXBRDShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
      SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
    } else if (PPC::isXXBRQShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
      SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
    }
  }

  if (Subtarget.hasVSX()) {
    if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
      int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);

      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
      SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
                                  DAG.getConstant(SplatIdx, dl, MVT::i32));
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
    }

    // Left shifts of 8 bytes are actually swaps. Convert accordingly.
    if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
      SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
    }
  }

  // Cases that are handled by instructions that take permute immediates
  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10333 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10334 // selected by the instruction selector.
10335 if (V2
.isUndef()) {
10336 if (PPC::isSplatShuffleMask(SVOp
, 1) ||
10337 PPC::isSplatShuffleMask(SVOp
, 2) ||
10338 PPC::isSplatShuffleMask(SVOp
, 4) ||
10339 PPC::isVPKUWUMShuffleMask(SVOp
, 1, DAG
) ||
10340 PPC::isVPKUHUMShuffleMask(SVOp
, 1, DAG
) ||
10341 PPC::isVSLDOIShuffleMask(SVOp
, 1, DAG
) != -1 ||
10342 PPC::isVMRGLShuffleMask(SVOp
, 1, 1, DAG
) ||
10343 PPC::isVMRGLShuffleMask(SVOp
, 2, 1, DAG
) ||
10344 PPC::isVMRGLShuffleMask(SVOp
, 4, 1, DAG
) ||
10345 PPC::isVMRGHShuffleMask(SVOp
, 1, 1, DAG
) ||
10346 PPC::isVMRGHShuffleMask(SVOp
, 2, 1, DAG
) ||
10347 PPC::isVMRGHShuffleMask(SVOp
, 4, 1, DAG
) ||
10348 (Subtarget
.hasP8Altivec() && (
10349 PPC::isVPKUDUMShuffleMask(SVOp
, 1, DAG
) ||
10350 PPC::isVMRGEOShuffleMask(SVOp
, true, 1, DAG
) ||
10351 PPC::isVMRGEOShuffleMask(SVOp
, false, 1, DAG
)))) {
10356 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10357 // and produce a fixed permutation. If any of these match, do not lower to
10359 unsigned int ShuffleKind
= isLittleEndian
? 2 : 0;
10360 if (PPC::isVPKUWUMShuffleMask(SVOp
, ShuffleKind
, DAG
) ||
10361 PPC::isVPKUHUMShuffleMask(SVOp
, ShuffleKind
, DAG
) ||
10362 PPC::isVSLDOIShuffleMask(SVOp
, ShuffleKind
, DAG
) != -1 ||
10363 PPC::isVMRGLShuffleMask(SVOp
, 1, ShuffleKind
, DAG
) ||
10364 PPC::isVMRGLShuffleMask(SVOp
, 2, ShuffleKind
, DAG
) ||
10365 PPC::isVMRGLShuffleMask(SVOp
, 4, ShuffleKind
, DAG
) ||
10366 PPC::isVMRGHShuffleMask(SVOp
, 1, ShuffleKind
, DAG
) ||
10367 PPC::isVMRGHShuffleMask(SVOp
, 2, ShuffleKind
, DAG
) ||
10368 PPC::isVMRGHShuffleMask(SVOp
, 4, ShuffleKind
, DAG
) ||
10369 (Subtarget
.hasP8Altivec() && (
10370 PPC::isVPKUDUMShuffleMask(SVOp
, ShuffleKind
, DAG
) ||
10371 PPC::isVMRGEOShuffleMask(SVOp
, true, ShuffleKind
, DAG
) ||
10372 PPC::isVMRGEOShuffleMask(SVOp
, false, ShuffleKind
, DAG
))))
10375 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10376 // perfect shuffle table to emit an optimal matching sequence.
10377 ArrayRef
<int> PermMask
= SVOp
->getMask();
10379 if (!DisablePerfectShuffle
&& !isLittleEndian
) {
10380 unsigned PFIndexes
[4];
10381 bool isFourElementShuffle
= true;
10382 for (unsigned i
= 0; i
!= 4 && isFourElementShuffle
;
10383 ++i
) { // Element number
10384 unsigned EltNo
= 8; // Start out undef.
10385 for (unsigned j
= 0; j
!= 4; ++j
) { // Intra-element byte.
10386 if (PermMask
[i
* 4 + j
] < 0)
10387 continue; // Undef, ignore it.
10389 unsigned ByteSource
= PermMask
[i
* 4 + j
];
10390 if ((ByteSource
& 3) != j
) {
10391 isFourElementShuffle
= false;
10396 EltNo
= ByteSource
/ 4;
10397 } else if (EltNo
!= ByteSource
/ 4) {
10398 isFourElementShuffle
= false;
10402 PFIndexes
[i
] = EltNo
;
10405 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10406 // perfect shuffle vector to determine if it is cost effective to do this as
10407 // discrete instructions, or whether we should use a vperm.
10408 // For now, we skip this for little endian until such time as we have a
10409 // little-endian perfect shuffle table.
10410 if (isFourElementShuffle
) {
10411 // Compute the index in the perfect shuffle table.
10412 unsigned PFTableIndex
= PFIndexes
[0] * 9 * 9 * 9 + PFIndexes
[1] * 9 * 9 +
10413 PFIndexes
[2] * 9 + PFIndexes
[3];
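      // Each index is in [0, 8] (8 meaning undef), so the four indices are
      // combined as a base-9 number; e.g. indices {1, 0, 3, 2} give
      // 1*729 + 0*81 + 3*9 + 2 = 758.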
      unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
      unsigned Cost = (PFEntry >> 30);
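      // The top two bits of each table entry hold the cost; the remaining bits
      // describe how to generate the shuffle and are consumed by
      // GeneratePerfectShuffle below.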
      // Determining when to avoid vperm is tricky. Many things affect the cost
      // of vperm, particularly how many times the perm mask needs to be
      // computed. For example, if the perm mask can be hoisted out of a loop or
      // is already used (perhaps because there are multiple permutes with the
      // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
      // permute mask out of the loop requires an extra register.
      //
      // As a compromise, we only emit discrete instructions if the shuffle can
      // be generated in 3 or fewer operations. When we have loop information
      // available, if this block is within a loop, we should avoid using vperm
      // for 3-operation perms and use a constant pool load instead.
        return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);

  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
  // vector that will get spilled to the constant pool.
  if (V2.isUndef()) V2 = V1;

  return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);

SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
                                      ArrayRef<int> PermMask, EVT VT,
                                      SDValue V1, SDValue V2) const {
  unsigned Opcode = PPCISD::VPERM;
  EVT ValType = V1.getValueType();
  bool NeedSwap = false;
  bool isLittleEndian = Subtarget.isLittleEndian();
  bool isPPC64 = Subtarget.isPPC64();

  if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
      (V1->hasOneUse() || V2->hasOneUse())) {
    LLVM_DEBUG(dbgs() << "At least one of the two input vectors is dead - "
                         "using XXPERM instead\n");
    Opcode = PPCISD::XXPERM;

    // The second input to XXPERM is also an output so if the second input has
    // multiple uses then copying is necessary, as a result we want the
    // single-use operand to be used as the second input to prevent copying.
    if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
        (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
      NeedSwap = !NeedSwap;

  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
  // that it is in input element units, not in bytes. Convert now.
  //
  // For little endian, the order of the input vectors is reversed, and
  // the permutation mask is complemented with respect to 31. This is
  // necessary to produce proper semantics with the big-endian-based vperm
  EVT EltVT = V1.getValueType().getVectorElementType();
  unsigned BytesPerElement = EltVT.getSizeInBits() / 8;

  bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
  bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;

  /*
  Vectors will be appended like so: [ V1 | V2 ]
  [ A | B | C | D ]      ->   [ C | D | A | B ]
   0-3 4-7 8-11 12-15          0-3 4-7 8-11 12-15
  i.e. index of A, B += 8, and index of C, D -= 8.
  [ E | F | G | H ]      ->   [ G | H | E | F ]
   16-19 20-23 24-27 28-31     16-19 20-23 24-27 28-31
  i.e. index of E, F += 8, index of G, H -= 8.
  [ V1 | V2 ]            ->   [ V2 | V1 ]
   0-15 16-31                  0-15 16-31
  i.e. index of V1 += 16, index of V2 -= 16.
  */

  SmallVector<SDValue, 16> ResultMask;
  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
    unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];

    if (V1HasXXSWAPD) {
      else if (SrcElt < 16)
    if (V2HasXXSWAPD) {
      else if (SrcElt > 15)

    for (unsigned j = 0; j != BytesPerElement; ++j)
      if (isLittleEndian)
        ResultMask.push_back(
            DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
        ResultMask.push_back(
            DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
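      // For example, with BytesPerElement == 4, element 2, byte 1 maps to mask
      // entry 2 * 4 + 1 == 9 on big endian and 31 - 9 == 22 on little endian.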
  if (V1HasXXSWAPD) {
    dl = SDLoc(V1->getOperand(0));
    V1 = V1->getOperand(0)->getOperand(1);
  if (V2HasXXSWAPD) {
    dl = SDLoc(V2->getOperand(0));
    V2 = V2->getOperand(0)->getOperand(1);

  if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
    if (ValType != MVT::v2f64)
      V1 = DAG.getBitcast(MVT::v2f64, V1);
    if (V2.getValueType() != MVT::v2f64)
      V2 = DAG.getBitcast(MVT::v2f64, V2);

  ShufflesHandledWithVPERM++;
  SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
    if (Opcode == PPCISD::XXPERM) {
      dbgs() << "Emitting a XXPERM for the following shuffle:\n";
      dbgs() << "Emitting a VPERM for the following shuffle:\n";
    dbgs() << "With the following permute control vector:\n";

  if (Opcode == PPCISD::XXPERM)
    VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);

  // Only need to place items backwards in LE,
  // the mask was properly calculated.
  if (isLittleEndian)

  SDValue VPERMNode =
      DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);

  VPERMNode = DAG.getBitcast(ValType, VPERMNode);
/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
/// vector comparison. If it is, return true and fill in Opc/isDot with
/// information about the intrinsic.
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
                                 bool &isDot, const PPCSubtarget &Subtarget) {
  unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
  switch (IntrinsicID) {
  // Comparison predicates.
  case Intrinsic::ppc_altivec_vcmpbfp_p:
  case Intrinsic::ppc_altivec_vcmpeqfp_p:
  case Intrinsic::ppc_altivec_vcmpequb_p:
  case Intrinsic::ppc_altivec_vcmpequh_p:
  case Intrinsic::ppc_altivec_vcmpequw_p:
  case Intrinsic::ppc_altivec_vcmpequd_p:
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
  case Intrinsic::ppc_altivec_vcmpneb_p:
  case Intrinsic::ppc_altivec_vcmpneh_p:
  case Intrinsic::ppc_altivec_vcmpnew_p:
  case Intrinsic::ppc_altivec_vcmpnezb_p:
  case Intrinsic::ppc_altivec_vcmpnezh_p:
  case Intrinsic::ppc_altivec_vcmpnezw_p:
    if (Subtarget.hasP9Altivec()) {
      switch (IntrinsicID) {
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb_p:
      case Intrinsic::ppc_altivec_vcmpneh_p:
      case Intrinsic::ppc_altivec_vcmpnew_p:
      case Intrinsic::ppc_altivec_vcmpnezb_p:
      case Intrinsic::ppc_altivec_vcmpnezh_p:
      case Intrinsic::ppc_altivec_vcmpnezw_p:
  case Intrinsic::ppc_altivec_vcmpgefp_p:
  case Intrinsic::ppc_altivec_vcmpgtfp_p:
  case Intrinsic::ppc_altivec_vcmpgtsb_p:
  case Intrinsic::ppc_altivec_vcmpgtsh_p:
  case Intrinsic::ppc_altivec_vcmpgtsw_p:
  case Intrinsic::ppc_altivec_vcmpgtsd_p:
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
  case Intrinsic::ppc_altivec_vcmpgtub_p:
  case Intrinsic::ppc_altivec_vcmpgtuh_p:
  case Intrinsic::ppc_altivec_vcmpgtuw_p:
  case Intrinsic::ppc_altivec_vcmpgtud_p:
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
  case Intrinsic::ppc_altivec_vcmpequq:
  case Intrinsic::ppc_altivec_vcmpgtsq:
  case Intrinsic::ppc_altivec_vcmpgtuq:
    if (!Subtarget.isISA3_1())
    switch (IntrinsicID) {
      llvm_unreachable("Unknown comparison intrinsic.");
    case Intrinsic::ppc_altivec_vcmpequq:
    case Intrinsic::ppc_altivec_vcmpgtsq:
    case Intrinsic::ppc_altivec_vcmpgtuq:
  // VSX predicate comparisons use the same infrastructure.
  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
  case Intrinsic::ppc_vsx_xvcmpgedp_p:
  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
  case Intrinsic::ppc_vsx_xvcmpgesp_p:
  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
    if (Subtarget.hasVSX()) {
      switch (IntrinsicID) {
      case Intrinsic::ppc_vsx_xvcmpeqdp_p:
      case Intrinsic::ppc_vsx_xvcmpgedp_p:
      case Intrinsic::ppc_vsx_xvcmpgtdp_p:
      case Intrinsic::ppc_vsx_xvcmpeqsp_p:
      case Intrinsic::ppc_vsx_xvcmpgesp_p:
      case Intrinsic::ppc_vsx_xvcmpgtsp_p:
  // Normal Comparisons.
  case Intrinsic::ppc_altivec_vcmpbfp:
  case Intrinsic::ppc_altivec_vcmpeqfp:
  case Intrinsic::ppc_altivec_vcmpequb:
  case Intrinsic::ppc_altivec_vcmpequh:
  case Intrinsic::ppc_altivec_vcmpequw:
  case Intrinsic::ppc_altivec_vcmpequd:
    if (Subtarget.hasP8Altivec())
  case Intrinsic::ppc_altivec_vcmpneb:
  case Intrinsic::ppc_altivec_vcmpneh:
  case Intrinsic::ppc_altivec_vcmpnew:
  case Intrinsic::ppc_altivec_vcmpnezb:
  case Intrinsic::ppc_altivec_vcmpnezh:
  case Intrinsic::ppc_altivec_vcmpnezw:
    if (Subtarget.hasP9Altivec())
      switch (IntrinsicID) {
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb:
      case Intrinsic::ppc_altivec_vcmpneh:
      case Intrinsic::ppc_altivec_vcmpnew:
      case Intrinsic::ppc_altivec_vcmpnezb:
      case Intrinsic::ppc_altivec_vcmpnezh:
      case Intrinsic::ppc_altivec_vcmpnezw:
  case Intrinsic::ppc_altivec_vcmpgefp:
  case Intrinsic::ppc_altivec_vcmpgtfp:
  case Intrinsic::ppc_altivec_vcmpgtsb:
  case Intrinsic::ppc_altivec_vcmpgtsh:
  case Intrinsic::ppc_altivec_vcmpgtsw:
  case Intrinsic::ppc_altivec_vcmpgtsd:
    if (Subtarget.hasP8Altivec())
  case Intrinsic::ppc_altivec_vcmpgtub:
  case Intrinsic::ppc_altivec_vcmpgtuh:
  case Intrinsic::ppc_altivec_vcmpgtuw:
  case Intrinsic::ppc_altivec_vcmpgtud:
    if (Subtarget.hasP8Altivec())
  case Intrinsic::ppc_altivec_vcmpequq_p:
  case Intrinsic::ppc_altivec_vcmpgtsq_p:
  case Intrinsic::ppc_altivec_vcmpgtuq_p:
    if (!Subtarget.isISA3_1())
    switch (IntrinsicID) {
      llvm_unreachable("Unknown comparison intrinsic.");
    case Intrinsic::ppc_altivec_vcmpequq_p:
    case Intrinsic::ppc_altivec_vcmpgtsq_p:
    case Intrinsic::ppc_altivec_vcmpgtuq_p:
/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
/// lower, do it, otherwise return null.
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                   SelectionDAG &DAG) const {
  unsigned IntrinsicID = Op.getConstantOperandVal(0);

  switch (IntrinsicID) {
  case Intrinsic::thread_pointer:
    // Reads the thread pointer register, used for __builtin_thread_pointer.
    if (Subtarget.isPPC64())
      return DAG.getRegister(PPC::X13, MVT::i64);
    return DAG.getRegister(PPC::R2, MVT::i32);

  case Intrinsic::ppc_rldimi: {
    assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
    SDValue Src = Op.getOperand(1);
    APInt Mask = Op.getConstantOperandAPInt(4);
      return Op.getOperand(2);
    if (Mask.isAllOnes())
      return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
    uint64_t SH = Op.getConstantOperandVal(3);
    unsigned MB = 0, ME = 0;
    if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
      report_fatal_error("invalid rldimi mask!");
    // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
    if (ME < 63 - SH) {
      Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
                        DAG.getConstant(ME + SH + 1, dl, MVT::i32));
    } else if (ME > 63 - SH) {
      Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
                        DAG.getConstant(ME + SH - 63, dl, MVT::i32));
        DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
                           {Op.getOperand(2), Src,
                            DAG.getTargetConstant(63 - ME, dl, MVT::i32),
                            DAG.getTargetConstant(MB, dl, MVT::i32)}),

  case Intrinsic::ppc_rlwimi: {
    APInt Mask = Op.getConstantOperandAPInt(4);
      return Op.getOperand(2);
    if (Mask.isAllOnes())
      return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
    unsigned MB = 0, ME = 0;
    if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
      report_fatal_error("invalid rlwimi mask!");
    return SDValue(DAG.getMachineNode(
                       PPC::RLWIMI, dl, MVT::i32,
                       {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
                        DAG.getTargetConstant(MB, dl, MVT::i32),
                        DAG.getTargetConstant(ME, dl, MVT::i32)}),

  case Intrinsic::ppc_rlwnm: {
    if (Op.getConstantOperandVal(3) == 0)
      return DAG.getConstant(0, dl, MVT::i32);
    unsigned MB = 0, ME = 0;
    if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
      report_fatal_error("invalid rlwnm mask!");
        DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
                           {Op.getOperand(1), Op.getOperand(2),
                            DAG.getTargetConstant(MB, dl, MVT::i32),
                            DAG.getTargetConstant(ME, dl, MVT::i32)}),

  case Intrinsic::ppc_mma_disassemble_acc: {
    if (Subtarget.isISAFuture()) {
      EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
          SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
      SmallVector<SDValue, 4> RetOps;
      SDValue Value = SDValue(WideVec.getNode(), 0);
      SDValue Value2 = SDValue(WideVec.getNode(), 1);
      Extract = DAG.getNode(
          PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
          Subtarget.isLittleEndian() ? Value2 : Value,
          DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
                          dl, getPointerTy(DAG.getDataLayout())));
      RetOps.push_back(Extract);
      Extract = DAG.getNode(
          PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
          Subtarget.isLittleEndian() ? Value2 : Value,
          DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
                          dl, getPointerTy(DAG.getDataLayout())));
      RetOps.push_back(Extract);
      Extract = DAG.getNode(
          PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
          Subtarget.isLittleEndian() ? Value : Value2,
          DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
                          dl, getPointerTy(DAG.getDataLayout())));
      RetOps.push_back(Extract);
      Extract = DAG.getNode(
          PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
          Subtarget.isLittleEndian() ? Value : Value2,
          DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
                          dl, getPointerTy(DAG.getDataLayout())));
      RetOps.push_back(Extract);
      return DAG.getMergeValues(RetOps, dl);

  case Intrinsic::ppc_vsx_disassemble_pair: {
    SDValue WideVec = Op.getOperand(1);
    if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
      WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
    SmallVector<SDValue, 4> RetOps;
    for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
      SDValue Extract = DAG.getNode(
          PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
          DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
                          dl, getPointerTy(DAG.getDataLayout())));
      RetOps.push_back(Extract);
    return DAG.getMergeValues(RetOps, dl);

  case Intrinsic::ppc_mma_xxmfacc:
  case Intrinsic::ppc_mma_xxmtacc: {
    // Allow pre-isa-future subtargets to lower as normal.
    if (!Subtarget.isISAFuture())
    // The intrinsics for xxmtacc and xxmfacc take one argument of
    // type v512i1, for future cpu the corresponding wacc instruction
    // dmxx[inst|extf]dmr512 is always generated for type v512i1, negating
    // the need to produce the xxm[t|f]acc.
    SDValue WideVec = Op.getOperand(1);
    DAG.ReplaceAllUsesWith(Op, WideVec);

  case Intrinsic::ppc_unpack_longdouble: {
    auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
           "Argument of long double unpack must be 0 or 1!");
    return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
                       DAG.getConstant(!!(Idx->getSExtValue()), dl,
                                       Idx->getValueType(0)));

  case Intrinsic::ppc_compare_exp_lt:
  case Intrinsic::ppc_compare_exp_gt:
  case Intrinsic::ppc_compare_exp_eq:
  case Intrinsic::ppc_compare_exp_uo: {
    switch (IntrinsicID) {
    case Intrinsic::ppc_compare_exp_lt:
      Pred = PPC::PRED_LT;
    case Intrinsic::ppc_compare_exp_gt:
      Pred = PPC::PRED_GT;
    case Intrinsic::ppc_compare_exp_eq:
      Pred = PPC::PRED_EQ;
    case Intrinsic::ppc_compare_exp_uo:
      Pred = PPC::PRED_UN;
        DAG.getMachineNode(
            PPC::SELECT_CC_I4, dl, MVT::i32,
            {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
                                        Op.getOperand(1), Op.getOperand(2)),
             DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
             DAG.getTargetConstant(Pred, dl, MVT::i32)}),
  case Intrinsic::ppc_test_data_class: {
    EVT OpVT = Op.getOperand(1).getValueType();
    unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
                                         : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
        DAG.getMachineNode(
            PPC::SELECT_CC_I4, dl, MVT::i32,
            {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
             DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
             DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
  case Intrinsic::ppc_fnmsub: {
    EVT VT = Op.getOperand(1).getValueType();
    if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
      return DAG.getNode(
          DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
                      DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
    return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
                       Op.getOperand(2), Op.getOperand(3));
  case Intrinsic::ppc_convert_f128_to_ppcf128:
  case Intrinsic::ppc_convert_ppcf128_to_f128: {
    RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
                            ? RTLIB::CONVERT_PPCF128_F128
                            : RTLIB::CONVERT_F128_PPCF128;
    MakeLibCallOptions CallOptions;
    std::pair<SDValue, SDValue> Result =
        makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
    return Result.first;
  case Intrinsic::ppc_maxfe:
  case Intrinsic::ppc_maxfl:
  case Intrinsic::ppc_maxfs:
  case Intrinsic::ppc_minfe:
  case Intrinsic::ppc_minfl:
  case Intrinsic::ppc_minfs: {
    EVT VT = Op.getValueType();
           all_of(Op->ops().drop_front(4),
                  [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
           "ppc_[max|min]f[e|l|s] must have uniform type arguments");

    ISD::CondCode CC = ISD::SETGT;
    if (IntrinsicID == Intrinsic::ppc_minfe ||
        IntrinsicID == Intrinsic::ppc_minfl ||
        IntrinsicID == Intrinsic::ppc_minfs)
    unsigned I = Op.getNumOperands() - 2, Cnt = I;
    SDValue Res = Op.getOperand(I);
    for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
          DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);

  // If this is a lowered altivec predicate compare, CompareOpc is set to the
  // opcode number of the comparison.
  if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
    return SDValue(); // Don't custom lower most intrinsics.

  // If this is a non-dot comparison, make the VCMP node and we are done.
    SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
                              Op.getOperand(1), Op.getOperand(2),
                              DAG.getConstant(CompareOpc, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);

  // Create the PPCISD altivec 'dot' comparison node.
    Op.getOperand(2), // LHS
    Op.getOperand(3), // RHS
    DAG.getConstant(CompareOpc, dl, MVT::i32)
  EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
  SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);

  // Now that we have the comparison, emit a copy from the CR to a GPR.
  // This is flagged to the above dot comparison.
  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
                              DAG.getRegister(PPC::CR6, MVT::i32),
                              CompNode.getValue(1));

  // Unpack the result based on how the target uses it.
  unsigned BitNo;   // Bit # of CR6.
  bool InvertBit;   // Invert result?
  switch (Op.getConstantOperandVal(1)) {
  default: // Can't happen, don't crash on invalid number though.
  case 0:  // Return the value of the EQ bit of CR6.
    BitNo = 0; InvertBit = false;
  case 1:  // Return the inverted value of the EQ bit of CR6.
    BitNo = 0; InvertBit = true;
  case 2:  // Return the value of the LT bit of CR6.
    BitNo = 2; InvertBit = false;
  case 3:  // Return the inverted value of the LT bit of CR6.
    BitNo = 2; InvertBit = true;

  // Shift the bit into the low position.
  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
                      DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
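  // (With BitNo == 0 the shift amount is 8 - (3 - 0) = 5; with BitNo == 2 it
  // is 8 - (3 - 2) = 7.)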
  // Isolate the bit.
  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
                      DAG.getConstant(1, dl, MVT::i32));

  // If we are supposed to, toggle the bit.
    Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
                        DAG.getConstant(1, dl, MVT::i32));

SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                                               SelectionDAG &DAG) const {
  // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
  // the beginning of the argument list.
  int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
  switch (Op.getConstantOperandVal(ArgStart)) {
  case Intrinsic::ppc_cfence: {
    assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
    SDValue Val = Op.getOperand(ArgStart + 1);
    EVT Ty = Val.getValueType();
    if (Ty == MVT::i128) {
      // FIXME: Testing one of two paired registers is sufficient to guarantee
      Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
    unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
    EVT FTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
        DAG.getMachineNode(Opcode, DL, MVT::Other,
                           DAG.getNode(ISD::ANY_EXTEND, DL, FTy, Val),

// Lower scalar BSWAP64 to xxbrd.
SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
  if (!Subtarget.isPPC64())
  Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
  Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
  int VectorIndex = 0;
  if (Subtarget.isLittleEndian())
  Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
                   DAG.getTargetConstant(VectorIndex, dl, MVT::i32));

// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
// compared to a value that is atomically loaded (atomic loads zero-extend).
SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
                                                SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
         "Expecting an atomic compare-and-swap here.");
  auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
  EVT MemVT = AtomicNode->getMemoryVT();
  if (MemVT.getSizeInBits() >= 32)

  SDValue CmpOp = Op.getOperand(2);
  // If this is already correctly zero-extended, leave it alone.
  auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
  if (DAG.MaskedValueIsZero(CmpOp, HighBits))

  // Clear the high bits of the compare operand.
  unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
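  // (For i8 this is (1 << 8) - 1 == 0xFF; for i16 it is 0xFFFF.)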
      DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
                  DAG.getConstant(MaskVal, dl, MVT::i32));

  // Replace the existing compare operand with the properly zero-extended one.
  SmallVector<SDValue, 4> Ops;
  for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
    Ops.push_back(AtomicNode->getOperand(i));
  MachineMemOperand *MMO = AtomicNode->getMemOperand();
  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
      (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
  return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);

SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
  EVT MemVT = N->getMemoryVT();
  assert(MemVT.getSimpleVT() == MVT::i128 &&
         "Expect quadword atomic operations");
  unsigned Opc = N->getOpcode();
  case ISD::ATOMIC_LOAD: {
    // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
    // lowered to ppc instructions by pattern matching instruction selector.
    SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
    SmallVector<SDValue, 4> Ops{
        DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
    for (int I = 1, E = N->getNumOperands(); I < E; ++I)
      Ops.push_back(N->getOperand(I));
    SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
                                                Ops, MemVT, N->getMemOperand());
    SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
        DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
    ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
                        DAG.getConstant(64, dl, MVT::i32));
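    // The full 128-bit value is then reassembled as ValLo | (ValHi << 64).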
        DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
    return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
                       {Val, LoadedVal.getValue(2)});
  case ISD::ATOMIC_STORE: {
    // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
    // lowered to ppc instructions by pattern matching instruction selector.
    SDVTList Tys = DAG.getVTList(MVT::Other);
    SmallVector<SDValue, 4> Ops{
        DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
    SDValue Val = N->getOperand(1);
    SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
    SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
                                DAG.getConstant(64, dl, MVT::i32));
    ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
    Ops.push_back(ValLo);
    Ops.push_back(ValHi);
    Ops.push_back(N->getOperand(2));
    return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
                                   N->getMemOperand());
    llvm_unreachable("Unexpected atomic opcode");

static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
                                const PPCSubtarget &Subtarget) {
  assert(Mask <= fcAllFlags && "Invalid fp_class flags!");

  enum DataClassMask {
    DC_NEG_INF = 1 << 4,
    DC_POS_INF = 1 << 5,
    DC_NEG_ZERO = 1 << 2,
    DC_POS_ZERO = 1 << 3,
    DC_NEG_SUBNORM = 1,
    DC_POS_SUBNORM = 1 << 1,

  EVT VT = Op.getValueType();
  unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
                    : VT == MVT::f64 ? PPC::XSTSTDCDP
  if (Mask == fcAllFlags)
    return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
    return DAG.getBoolConstant(false, Dl, MVT::i1, VT);

  // Sometimes it is cheaper (or necessary) to test the complemented set of
  // flags instead.
  if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
    SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
    return DAG.getNOT(Dl, Rev, MVT::i1);

  // Power doesn't support testing whether a value is 'normal'. Test the rest
  // first, and test if it's 'not not-normal' with the expected sign.
  if (Mask & fcNormal) {
    SDValue Rev(DAG.getMachineNode(
        TestOp, Dl, MVT::i32,
        DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
                                  DC_NEG_ZERO | DC_POS_ZERO |
                                  DC_NEG_SUBNORM | DC_POS_SUBNORM,
    // The sign is stored in CR bit 0, the result in CR bit 2.
        DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
                           DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
    SDValue Normal(DAG.getNOT(
        SDValue(DAG.getMachineNode(
                    TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
                    DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
    if (Mask & fcPosNormal)
      Sign = DAG.getNOT(Dl, Sign, MVT::i1);
    SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
    if (Mask == fcPosNormal || Mask == fcNegNormal)
    return DAG.getNode(
        ISD::OR, Dl, MVT::i1,
        getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);

  // The instruction doesn't differentiate between signaling or quiet NaN. Test
  // the rest first, and test if it 'is NaN and is signaling/quiet'.
  if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
    bool IsQuiet = Mask & fcQNan;
    SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);

    // Quietness is determined by the first bit in the fraction field.
    uint64_t QuietMask = 0;
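    // The quiet bit is the most significant fraction bit: bit 22 for f32
    // (0x400000), bit 19 of the high word for f64 (0x80000), and bit 15 of
    // the high word for f128 (0x8000), matching the masks used below.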
    if (VT == MVT::f128) {
      HighWord = DAG.getNode(
          ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
          DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
      QuietMask = 0x8000;
    } else if (VT == MVT::f64) {
      if (Subtarget.isPPC64()) {
        HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
                               DAG.getBitcast(MVT::i64, Op),
                               DAG.getConstant(1, Dl, MVT::i32));
        SDValue Vec = DAG.getBitcast(
            MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
        HighWord = DAG.getNode(
            ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
            DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
      QuietMask = 0x80000;
    } else if (VT == MVT::f32) {
      HighWord = DAG.getBitcast(MVT::i32, Op);
      QuietMask = 0x400000;
    SDValue NanRes = DAG.getSetCC(
        DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
                    DAG.getConstant(QuietMask, Dl, MVT::i32)),
        DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
    NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
    if (Mask == fcQNan || Mask == fcSNan)
    return DAG.getNode(ISD::OR, Dl, MVT::i1,
                       getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),

  unsigned NativeMask = 0;
  if ((Mask & fcNan) == fcNan)
    NativeMask |= DC_NAN;
  if (Mask & fcNegInf)
    NativeMask |= DC_NEG_INF;
  if (Mask & fcPosInf)
    NativeMask |= DC_POS_INF;
  if (Mask & fcNegZero)
    NativeMask |= DC_NEG_ZERO;
  if (Mask & fcPosZero)
    NativeMask |= DC_POS_ZERO;
  if (Mask & fcNegSubnormal)
    NativeMask |= DC_NEG_SUBNORM;
  if (Mask & fcPosSubnormal)
    NativeMask |= DC_POS_SUBNORM;
      DAG.getMachineNode(
          TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
          SDValue(DAG.getMachineNode(
                      TestOp, Dl, MVT::i32,
                      DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
          DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),

SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
                                           SelectionDAG &DAG) const {
  assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
  SDValue LHS = Op.getOperand(0);
  uint64_t RHSC = Op.getConstantOperandVal(1);
  FPClassTest Category = static_cast<FPClassTest>(RHSC);
  return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);

SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  // Create a stack slot that is 16-byte aligned.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  // Store the input value into Value#0 of the stack slot.
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
                               MachinePointerInfo());
  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());

SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                  SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
         "Should only be called for ISD::INSERT_VECTOR_ELT");
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  EVT VT = Op.getValueType();
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);

  if (VT == MVT::v2f64 && C)

  if (Subtarget.hasP9Vector()) {
    // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way
    // because on P10, it allows this specific insert_vector_elt load pattern to
    // utilize the refactored load and store infrastructure in order to exploit
    // prefixed loads.
    // On targets with inexpensive direct moves (Power9 and up), a
    // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
    // load since a single precision load will involve conversion to double
    // precision on the load followed by another conversion to single precision.
    if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
        (isa<LoadSDNode>(V2))) {
      SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
      SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
      SDValue InsVecElt =
          DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
                      BitcastLoad, Op.getOperand(2));
      return DAG.getBitcast(MVT::v4f32, InsVecElt);

  if (Subtarget.isISA3_1()) {
    if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
    // On P10, we have legal lowering for constant and variable indices for
    // all vector types.
    if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
        VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)

  // Before P10, we have legal lowering for constant indices but not for
  // variable ones.

  // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
  if (VT == MVT::v8i16 || VT == MVT::v16i8) {
    SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
    unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
    unsigned InsertAtElement = C->getZExtValue();
    unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
    if (Subtarget.isLittleEndian()) {
      InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
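      // e.g. element 3 of a v8i16 maps to byte offset (16 - 2) - 3 * 2 == 8
      // on little endian.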
    return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
                       DAG.getConstant(InsertAtByte, dl, MVT::i32));

SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
                                           SelectionDAG &DAG) const {
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  SDValue LoadChain = LN->getChain();
  SDValue BasePtr = LN->getBasePtr();
  EVT VT = Op.getValueType();

  if (VT != MVT::v256i1 && VT != MVT::v512i1)

  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
  // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
  // 2 or 4 vsx registers.
  assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
         "Type unsupported without MMA");
  assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
         "Type unsupported without paired vector support");
  Align Alignment = LN->getAlign();
  SmallVector<SDValue, 4> Loads;
  SmallVector<SDValue, 4> LoadChains;
  unsigned NumVecs = VT.getSizeInBits() / 128;
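  // (A v256i1 pair becomes two 16-byte loads at offsets 0 and 16; a v512i1
  // accumulator becomes four, at offsets 0, 16, 32 and 48.)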
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
        DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
                    LN->getPointerInfo().getWithOffset(Idx * 16),
                    commonAlignment(Alignment, Idx * 16),
                    LN->getMemOperand()->getFlags(), LN->getAAInfo());
    BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(16, dl, BasePtr.getValueType()));
    Loads.push_back(Load);
    LoadChains.push_back(Load.getValue(1));
  if (Subtarget.isLittleEndian()) {
    std::reverse(Loads.begin(), Loads.end());
    std::reverse(LoadChains.begin(), LoadChains.end());
  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
      DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
  SDValue RetOps[] = {Value, TF};
  return DAG.getMergeValues(RetOps, dl);

SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
                                            SelectionDAG &DAG) const {
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  SDValue StoreChain = SN->getChain();
  SDValue BasePtr = SN->getBasePtr();
  SDValue Value = SN->getValue();
  SDValue Value2 = SN->getValue();
  EVT StoreVT = Value.getValueType();

  if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)

  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
  // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
  // underlying registers individually.
  assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
         "Type unsupported without MMA");
  assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
         "Type unsupported without paired vector support");
  Align Alignment = SN->getAlign();
  SmallVector<SDValue, 4> Stores;
  unsigned NumVecs = 2;
  if (StoreVT == MVT::v512i1) {
    if (Subtarget.isISAFuture()) {
      EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
      MachineSDNode *ExtNode = DAG.getMachineNode(
          PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
      Value = SDValue(ExtNode, 0);
      Value2 = SDValue(ExtNode, 1);
      Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
    if (Subtarget.isISAFuture()) {
      VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
      Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
                        Idx > 1 ? Value2 : Value,
                        DAG.getConstant(VecNum, dl,
                                        getPointerTy(DAG.getDataLayout())));
      Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
                        DAG.getConstant(VecNum, dl,
                                        getPointerTy(DAG.getDataLayout())));
        DAG.getStore(StoreChain, dl, Elt, BasePtr,
                     SN->getPointerInfo().getWithOffset(Idx * 16),
                     commonAlignment(Alignment, Idx * 16),
                     SN->getMemOperand()->getFlags(), SN->getAAInfo());
    BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(16, dl, BasePtr.getValueType()));
    Stores.push_back(Store);
  SDValue TF = DAG.getTokenFactor(dl, Stores);

SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getValueType() == MVT::v4i32) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
    // +16 as shift amt.
    SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
    SDValue RHSSwap = // = vrlw RHS, 16
        BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
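    // Conceptually, per 32-bit lane: lo = LHS.lo16 * RHS.lo16, and
    // hi = LHS.lo16 * RHS.hi16 + LHS.hi16 * RHS.lo16 (computed by vmsumuhm on
    // the rotated RHS), so LHS * RHS == lo + (hi << 16) modulo 2^32.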
    // Shrinkify inputs to v8i16.
    LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
    RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
    RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);

    // Low parts multiplied together, generating 32-bit results (we ignore the
    // top parts).
    SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
                                      LHS, RHS, DAG, dl, MVT::v4i32);
    SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
                                      LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
    // Shift the high parts up 16 bits.
    HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
    return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
  } else if (Op.getValueType() == MVT::v16i8) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
    bool isLittleEndian = Subtarget.isLittleEndian();

    // Multiply the even 8-bit parts, producing 16-bit products.
    SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
                                         LHS, RHS, DAG, dl, MVT::v8i16);
    EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);

    // Multiply the odd 8-bit parts, producing 16-bit products.
    SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
                                        LHS, RHS, DAG, dl, MVT::v8i16);
    OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);

    // Merge the results together. Because vmuleub and vmuloub are
    // instructions with a big-endian bias, we must reverse the
    // element numbering and reverse the meaning of "odd" and "even"
    // when generating little endian code.
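    // Only the low 8 bits of each 16-bit partial product are needed; the
    // shuffle built below interleaves those low bytes back into one v16i8.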
    for (unsigned i = 0; i != 8; ++i) {
      if (isLittleEndian) {
        Ops[i*2+1] = 2*i+16;
        Ops[i*2+1] = 2*i+1+16;
    if (isLittleEndian)
      return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
      return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
    llvm_unreachable("Unknown mul to lower!");

SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
  bool IsStrict = Op->isStrictFPOpcode();
  if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
      !Subtarget.hasP9Vector())

// Custom lowering for fpext v2f32 to v2f64
SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::FP_EXTEND &&
         "Should only be called for ISD::FP_EXTEND");

  // FIXME: handle extends from half precision float vectors on P9.
  // We only want to custom lower an extend from v2f32 to v2f64.
  if (Op.getValueType() != MVT::v2f64 ||
      Op.getOperand(0).getValueType() != MVT::v2f32)

  SDValue Op0 = Op.getOperand(0);
  switch (Op0.getOpcode()) {
  case ISD::EXTRACT_SUBVECTOR: {
    assert(Op0.getNumOperands() == 2 &&
           isa<ConstantSDNode>(Op0->getOperand(1)) &&
           "Node should have 2 operands with second one being a constant!");

    if (Op0.getOperand(0).getValueType() != MVT::v4f32)

    // Custom lower is only done for high or low doubleword.
    int Idx = Op0.getConstantOperandVal(1);

    // Since input is v4f32, at this point Idx is either 0 or 2.
    // Shift to get the doubleword position we want.
    int DWord = Idx >> 1;

    // High and low word positions are different on little endian.
    if (Subtarget.isLittleEndian())

    return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
                       Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));

    SDValue NewLoad[2];
    for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
      // Ensure both inputs are loads.
      SDValue LdOp = Op0.getOperand(i);
      if (LdOp.getOpcode() != ISD::LOAD)
      // Generate new load node.
      LoadSDNode *LD = cast<LoadSDNode>(LdOp);
      SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
      NewLoad[i] = DAG.getMemIntrinsicNode(
          PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
          LD->getMemoryVT(), LD->getMemOperand());
        DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
                    NewLoad[1], Op0.getNode()->getFlags());
    return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
                       DAG.getConstant(0, dl, MVT::i32));
    LoadSDNode *LD = cast<LoadSDNode>(Op0);
    SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
    SDValue NewLd = DAG.getMemIntrinsicNode(
        PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
        LD->getMemoryVT(), LD->getMemOperand());
    return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
                       DAG.getConstant(0, dl, MVT::i32));
  llvm_unreachable("ERROR: Should return for all cases within switch.");
/// LowerOperation - Provide custom lowering hooks for some operations.
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Wasn't expecting to be able to lower this!");
  case ISD::FPOW:               return lowerPow(Op, DAG);
  case ISD::FSIN:               return lowerSin(Op, DAG);
  case ISD::FCOS:               return lowerCos(Op, DAG);
  case ISD::FLOG:               return lowerLog(Op, DAG);
  case ISD::FLOG10:             return lowerLog10(Op, DAG);
  case ISD::FEXP:               return lowerExp(Op, DAG);
  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
  case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
  case ISD::STRICT_FSETCC:
  case ISD::STRICT_FSETCCS:
  case ISD::SETCC:              return LowerSETCC(Op, DAG);
  case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
  case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);

  case ISD::INLINEASM:
  case ISD::INLINEASM_BR:       return LowerINLINEASM(Op, DAG);
  // Variable argument lowering.
  case ISD::VASTART:            return LowerVASTART(Op, DAG);
  case ISD::VAARG:              return LowerVAARG(Op, DAG);
  case ISD::VACOPY:             return LowerVACOPY(Op, DAG);

  case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::GET_DYNAMIC_AREA_OFFSET:
    return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);

  // Exception handling lowering.
  case ISD::EH_DWARF_CFA:       return LowerEH_DWARF_CFA(Op, DAG);
  case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
  case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);

  case ISD::LOAD:               return LowerLOAD(Op, DAG);
  case ISD::STORE:              return LowerSTORE(Op, DAG);
  case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
  case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
  case ISD::STRICT_FP_TO_UINT:
  case ISD::STRICT_FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
  case ISD::STRICT_UINT_TO_FP:
  case ISD::STRICT_SINT_TO_FP:
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
  case ISD::GET_ROUNDING:       return LowerGET_ROUNDING(Op, DAG);

  // Lower 64-bit shifts.
  case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
  case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
  case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);

  case ISD::FSHL:               return LowerFunnelShift(Op, DAG);
  case ISD::FSHR:               return LowerFunnelShift(Op, DAG);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::MUL:                return LowerMUL(Op, DAG);
  case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);
  case ISD::STRICT_FP_ROUND:
  case ISD::FP_ROUND:
    return LowerFP_ROUND(Op, DAG);
  case ISD::ROTL:               return LowerROTL(Op, DAG);

  // For counter-based loop handling.
  case ISD::INTRINSIC_W_CHAIN:  return SDValue();

  case ISD::BITCAST:            return LowerBITCAST(Op, DAG);

  // Frame & Return address.
  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);

  case ISD::INTRINSIC_VOID:
    return LowerINTRINSIC_VOID(Op, DAG);
    return LowerBSWAP(Op, DAG);
  case ISD::ATOMIC_CMP_SWAP:
    return LowerATOMIC_CMP_SWAP(Op, DAG);
  case ISD::ATOMIC_STORE:
    return LowerATOMIC_LOAD_STORE(Op, DAG);
  case ISD::IS_FPCLASS:
    return LowerIS_FPCLASS(Op, DAG);
void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue> &Results,
                                           SelectionDAG &DAG) const {
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Do not know how to custom type legalize this operation!");
  case ISD::ATOMIC_LOAD: {
    SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
    Results.push_back(Res);
    Results.push_back(Res.getValue(1));
    break;
  }
  case ISD::READCYCLECOUNTER: {
    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));

    Results.push_back(
        DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
    Results.push_back(RTB.getValue(2));
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
      break;

    assert(N->getValueType(0) == MVT::i1 &&
           "Unexpected result type for CTR decrement intrinsic");
    EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                 N->getValueType(0));
    SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
    SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
                                 N->getOperand(1));

    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
    Results.push_back(NewInt.getValue(1));
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (N->getConstantOperandVal(0)) {
    case Intrinsic::ppc_pack_longdouble:
      Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
                                    N->getOperand(2), N->getOperand(1)));
      break;
    case Intrinsic::ppc_maxfe:
    case Intrinsic::ppc_minfe:
    case Intrinsic::ppc_fnmsub:
    case Intrinsic::ppc_convert_f128_to_ppcf128:
      Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
      break;
    }
    break;
  }
  case ISD::VAARG: {
    if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
      return;

    EVT VT = N->getValueType(0);

    if (VT == MVT::i64) {
      SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);

      Results.push_back(NewNode);
      Results.push_back(NewNode.getValue(1));
    }
    return;
  }
  case ISD::STRICT_FP_TO_SINT:
  case ISD::STRICT_FP_TO_UINT:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT: {
    // LowerFP_TO_INT() can only handle f32 and f64.
    if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
        MVT::ppcf128)
      return;
    SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
    Results.push_back(LoweredValue);
    if (N->isStrictFPOpcode())
      Results.push_back(LoweredValue.getValue(1));
    return;
  }
  case ISD::TRUNCATE: {
    if (!N->getValueType(0).isVector())
      return;
    SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
    if (Lowered)
      Results.push_back(Lowered);
    return;
  }
  case ISD::FSHL:
  case ISD::FSHR:
    // Don't handle funnel shifts here.
    return;
  case ISD::BITCAST:
    // Don't handle bitcast here.
    return;
  case ISD::FP_EXTEND:
    SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
    if (Lowered)
      Results.push_back(Lowered);
    return;
  }
}

//===----------------------------------------------------------------------===//
//  Other Lowering Code
//===----------------------------------------------------------------------===//

static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Function *Func = Intrinsic::getDeclaration(M, Id);
  return Builder.CreateCall(Func, {});
}

// The mappings for emitLeadingFence/emitTrailingFence are taken from
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
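//
// As a rough summary of what the two hooks below end up emitting (not an
// exhaustive mapping table): a seq_cst access gets a full `sync` ahead of it,
// any other release-or-stronger access gets an `lwsync` ahead of it, an
// acquire-or-stronger load gets a trailing `cfence`, and an
// acquire-or-stronger RMW currently falls back to a trailing `lwsync`.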
Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
                                                 Instruction *Inst,
                                                 AtomicOrdering Ord) const {
  if (Ord == AtomicOrdering::SequentiallyConsistent)
    return callIntrinsic(Builder, Intrinsic::ppc_sync);
  if (isReleaseOrStronger(Ord))
    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
  return nullptr;
}

Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
                                                  Instruction *Inst,
                                                  AtomicOrdering Ord) const {
  if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
    // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
    // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
    // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
    if (isa<LoadInst>(Inst))
      return Builder.CreateCall(
          Intrinsic::getDeclaration(
              Builder.GetInsertBlock()->getParent()->getParent(),
              Intrinsic::ppc_cfence, {Inst->getType()}),
          {Inst});
    // FIXME: Can use isync for rmw operation.
    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
  }
  return nullptr;
}

MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
                                    unsigned AtomicSize,
                                    unsigned BinOpcode,
                                    unsigned CmpOpcode,
                                    unsigned CmpPred) const {
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  auto LoadMnemonic = PPC::LDARX;
  auto StoreMnemonic = PPC::STDCX;
  switch (AtomicSize) {
  default:
    llvm_unreachable("Unexpected size of atomic entity");
  case 1:
    LoadMnemonic = PPC::LBARX;
    StoreMnemonic = PPC::STBCX;
    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
    break;
  case 2:
    LoadMnemonic = PPC::LHARX;
    StoreMnemonic = PPC::STHCX;
    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
    break;
  case 4:
    LoadMnemonic = PPC::LWARX;
    StoreMnemonic = PPC::STWCX;
    break;
  case 8:
    LoadMnemonic = PPC::LDARX;
    StoreMnemonic = PPC::STDCX;
    break;
  }

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = ++BB->getIterator();

  Register dest = MI.getOperand(0).getReg();
  Register ptrA = MI.getOperand(1).getReg();
  Register ptrB = MI.getOperand(2).getReg();
  Register incr = MI.getOperand(3).getReg();
  DebugLoc dl = MI.getDebugLoc();

  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB =
      CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  if (CmpOpcode)
    F->insert(It, loop2MBB);
  F->insert(It, exitMBB);
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  Register TmpReg = (!BinOpcode) ? incr :
    RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
                                                   : &PPC::GPRCRegClass);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  //  loopMBB:
  //   l[wd]arx dest, ptr
  //   add r0, dest, incr
  //   st[wd]cx. r0, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  //
  //  For min/max:
  //  loopMBB:
  //   l[wd]arx dest, ptr
  //   cmpl?[wd] dest, incr
  //   bcc exitMBB
  //  loop2MBB:
  //   st[wd]cx. dest, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
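  //
  //  As a concrete sketch, an i32 atomic signed-max expands roughly to:
  //   loop:  lwarx  dest, 0, ptr
  //          cmpw   cr0, dest, incr
  //          bgt    cr0, exit          ; memory already holds the larger value
  //   loop2: stwcx. incr, 0, ptr
  //          bne-   cr0, loop          ; reservation lost, retry
  //   exit:
  //  (dest returns the original memory value; byte/halfword operands are
  //  additionally sign-extended before a signed compare, see below).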
  BB = loopMBB;
  BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
      .addReg(ptrA).addReg(ptrB);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
  if (CmpOpcode) {
    Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
    // Signed comparisons of byte or halfword values must be sign-extended.
    if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
      Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
      BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
              ExtReg).addReg(dest);
      BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
    } else
      BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);

    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(CmpPred).addReg(CrReg).addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);
    BB = loop2MBB;
  }
  BuildMI(BB, dl, TII->get(StoreMnemonic))
      .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
  BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  return BB;
}

static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case PPC::COPY:
    return TII->isSignExtended(MI.getOperand(1).getReg(),
                               &MI.getMF()->getRegInfo());
  case PPC::EXTSB8_32_64:
  case PPC::EXTSB8_rec:
  case PPC::EXTSB_rec:
  case PPC::EXTSH:
  case PPC::EXTSH8:
  case PPC::EXTSH8_32_64:
  case PPC::EXTSH8_rec:
  case PPC::EXTSH_rec:
  case PPC::EXTSW:
  case PPC::EXTSWSLI:
  case PPC::EXTSWSLI_32_64:
  case PPC::EXTSWSLI_32_64_rec:
  case PPC::EXTSWSLI_rec:
  case PPC::EXTSW_32:
  case PPC::EXTSW_32_64:
  case PPC::EXTSW_32_64_rec:
  case PPC::EXTSW_rec:
  case PPC::SRAW:
  case PPC::SRAWI:
  case PPC::SRAWI_rec:
  case PPC::SRAW_rec:
    return true;
  }
  return false;
}

MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
    MachineInstr &MI, MachineBasicBlock *BB,
    bool is8bit, // operation
    unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const PPCInstrInfo *TII = Subtarget.getInstrInfo();

  // If this is a signed comparison and the value being compared is not known
  // to be sign extended, sign extend it here.
  DebugLoc dl = MI.getDebugLoc();
  MachineFunction *F = BB->getParent();
  MachineRegisterInfo &RegInfo = F->getRegInfo();
  Register incr = MI.getOperand(3).getReg();
  bool IsSignExtended =
      incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);

  if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
    Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
    BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
        .addReg(MI.getOperand(3).getReg());
    MI.getOperand(3).setReg(ValueReg);
  }

  // If we support part-word atomic mnemonics, just use them
  if (Subtarget.hasPartwordAtomics())
    return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
                            CmpPred);

  // In 64 bit mode we have to use 64 bits for addresses, even though the
  // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
  // registers without caring whether they're 32 or 64, but here we're
  // doing actual arithmetic on the addresses.
  bool is64bit = Subtarget.isPPC64();
  bool isLittleEndian = Subtarget.isLittleEndian();
  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = ++BB->getIterator();

  Register dest = MI.getOperand(0).getReg();
  Register ptrA = MI.getOperand(1).getReg();
  Register ptrB = MI.getOperand(2).getReg();

  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB =
      CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  if (CmpOpcode)
    F->insert(It, loop2MBB);
  F->insert(It, exitMBB);
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  const TargetRegisterClass *RC =
      is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;

  Register PtrReg = RegInfo.createVirtualRegister(RC);
  Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
  Register ShiftReg =
      isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
  Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
  Register MaskReg = RegInfo.createVirtualRegister(GPRC);
  Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
  Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
  Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
  Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
  Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
  Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
  Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
  Register Ptr1Reg;
  Register TmpReg =
      (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  // The 4-byte load must be aligned, while a char or short may be
  // anywhere in the word.  Hence all this nasty bookkeeping code.
  //   add ptr1, ptrA, ptrB [copy if ptrA==0]
  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
  //   xori shift, shift1, 24 [16]
  //   rlwinm ptr, ptr1, 0, 0, 29
  //   slw incr2, incr, shift
  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
  //   slw mask, mask2, shift
  //  loopMBB:
  //   lwarx tmpDest, ptr
  //   add tmp, tmpDest, incr2
  //   andc tmp2, tmpDest, mask
  //   and tmp3, tmp, mask
  //   or tmp4, tmp3, tmp2
  //   stwcx. tmp4, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  //   srw SrwDest, tmpDest, shift
  //   rlwinm SrwDest, SrwDest, 0, 24 [16], 31
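  //
  // As a worked example of the shift bookkeeping (8-bit case): for a byte at
  // address A, the first rlwinm produces shift1 = (A & 3) * 8.  On
  // little-endian targets that value is used directly as the lane shift (byte
  // offsets 0..3 map to shifts 0, 8, 16, 24); on big-endian targets the xori
  // with 24 flips it so offsets 0..3 map to shifts 24, 16, 8, 0.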
  if (ptrA != ZeroReg) {
    Ptr1Reg = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
        .addReg(ptrA)
        .addReg(ptrB);
  } else {
    Ptr1Reg = ptrB;
  }

  // We need to use a 32-bit subregister here to avoid a register-class
  // mismatch in 64-bit mode.
  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
      .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
      .addImm(3)
      .addImm(27)
      .addImm(is8bit ? 28 : 27);
  if (!isLittleEndian)
    BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
        .addReg(Shift1Reg)
        .addImm(is8bit ? 24 : 16);
  if (is64bit)
    BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
        .addReg(Ptr1Reg)
        .addImm(0)
        .addImm(61);
  else
    BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
        .addReg(Ptr1Reg)
        .addImm(0)
        .addImm(0)
        .addImm(29);
  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
  if (is8bit)
    BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
  else {
    BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
    BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
        .addReg(Mask3Reg)
        .addImm(65535);
  }
  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
      .addReg(Mask2Reg)
      .addReg(ShiftReg);

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
      .addReg(ZeroReg)
      .addReg(PtrReg);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
        .addReg(Incr2Reg)
        .addReg(TmpDestReg);
  BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
      .addReg(TmpDestReg)
      .addReg(MaskReg);
  BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
  if (CmpOpcode) {
    // For unsigned comparisons, we can directly compare the shifted values.
    // For signed comparisons we shift and sign extend.
    Register SReg = RegInfo.createVirtualRegister(GPRC);
    Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
    BuildMI(BB, dl, TII->get(PPC::AND), SReg)
        .addReg(TmpDestReg)
        .addReg(MaskReg);
    unsigned ValueReg = SReg;
    unsigned CmpReg = Incr2Reg;
    if (CmpOpcode == PPC::CMPW) {
      ValueReg = RegInfo.createVirtualRegister(GPRC);
      BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
          .addReg(SReg)
          .addReg(ShiftReg);
      Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
      BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
          .addReg(ValueReg);
      ValueReg = ValueSReg;
      CmpReg = incr;
    }
    BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(CmpPred)
        .addReg(CrReg)
        .addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);
    BB = loop2MBB;
  }
  BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
  BuildMI(BB, dl, TII->get(PPC::STWCX))
      .addReg(Tmp4Reg)
      .addReg(ZeroReg)
      .addReg(PtrReg);
  BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE)
      .addReg(PPC::CR0)
      .addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  // Since the shift amount is not a constant, we need to clear
  // the upper bits with a separate RLWINM.
  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
      .addReg(SrwDestReg)
      .addImm(0)
      .addImm(is8bit ? 24 : 16)
      .addImm(31);
  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
      .addReg(TmpDestReg)
      .addReg(ShiftReg);
  return BB;
}

llvm::MachineBasicBlock *
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *BB = MBB->getBasicBlock();
  MachineFunction::iterator I = ++MBB->getIterator();

  Register DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
  Register mainDstReg = MRI.createVirtualRegister(RC);
  Register restoreDstReg = MRI.createVirtualRegister(RC);

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");
  // For v = setjmp(buf), we generate
  //
  // thisMBB:
  //  SjLjSetup mainMBB
  //  bl mainMBB
  //  v = 1
  //  b sinkMBB
  //
  // mainMBB:
  //  buf[LabelOffset] = LR
  //  v = 0
  //
  // sinkMBB:
  //  v = phi(main, restore)
  //

  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(I, mainMBB);
  MF->insert(I, sinkMBB);

  MachineInstrBuilder MIB;

  // Transfer the remainder of BB and its successor edges to sinkMBB.
  sinkMBB->splice(sinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Note that the structure of the jmp_buf used here is not compatible
  // with that used by libc, and is not designed to be. Specifically, it
  // stores only those 'reserved' registers that LLVM does not otherwise
  // understand how to spill. Also, by convention, by the time this
  // intrinsic is called, Clang has already stored the frame address in the
  // first slot of the buffer and stack address in the third. Following the
  // X86 target code, we'll store the jump address in the second slot. We also
  // need to save the TOC pointer (R2) to handle jumps between shared
  // libraries, and that will be stored in the fourth slot. The thread
  // identifier (R13) is not affected.
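  //
  // Putting that convention together, the buffer layout assumed here is, in
  // pointer-sized slots:
  //   slot 0: frame address  (written by the front end)
  //   slot 1: jump address   (LabelOffset, written in mainMBB below)
  //   slot 2: stack pointer  (written by the front end)
  //   slot 3: TOC pointer R2 (TOCOffset, 64-bit ELF only)
  //   slot 4: base pointer   (BPOffset)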
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t TOCOffset = 3 * PVT.getStoreSize();
  const int64_t BPOffset = 4 * PVT.getStoreSize();

  // Prepare IP either in reg.
  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
  Register LabelReg = MRI.createVirtualRegister(PtrRC);
  Register BufReg = MI.getOperand(1).getReg();

  if (Subtarget.is64BitELFABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
              .addReg(PPC::X2)
              .addImm(TOCOffset)
              .addReg(BufReg)
              .cloneMemRefs(MI);
  }

  // Naked functions never have a base pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned BaseReg;
  if (MF->getFunction().hasFnAttribute(Attribute::Naked))
    BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
  else
    BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;

  MIB = BuildMI(*thisMBB, MI, DL,
                TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
            .addReg(BaseReg)
            .addImm(BPOffset)
            .addReg(BufReg)
            .cloneMemRefs(MI);

  // Setup
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
  MIB.addRegMask(TRI->getNoPreservedMask());

  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);

  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
            .addMBB(mainMBB);
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);

  thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
  thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());

  // mainMBB:
  //  mainDstReg = 0
  MIB =
      BuildMI(mainMBB, DL,
              TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);

  // Store IP
  if (Subtarget.isPPC64()) {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
              .addReg(LabelReg)
              .addImm(LabelOffset)
              .addReg(BufReg);
  } else {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
              .addReg(LabelReg)
              .addImm(LabelOffset)
              .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
  mainMBB->addSuccessor(sinkMBB);

  // sinkMBB:
  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
          TII->get(PPC::PHI), DstReg)
      .addReg(mainDstReg).addMBB(mainMBB)
      .addReg(restoreDstReg).addMBB(thisMBB);

  MI.eraseFromParent();
  return sinkMBB;
}

MachineBasicBlock *
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");

  const TargetRegisterClass *RC =
      (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  Register Tmp = MRI.createVirtualRegister(RC);
  // Since FP is only updated here but NOT referenced, it's treated as GPR.
  unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
  unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
  unsigned BP =
      (PVT == MVT::i64)
          ? PPC::X30
          : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
                                                              : PPC::R30);

  MachineInstrBuilder MIB;

  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t SPOffset = 2 * PVT.getStoreSize();
  const int64_t TOCOffset = 3 * PVT.getStoreSize();
  const int64_t BPOffset = 4 * PVT.getStoreSize();

  Register BufReg = MI.getOperand(0).getReg();

  // Reload FP (the jumped-to function may not have had a
  // frame pointer, and if so, then its r31 will be restored
  // as necessary).
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
              .addImm(0)
              .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
              .addImm(0)
              .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  // Reload IP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
              .addImm(LabelOffset)
              .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
              .addImm(LabelOffset)
              .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  // Reload SP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
              .addImm(SPOffset)
              .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
              .addImm(SPOffset)
              .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  // Reload BP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
              .addImm(BPOffset)
              .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
              .addImm(BPOffset)
              .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  // Reload TOC
  if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
              .addImm(TOCOffset)
              .addReg(BufReg)
              .cloneMemRefs(MI);
  }

  // Jump
  BuildMI(*MBB, MI, DL,
          TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));

  MI.eraseFromParent();
  return MBB;
}

bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
  // If the function specifically requests inline stack probes, emit them.
  if (MF.getFunction().hasFnAttribute("probe-stack"))
    return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
           "inline-asm";
  return false;
}

unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
  unsigned StackAlign = TFI->getStackAlignment();
  assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
         "Unexpected stack alignment");
  // The default stack probe size is 4096 if the function has no
  // stack-probe-size attribute.
  const Function &Fn = MF.getFunction();
  unsigned StackProbeSize =
      Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
  // Round down to the stack alignment.
  StackProbeSize &= ~(StackAlign - 1);
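  // For example, with a 16-byte stack alignment a requested probe size of
  // 1000 rounds down to 992 (1000 & ~15), while the 4096 default is already
  // aligned and is left unchanged.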
  return StackProbeSize ? StackProbeSize : StackAlign;
}

// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
// into three phases. In the first phase, it uses pseudo instruction
// PREPARE_PROBED_ALLOCA to get the future result of actual FramePointer and
// FinalStackPtr. In the second phase, it generates a loop for probing blocks.
// At last, it uses pseudo instruction DYNAREAOFFSET to get the future result
// of MaxCallFrameSize so that it can calculate correct data area pointer.
MachineBasicBlock *
PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  const bool isPPC64 = Subtarget.isPPC64();
  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  const unsigned ProbeSize = getStackProbeSize(*MF);
  const BasicBlock *ProbedBB = MBB->getBasicBlock();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  // The CFG of the probing loop is:
  //   MBB --> TestMBB
  //   TestMBB --> TailMBB                (once SP reaches the final stack ptr)
  //   TestMBB --> BlockMBB --> TestMBB   (otherwise, probe one more block)
  //
  // In MBB, calculate previous frame pointer and final stack pointer.
  // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
  // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
  // TailMBB is spliced via \p MI.
  MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
  MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
  MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);

  MachineFunction::iterator MBBIter = ++MBB->getIterator();
  MF->insert(MBBIter, TestMBB);
  MF->insert(MBBIter, BlockMBB);
  MF->insert(MBBIter, TailMBB);

  const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;

  Register DstReg = MI.getOperand(0).getReg();
  Register NegSizeReg = MI.getOperand(1).getReg();
  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
  Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
  Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
  Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);

  // Since value of NegSizeReg might be realigned in prologepilog, insert a
  // PREPARE_PROBED_ALLOCA pseudo instruction to get actual FramePointer and
  // FinalStackPtr.
  unsigned ProbeOpc;
  if (!MRI.hasOneNonDBGUse(NegSizeReg))
    ProbeOpc =
        isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
  else
    // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg
    // and NegSizeReg will be allocated in the same phyreg to avoid
    // redundant copy when NegSizeReg has only one use which is current MI and
    // will be replaced by PREPARE_PROBED_ALLOCA then.
    ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
                       : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
  BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
      .addDef(ActualNegSizeReg)
      .addReg(NegSizeReg)
      .add(MI.getOperand(2))
      .add(MI.getOperand(3));

  // Calculate final stack pointer, which equals to SP + ActualNegSize.
  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
          FinalStackPtr)
      .addReg(SPReg)
      .addReg(ActualNegSizeReg);

  // Materialize a scratch register for update.
  int64_t NegProbeSize = -(int64_t)ProbeSize;
  assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
  Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
  if (!isInt<16>(NegProbeSize)) {
    Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
        .addImm(NegProbeSize >> 16);
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
            ScratchReg)
        .addReg(TempReg)
        .addImm(NegProbeSize & 0xFFFF);
  } else
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
        .addImm(NegProbeSize);

  {
    // Probing leading residual part.
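    // Roughly, the sequence below computes
    //   NegMod = ActualNegSize - (ActualNegSize / NegProbeSize) * NegProbeSize
    // (i.e. the remainder, via div/mull/subf since there is no modulo
    // instruction) and then performs one store-with-update that both probes
    // the first, partial block and advances SP by that remainder.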
    Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
        .addReg(ActualNegSizeReg)
        .addReg(ScratchReg);
    Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
        .addReg(Div)
        .addReg(ScratchReg);
    Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
        .addReg(Mul)
        .addReg(ActualNegSizeReg);
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
        .addReg(FramePointer)
        .addReg(SPReg)
        .addReg(NegMod);
  }

  {
    // Remaining part should be multiple of ProbeSize.
    Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
    BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
        .addReg(SPReg)
        .addReg(FinalStackPtr);
    BuildMI(TestMBB, DL, TII->get(PPC::BCC))
        .addImm(PPC::PRED_EQ)
        .addReg(CmpResult)
        .addMBB(TailMBB);
    TestMBB->addSuccessor(BlockMBB);
    TestMBB->addSuccessor(TailMBB);
  }

  {
    // Touch the block.
    BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
        .addReg(FramePointer)
        .addReg(SPReg)
        .addReg(ScratchReg);
    BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
    BlockMBB->addSuccessor(TestMBB);
  }

  // Calculation of MaxCallFrameSize is deferred to prologepilog, use
  // DYNAREAOFFSET pseudo instruction to get the future result.
  Register MaxCallFrameSizeReg =
      MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
  BuildMI(TailMBB, DL,
          TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
          MaxCallFrameSizeReg)
      .add(MI.getOperand(2))
      .add(MI.getOperand(3));
  BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
      .addReg(SPReg)
      .addReg(MaxCallFrameSizeReg);

  // Splice instructions after MI to TailMBB.
  TailMBB->splice(TailMBB->end(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
  MBB->addSuccessor(TestMBB);

  // Delete the pseudo instruction.
  MI.eraseFromParent();

  ++NumDynamicAllocaProbed;
  return TailMBB;
}

static bool IsSelectCC(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case PPC::SELECT_CC_I4:
  case PPC::SELECT_CC_I8:
  case PPC::SELECT_CC_F4:
  case PPC::SELECT_CC_F8:
  case PPC::SELECT_CC_F16:
  case PPC::SELECT_CC_VRRC:
  case PPC::SELECT_CC_VSFRC:
  case PPC::SELECT_CC_VSSRC:
  case PPC::SELECT_CC_VSRC:
  case PPC::SELECT_CC_SPE4:
  case PPC::SELECT_CC_SPE:
    return true;
  default:
    return false;
  }
}

static bool IsSelect(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case PPC::SELECT_I4:
  case PPC::SELECT_I8:
  case PPC::SELECT_F4:
  case PPC::SELECT_F8:
  case PPC::SELECT_F16:
  case PPC::SELECT_SPE:
  case PPC::SELECT_SPE4:
  case PPC::SELECT_VRRC:
  case PPC::SELECT_VSFRC:
  case PPC::SELECT_VSSRC:
  case PPC::SELECT_VSRC:
    return true;
  default:
    return false;
  }
}

MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                               MachineBasicBlock *BB) const {
  if (MI.getOpcode() == TargetOpcode::STACKMAP ||
      MI.getOpcode() == TargetOpcode::PATCHPOINT) {
    if (Subtarget.is64BitELFABI() &&
        MI.getOpcode() == TargetOpcode::PATCHPOINT &&
        !Subtarget.isUsingPCRelativeCalls()) {
      // Call lowering should have added an r2 operand to indicate a dependence
      // on the TOC base pointer value. It can't however, because there is no
      // way to mark the dependence as implicit there, and so the stackmap code
      // will confuse it with a regular operand. Instead, add the dependence
      // here.
      MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
    }

    return emitPatchPoint(MI, BB);
  }

  if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
      MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
    return emitEHSjLjSetJmp(MI, BB);
  } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
             MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
    return emitEHSjLjLongJmp(MI, BB);
  }

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  // To "insert" these instructions we actually have to insert their
  // control-flow patterns.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = ++BB->getIterator();

  MachineFunction *F = BB->getParent();
  MachineRegisterInfo &MRI = F->getRegInfo();

  if (Subtarget.hasISEL() &&
      (MI.getOpcode() == PPC::SELECT_CC_I4 ||
       MI.getOpcode() == PPC::SELECT_CC_I8 ||
       MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
    SmallVector<MachineOperand, 2> Cond;
    if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
        MI.getOpcode() == PPC::SELECT_CC_I8)
      Cond.push_back(MI.getOperand(4));
    else
      Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
    Cond.push_back(MI.getOperand(1));

    DebugLoc dl = MI.getDebugLoc();
    TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
                      MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
  } else if (IsSelectCC(MI) || IsSelect(MI)) {
    // The incoming instruction knows the destination vreg to set, the
    // condition code register to branch on, the true/false values to
    // select between, and a branch opcode to use.

    //  thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC sinkMBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
    DebugLoc dl = MI.getDebugLoc();
    F->insert(It, copy0MBB);
    F->insert(It, sinkMBB);

    // Set the call frame size on entry to the new basic blocks.
    // See https://reviews.llvm.org/D156113.
    unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
    copy0MBB->setCallFrameSize(CallFrameSize);
    sinkMBB->setCallFrameSize(CallFrameSize);

    // Transfer the remainder of BB and its successor edges to sinkMBB.
    sinkMBB->splice(sinkMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

    // Next, add the true and fallthrough blocks as its successors.
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    if (IsSelect(MI)) {
      BuildMI(BB, dl, TII->get(PPC::BC))
          .addReg(MI.getOperand(1).getReg())
          .addMBB(sinkMBB);
    } else {
      unsigned SelectPred = MI.getOperand(4).getImm();
      BuildMI(BB, dl, TII->get(PPC::BCC))
          .addImm(SelectPred)
          .addReg(MI.getOperand(1).getReg())
          .addMBB(sinkMBB);
    }

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
        .addReg(MI.getOperand(3).getReg())
        .addMBB(copy0MBB)
        .addReg(MI.getOperand(2).getReg())
        .addMBB(thisMBB);
  } else if (MI.getOpcode() == PPC::ReadTB) {
    // To read the 64-bit time-base register on a 32-bit target, we read the
    // two halves. Should the counter have wrapped while it was being read, we
    // need to try again.
    // ...
    // readLoop:
    // mfspr Rx,TBU # load from TBU
    // mfspr Ry,TB  # load from TB
    // mfspr Rz,TBU # load from TBU
    // cmpw crX,Rx,Rz # check if 'old'='new'
    // bne readLoop   # branch if they're not equal
    // ...

    MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
    DebugLoc dl = MI.getDebugLoc();
    F->insert(It, readMBB);
    F->insert(It, sinkMBB);

    // Transfer the remainder of BB and its successor edges to sinkMBB.
    sinkMBB->splice(sinkMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

    BB->addSuccessor(readMBB);
    BB = readMBB;

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
    Register LoReg = MI.getOperand(0).getReg();
    Register HiReg = MI.getOperand(1).getReg();

    BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
    BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
    BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);

    Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);

    BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
        .addReg(HiReg)
        .addReg(ReadAgainReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE)
        .addReg(CmpReg)
        .addMBB(readMBB);

    BB->addSuccessor(readMBB);
    BB->addSuccessor(sinkMBB);
  } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);

  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0);
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0);
  else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
           MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
           (Subtarget.hasPartwordAtomics() &&
            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
           (Subtarget.hasPartwordAtomics() &&
            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
    bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;

    auto LoadMnemonic = PPC::LDARX;
    auto StoreMnemonic = PPC::STDCX;
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("Compare and swap of unknown size");
    case PPC::ATOMIC_CMP_SWAP_I8:
      LoadMnemonic = PPC::LBARX;
      StoreMnemonic = PPC::STBCX;
      assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
      break;
    case PPC::ATOMIC_CMP_SWAP_I16:
      LoadMnemonic = PPC::LHARX;
      StoreMnemonic = PPC::STHCX;
      assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
      break;
    case PPC::ATOMIC_CMP_SWAP_I32:
      LoadMnemonic = PPC::LWARX;
      StoreMnemonic = PPC::STWCX;
      break;
    case PPC::ATOMIC_CMP_SWAP_I64:
      LoadMnemonic = PPC::LDARX;
      StoreMnemonic = PPC::STDCX;
      break;
    }
    MachineRegisterInfo &RegInfo = F->getRegInfo();
    Register dest = MI.getOperand(0).getReg();
    Register ptrA = MI.getOperand(1).getReg();
    Register ptrB = MI.getOperand(2).getReg();
    Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
    Register oldval = MI.getOperand(3).getReg();
    Register newval = MI.getOperand(4).getReg();
    DebugLoc dl = MI.getDebugLoc();

    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, loop1MBB);
    F->insert(It, loop2MBB);
    F->insert(It, exitMBB);
    exitMBB->splice(exitMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);

    //  thisMBB:
    //   ...
    //   fallthrough --> loopMBB
    BB->addSuccessor(loop1MBB);

    //  loop1MBB:
    //   l[bhwd]arx dest, ptr
    //   cmp[wd] dest, oldval
    //   bne- exitBB
    //  loop2MBB:
    //   st[bhwd]cx. newval, ptr
    //   bne- loopMBB
    //   b exitBB
    //  exitBB:
    BB = loop1MBB;
    BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
        .addReg(dest)
        .addReg(oldval);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE)
        .addReg(CrReg)
        .addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);

    BB = loop2MBB;
    BuildMI(BB, dl, TII->get(StoreMnemonic))
        .addReg(newval)
        .addReg(ptrA)
        .addReg(ptrB);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE)
        .addReg(PPC::CR0)
        .addMBB(loop1MBB);
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
    BB->addSuccessor(loop1MBB);
    BB->addSuccessor(exitMBB);

    //  exitMBB:
    //   ...
    BB = exitMBB;
  } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
    // We must use 64-bit registers for addresses when targeting 64-bit,
    // since we're actually doing arithmetic on them.  Other registers
    // can be 32-bit.
    bool is64bit = Subtarget.isPPC64();
    bool isLittleEndian = Subtarget.isLittleEndian();
    bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;

    Register dest = MI.getOperand(0).getReg();
    Register ptrA = MI.getOperand(1).getReg();
    Register ptrB = MI.getOperand(2).getReg();
    Register oldval = MI.getOperand(3).getReg();
    Register newval = MI.getOperand(4).getReg();
    DebugLoc dl = MI.getDebugLoc();

    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, loop1MBB);
    F->insert(It, loop2MBB);
    F->insert(It, exitMBB);
    exitMBB->splice(exitMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    const TargetRegisterClass *RC =
        is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
    const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;

    Register PtrReg = RegInfo.createVirtualRegister(RC);
    Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
    Register ShiftReg =
        isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
    Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
    Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
    Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
    Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
    Register MaskReg = RegInfo.createVirtualRegister(GPRC);
    Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
    Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
    Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
    Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
    Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
    Register Ptr1Reg;
    Register TmpReg = RegInfo.createVirtualRegister(GPRC);
    Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
    Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
    //  thisMBB:
    //   ...
    //   fallthrough --> loopMBB
    BB->addSuccessor(loop1MBB);

    // The 4-byte load must be aligned, while a char or short may be
    // anywhere in the word.  Hence all this nasty bookkeeping code.
    //   add ptr1, ptrA, ptrB [copy if ptrA==0]
    //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
    //   xori shift, shift1, 24 [16]
    //   rlwinm ptr, ptr1, 0, 0, 29
    //   slw newval2, newval, shift
    //   slw oldval2, oldval, shift
    //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
    //   slw mask, mask2, shift
    //   and newval3, newval2, mask
    //   and oldval3, oldval2, mask
    //  loop1MBB:
    //   lwarx tmpDest, ptr
    //   and tmp, tmpDest, mask
    //   cmpw tmp, oldval3
    //   bne- exitBB
    //  loop2MBB:
    //   andc tmp2, tmpDest, mask
    //   or tmp4, tmp2, newval3
    //   stwcx. tmp4, ptr
    //   bne- loop1MBB
    //   b exitBB
    //  exitBB:
    //   srw dest, tmpDest, shift
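    //
    // This is the same word-lane masking trick used by
    // EmitPartwordAtomicBinary above: the compare is done against the
    // shifted-and-masked expected value (oldval3), and the store merges the
    // shifted new value (newval3) into the untouched bytes of the word.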
    if (ptrA != ZeroReg) {
      Ptr1Reg = RegInfo.createVirtualRegister(RC);
      BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
          .addReg(ptrA)
          .addReg(ptrB);
    } else {
      Ptr1Reg = ptrB;
    }

    // We need to use a 32-bit subregister here to avoid a register-class
    // mismatch in 64-bit mode.
    BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
        .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
        .addImm(3)
        .addImm(27)
        .addImm(is8bit ? 28 : 27);
    if (!isLittleEndian)
      BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
          .addReg(Shift1Reg)
          .addImm(is8bit ? 24 : 16);
    if (is64bit)
      BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
          .addReg(Ptr1Reg)
          .addImm(0)
          .addImm(61);
    else
      BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
          .addReg(Ptr1Reg)
          .addImm(0)
          .addImm(0)
          .addImm(29);
    BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
        .addReg(newval)
        .addReg(ShiftReg);
    BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
        .addReg(oldval)
        .addReg(ShiftReg);
    if (is8bit)
      BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
    else {
      BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
      BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
          .addReg(Mask3Reg)
          .addImm(65535);
    }
    BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
        .addReg(Mask2Reg)
        .addReg(ShiftReg);
    BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
        .addReg(NewVal2Reg)
        .addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
        .addReg(OldVal2Reg)
        .addReg(MaskReg);

    BB = loop1MBB;
    BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
        .addReg(ZeroReg)
        .addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
        .addReg(TmpDestReg)
        .addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
        .addReg(TmpReg)
        .addReg(OldVal3Reg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE)
        .addReg(CrReg)
        .addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);

    BB = loop2MBB;
    BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
        .addReg(TmpDestReg)
        .addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
        .addReg(Tmp2Reg)
        .addReg(NewVal3Reg);
    BuildMI(BB, dl, TII->get(PPC::STWCX))
        .addReg(Tmp4Reg)
        .addReg(ZeroReg)
        .addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE)
        .addReg(PPC::CR0)
        .addMBB(loop1MBB);
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
    BB->addSuccessor(loop1MBB);
    BB->addSuccessor(exitMBB);

    //  exitMBB:
    //   ...
    BB = exitMBB;
    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
        .addReg(TmpDestReg)
        .addReg(ShiftReg);
  } else if (MI.getOpcode() == PPC::FADDrtz) {
    // This pseudo performs an FADD with rounding mode temporarily forced
    // to round-to-zero.  We emit this via custom inserter since the FPSCR
    // is not modeled at the SelectionDAG level.
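    //
    // Roughly, the expansion below is:
    //   mffs  MFFSReg          ; save the current FPSCR
    //   mtfsb1 / mtfsb0        ; force the RN field to round-toward-zero
    //   fadd  Dest, Src1, Src2
    //   mtfsf 1, MFFSReg       ; restore the saved rounding-mode field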
    Register Dest = MI.getOperand(0).getReg();
    Register Src1 = MI.getOperand(1).getReg();
    Register Src2 = MI.getOperand(2).getReg();
    DebugLoc dl = MI.getDebugLoc();

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);

    // Save FPSCR value.
    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);

    // Set rounding mode to round-to-zero.
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
        .addImm(31)
        .addReg(PPC::RM, RegState::ImplicitDefine);

    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
        .addImm(30)
        .addReg(PPC::RM, RegState::ImplicitDefine);

    // Perform addition.
    auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
                   .addReg(Src1)
                   .addReg(Src2);
    if (MI.getFlag(MachineInstr::NoFPExcept))
      MIB.setMIFlag(MachineInstr::NoFPExcept);

    // Restore FPSCR value.
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
  } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
             MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
             MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
             MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
    unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
                       MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
                          ? PPC::ANDI8_rec
                          : PPC::ANDI_rec;
    bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
                 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    Register Dest = RegInfo.createVirtualRegister(
        Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);

    DebugLoc Dl = MI.getDebugLoc();
    BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
        .addReg(MI.getOperand(1).getReg())
        .addImm(1);
    BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
            MI.getOperand(0).getReg())
        .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
  } else if (MI.getOpcode() == PPC::TCHECK_RET) {
    DebugLoc Dl = MI.getDebugLoc();
    MachineRegisterInfo &RegInfo = F->getRegInfo();
    Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
    BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
    BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
            MI.getOperand(0).getReg())
        .addReg(CRReg);
  } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
    DebugLoc Dl = MI.getDebugLoc();
    unsigned Imm = MI.getOperand(1).getImm();
    BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
    BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
            MI.getOperand(0).getReg())
        .addReg(PPC::CR0EQ);
  } else if (MI.getOpcode() == PPC::SETRNDi) {
    DebugLoc dl = MI.getDebugLoc();
    Register OldFPSCRReg = MI.getOperand(0).getReg();

    // Save FPSCR value.
    if (MRI.use_empty(OldFPSCRReg))
      BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
    else
      BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);

    // The floating point rounding mode is in the bits 62:63 of FPSCR, and has
    // the following settings:
    //   00 Round to nearest
    //   01 Round to zero
    //   10 Round to +inf
    //   11 Round to -inf
    //
    // When the operand is immediate, use the two least significant bits of
    // the immediate to set the bits 62:63 of FPSCR.
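    //
    // For example, `SETRNDi 3` expands to two mtfsb1 instructions that set
    // both RN bits (round toward -inf), while `SETRNDi 0` expands to two
    // mtfsb0 instructions that clear them (round to nearest).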
    unsigned Mode = MI.getOperand(1).getImm();
    BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
        .addImm(31)
        .addReg(PPC::RM, RegState::ImplicitDefine);

    BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
        .addImm(30)
        .addReg(PPC::RM, RegState::ImplicitDefine);
  } else if (MI.getOpcode() == PPC::SETRND) {
    DebugLoc dl = MI.getDebugLoc();

    // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
    // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
    // If the target doesn't have DirectMove, we should use stack to do the
    // conversion, because the target doesn't have the instructions like mtvsrd
    // or mfvsrd to do this conversion directly.
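    //
    // For instance, without direct moves an FPR-to-GPR copy below turns into
    // a store/reload pair through a temporary stack slot (stfd + ld), and the
    // GPR-to-FPR direction into std + lfd.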
    auto copyRegFromG8RCOrF8RC = [&](unsigned DestReg, unsigned SrcReg) {
      if (Subtarget.hasDirectMove()) {
        BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
            .addReg(SrcReg);
      } else {
        // Use stack to do the register copy.
        unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
        MachineRegisterInfo &RegInfo = F->getRegInfo();
        const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
        if (RC == &PPC::F8RCRegClass) {
          // Copy register from F8RCRegClass to G8RCRegclass.
          assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
                 "Unsupported RegClass.");

          StoreOp = PPC::STFD;
          LoadOp = PPC::LD;
        } else {
          // Copy register from G8RCRegClass to F8RCRegclass.
          assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
                 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
                 "Unsupported RegClass.");
        }

        MachineFrameInfo &MFI = F->getFrameInfo();
        int FrameIdx = MFI.CreateStackObject(8, Align(8), false);

        MachineMemOperand *MMOStore = F->getMachineMemOperand(
            MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
            MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
            MFI.getObjectAlign(FrameIdx));

        // Store the SrcReg into the stack.
        BuildMI(*BB, MI, dl, TII->get(StoreOp))
            .addReg(SrcReg)
            .addImm(0)
            .addFrameIndex(FrameIdx)
            .addMemOperand(MMOStore);

        MachineMemOperand *MMOLoad = F->getMachineMemOperand(
            MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
            MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
            MFI.getObjectAlign(FrameIdx));

        // Load from the stack where SrcReg is stored, and save to DestReg,
        // so we have done the RegClass conversion from RegClass::SrcReg to
        // RegClass::DestReg.
        BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
            .addImm(0)
            .addFrameIndex(FrameIdx)
            .addMemOperand(MMOLoad);
      }
    };

    Register OldFPSCRReg = MI.getOperand(0).getReg();

    // Save FPSCR value.
    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);

    // When the operand is a gprc register, use the two least significant bits
    // of the register and the mtfsf instruction to set the bits 62:63 of FPSCR.
    //
    // copy OldFPSCRTmpReg, OldFPSCRReg
    // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
    // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
    // copy NewFPSCRReg, NewFPSCRTmpReg
    // mtfsf 255, NewFPSCRReg
    MachineOperand SrcOp = MI.getOperand(1);
    MachineRegisterInfo &RegInfo = F->getRegInfo();
    Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);

    copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);

    Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
    Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);

    // The first operand of INSERT_SUBREG should be a register which has
    // subregisters, we only care about its RegClass, so we should use an
    // IMPLICIT_DEF register.
    BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
    BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
        .addReg(ImDefReg)
        .add(SrcOp)
        .addImm(1);

    Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
    BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
        .addReg(OldFPSCRTmpReg)
        .addReg(ExtSrcReg)
        .addImm(0)
        .addImm(62);

    Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
    copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);

    // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63
    // bits of FPSCR.
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
        .addImm(255)
        .addReg(NewFPSCRReg)
        .addImm(0)
        .addImm(0);
  } else if (MI.getOpcode() == PPC::SETFLM) {
    DebugLoc Dl = MI.getDebugLoc();

    // Result of setflm is previous FPSCR content, so we need to save it first.
    Register OldFPSCRReg = MI.getOperand(0).getReg();
    if (MRI.use_empty(OldFPSCRReg))
      BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
    else
      BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);

    // Put bits in 32:63 to FPSCR.
    Register NewFPSCRReg = MI.getOperand(1).getReg();
    BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
        .addImm(255)
        .addReg(NewFPSCRReg)
        .addImm(0)
        .addImm(0);
  } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
             MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
    return emitProbedAlloca(MI, BB);
  } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
    DebugLoc DL = MI.getDebugLoc();
    Register Src = MI.getOperand(2).getReg();
    Register Lo = MI.getOperand(0).getReg();
    Register Hi = MI.getOperand(1).getReg();
    BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
        .addDef(Lo)
        .addUse(Src, 0, PPC::sub_gp8_x1);
    BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
        .addDef(Hi)
        .addUse(Src, 0, PPC::sub_gp8_x0);
  } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
             MI.getOpcode() == PPC::STQX_PSEUDO) {
    DebugLoc DL = MI.getDebugLoc();
    // Ptr is used as the ptr_rc_no_r0 part
    // of LQ/STQ's memory operand and adding result of RA and RB,
    // so it has to be g8rc_and_g8rc_nox0.
    Register Ptr =
        F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
    Register Val = MI.getOperand(0).getReg();
    Register RA = MI.getOperand(1).getReg();
    Register RB = MI.getOperand(2).getReg();
    BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
    BuildMI(*BB, MI, DL,
            MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
                                              : TII->get(PPC::STQ))
        .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
        .addImm(0)
        .addReg(Ptr);
  } else {
    llvm_unreachable("Unexpected instr type to insert");
  }

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

//===----------------------------------------------------------------------===//
//  Target Optimization Hooks
//===----------------------------------------------------------------------===//

static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
  // For the estimates, convergence is quadratic, so we essentially double the
  // number of digits correct after every iteration. For both FRE and FRSQRTE,
  // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
  // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
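  // For example (illustrative arithmetic): starting from 2^-5, successive
  // Newton-Raphson steps give roughly 2^-10, 2^-20, 2^-40 and 2^-80, so three
  // steps cover an f32 mantissa and one extra step (added below for f64)
  // covers 52 bits; with hasRecipPrec() the 2^-14 estimate needs only one
  // step for f32 and two for f64.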
  int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
  if (VT.getScalarType() == MVT::f64)
    RefinementSteps++;
  return RefinementSteps;
}

SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
                                            const DenormalMode &Mode) const {
  // We only have VSX Vector Test for software Square Root.
  EVT VT = Op.getValueType();
  if (!isTypeLegal(MVT::i1) ||
      (VT != MVT::f64 &&
       ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
    return TargetLowering::getSqrtInputTest(Op, DAG, Mode);

  SDLoc DL(Op);
  // The output register of FTSQRT is CR field.
  SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
  // Let e_b be the unbiased exponent of the double-precision
  // floating-point operand in register FRB.
  // fe_flag is set to 1 if either of the following conditions occurs.
  //   - The double-precision floating-point operand in register FRB is a zero,
  //     a NaN, an infinity, or a negative value.
  //   - e_b is less than or equal to -970.
  // Otherwise fe_flag is set to 0.
  // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
  // not eligible for iteration (zero/negative/infinity/NaN or unbiased
  // exponent is less than or equal to -970).
  SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
  return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
                                    FTSQRT, SRIdxVal),
                 0);
}

SDValue
PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
                                               SelectionDAG &DAG) const {
  // We only have VSX Vector Square Root.
  EVT VT = Op.getValueType();
  if (VT != MVT::f64 &&
      ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
    return TargetLowering::getSqrtResultForDenormInput(Op, DAG);

  return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
}
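
// The estimate nodes produced below (PPCISD::FRSQRTE / PPCISD::FRE) only give
// the initial hardware approximation; the RefinementSteps requested above are
// applied by the generic estimate-refinement code using the standard
// Newton-Raphson recurrences (illustrative):
//   reciprocal:      x_{n+1} = x_n * (2 - d * x_n)
//   reciprocal sqrt: x_{n+1} = x_n * (1.5 - 0.5 * d * x_n * x_n)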
SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
                                           int Enabled, int &RefinementSteps,
                                           bool &UseOneConstNR,
                                           bool Reciprocal) const {
  EVT VT = Operand.getValueType();
  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
      (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
      (VT == MVT::v2f64 && Subtarget.hasVSX())) {
    if (RefinementSteps == ReciprocalEstimate::Unspecified)
      RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);

    // The Newton-Raphson computation with a single constant does not provide
    // enough accuracy on some CPUs.
    UseOneConstNR = !Subtarget.needsTwoConstNR();
    return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
  }
  return SDValue();
}

SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
                                            int Enabled,
                                            int &RefinementSteps) const {
  EVT VT = Operand.getValueType();
  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
      (VT == MVT::f64 && Subtarget.hasFRE()) ||
      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
      (VT == MVT::v2f64 && Subtarget.hasVSX())) {
    if (RefinementSteps == ReciprocalEstimate::Unspecified)
      RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
    return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
  }
  return SDValue();
}

unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
  // Note: This functionality is used only when unsafe-fp-math is enabled, and
  // on cores with reciprocal estimates (which are used when unsafe-fp-math is
  // enabled for division), this functionality is redundant with the default
  // combiner logic (once the division -> reciprocal/multiply transformation
  // has taken place). As a result, this matters more for older cores than for
  // newer ones.

  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
  // reciprocal if there are two or more FDIVs (for embedded cores with only
  // one FP pipeline) or three or more FDIVs (for generic OOO cores).
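  // For example (illustrative): a block computing a/d, b/d and c/d can instead
  // materialize r = 1.0/d once and compute a*r, b*r and c*r, trading three
  // divides for one divide (or reciprocal estimate) plus three multiplies.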
  switch (Subtarget.getCPUDirective()) {
  default:
    return 3;
  case PPC::DIR_440:
  case PPC::DIR_A2:
  case PPC::DIR_E500:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
    return 2;
  }
}

// isConsecutiveLSLoc needs to work even if all adds have not yet been
// collapsed, and so we need to look through chains of them.
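// For example (illustrative), a location of the form (add (add %base, 16), 8)
// accumulates to Base = %base and Offset += 24.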
static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
                                      int64_t &Offset, SelectionDAG &DAG) {
  if (DAG.isBaseWithConstantOffset(Loc)) {
    Base = Loc.getOperand(0);
    Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();

    // The base might itself be a base plus an offset, and if so, accumulate
    // that as well.
    getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
  }
}

static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
                               unsigned Bytes, int Dist,
                               SelectionDAG &DAG) {
  if (VT.getSizeInBits() / 8 != Bytes)
    return false;

  SDValue BaseLoc = Base->getBasePtr();
  if (Loc.getOpcode() == ISD::FrameIndex) {
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS  = MFI.getObjectSize(FI);
    int BFS = MFI.getObjectSize(BFI);
    if (FS != BFS || FS != (int)Bytes) return false;
    return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
  }

  SDValue Base1 = Loc, Base2 = BaseLoc;
  int64_t Offset1 = 0, Offset2 = 0;
  getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
  getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
  if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
    return true;

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const GlobalValue *GV1 = nullptr;
  const GlobalValue *GV2 = nullptr;
  Offset1 = 0;
  Offset2 = 0;
  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
  if (isGA1 && isGA2 && GV1 == GV2)
    return Offset1 == (Offset2 + Dist*Bytes);

  return false;
}

// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
// not enforce equality of the chain operands.
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
                            unsigned Bytes, int Dist,
                            SelectionDAG &DAG) {
  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
    EVT VT = LS->getMemoryVT();
    SDValue Loc = LS->getBasePtr();
    return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
  }

  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
    EVT VT;
    switch (N->getConstantOperandVal(1)) {
    default: return false;
    case Intrinsic::ppc_altivec_lvx:
    case Intrinsic::ppc_altivec_lvxl:
    case Intrinsic::ppc_vsx_lxvw4x:
    case Intrinsic::ppc_vsx_lxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
    case Intrinsic::ppc_vsx_lxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
  }

  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
    EVT VT;
    switch (N->getConstantOperandVal(1)) {
    default: return false;
    case Intrinsic::ppc_altivec_stvx:
    case Intrinsic::ppc_altivec_stvxl:
    case Intrinsic::ppc_vsx_stxvw4x:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_vsx_stxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
  }

  return false;
}

// Return true if there is a nearby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking through
// token factors and other loads (but nothing else). As a result, a true result
// indicates that it is safe to create a new consecutive load adjacent to the
// load provided.
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
  SDValue Chain = LD->getChain();
  EVT VT = LD->getMemoryVT();

  SmallSet<SDNode *, 16> LoadRoots;
  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
  SmallSet<SDNode *, 16> Visited;

  // First, search up the chain, branching to follow all token-factor operands.
  // If we find a consecutive load, then we're done, otherwise, record all
  // nodes just above the top-level loads and token factors.
  while (!Queue.empty()) {
    SDNode *ChainNext = Queue.pop_back_val();
    if (!Visited.insert(ChainNext).second)
      continue;

    if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
      if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
        return true;

      if (!Visited.count(ChainLD->getChain().getNode()))
        Queue.push_back(ChainLD->getChain().getNode());
    } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
      for (const SDUse &O : ChainNext->ops())
        if (!Visited.count(O.getNode()))
          Queue.push_back(O.getNode());
    } else
      LoadRoots.insert(ChainNext);
  }

  // Second, search down the chain, starting from the top-level nodes recorded
  // in the first phase. These top-level nodes are the nodes just above all
  // loads and token factors. Starting with their uses, recursively look through
  // all loads (just the chain uses) and token factors to find a consecutive
  // load.
  Visited.clear();
  Queue.clear();

  for (SDNode *I : LoadRoots) {
    Queue.push_back(I);

    while (!Queue.empty()) {
      SDNode *LoadRoot = Queue.pop_back_val();
      if (!Visited.insert(LoadRoot).second)
        continue;

      if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
        if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
          return true;

      for (SDNode *U : LoadRoot->uses())
        if (((isa<MemSDNode>(U) &&
              cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
             U->getOpcode() == ISD::TokenFactor) &&
            !Visited.count(U))
          Queue.push_back(U);
    }
  }

  return false;
}

/// This function is called when we have proved that a SETCC node can be
/// replaced by subtraction (and other supporting instructions) so that the
/// result of comparison is kept in a GPR instead of CR. This function is
/// purely for codegen purposes and has some flags to guide the codegen process.
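///
/// For example (illustrative), for an i32 unsigned compare a <u b with
/// Size == 64: both operands are zero extended to i64, Sub = a - b is negative
/// exactly when a < b, so (Sub >> 63) & 1 is the comparison result; Swap
/// evaluates b - a instead, and Complement flips the final bit.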
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
                                     bool Swap, SDLoc &DL, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");

  // Zero extend the operands to the largest legal integer. Originally, they
  // must be of a strictly smaller size.
  auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
                         DAG.getConstant(Size, DL, MVT::i32));
  auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
                         DAG.getConstant(Size, DL, MVT::i32));

  // Swap if needed. Depends on the condition code.
  if (Swap)
    std::swap(Op0, Op1);

  // Subtract extended integers.
  auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);

  // Move the sign bit to the least significant position and zero out the rest.
  // Now the least significant bit carries the result of original comparison.
  auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
                             DAG.getConstant(Size - 1, DL, MVT::i32));
  auto Final = Shifted;

  // Complement the result if needed. Based on the condition code.
  if (Complement)
    Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
                        DAG.getConstant(1, DL, MVT::i64));

  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
}

SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);

  // Size of integers being compared has a critical role in the following
  // analysis, so we prefer to do this when all types are legal.
  if (!DCI.isAfterLegalizeDAG())
    return SDValue();

  // If all users of SETCC extend its value to a legal integer type
  // then we replace SETCC with a subtraction.
  for (const SDNode *U : N->uses())
    if (U->getOpcode() != ISD::ZERO_EXTEND)
      return SDValue();

  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  auto OpSize = N->getOperand(0).getValueSizeInBits();

  unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();

  if (OpSize < Size) {
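    // The flags passed to generateEquivalentSub encode the condition
    // (illustrative): SETULT needs neither swap nor complement (a - b < 0),
    // SETUGT only swaps (b - a < 0), SETUGE only complements (!(a < b)), and
    // SETULE both swaps and complements (!(a > b)).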
    switch (CC) {
    default: break;
    case ISD::SETULT:
      return generateEquivalentSub(N, Size, false, false, DL, DAG);
    case ISD::SETULE:
      return generateEquivalentSub(N, Size, true, true, DL, DAG);
    case ISD::SETUGT:
      return generateEquivalentSub(N, Size, false, true, DL, DAG);
    case ISD::SETUGE:
      return generateEquivalentSub(N, Size, true, false, DL, DAG);
    }
  }

  return SDValue();
}

SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
  // If we're tracking CR bits, we need to be careful that we don't have:
  //   trunc(binary-ops(zext(x), zext(y)))
  // or
  //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
  // such that we're unnecessarily moving things into GPRs when it would be
  // better to keep them in CR bits.

  // Note that trunc here can be an actual i1 trunc, or can be the effective
  // truncation that comes from a setcc or select_cc.
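  //
  // For example (illustrative):
  //   %zx = zext i1 %x to i32
  //   %zy = zext i1 %y to i32
  //   %o  = or i32 %zx, %zy
  //   %t  = trunc i32 %o to i1
  // is better done as an i1 'or' of %x and %y kept in CR bits.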
  if (N->getOpcode() == ISD::TRUNCATE &&
      N->getValueType(0) != MVT::i1)
    return SDValue();

  if (N->getOperand(0).getValueType() != MVT::i32 &&
      N->getOperand(0).getValueType() != MVT::i64)
    return SDValue();

  if (N->getOpcode() == ISD::SETCC ||
      N->getOpcode() == ISD::SELECT_CC) {
    // If we're looking at a comparison, then we need to make sure that the
    // high bits (all except for the first) don't affect the result.
    ISD::CondCode CC =
      cast<CondCodeSDNode>(N->getOperand(
        N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
    unsigned OpBits = N->getOperand(0).getValueSizeInBits();

    if (ISD::isSignedIntSetCC(CC)) {
      if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
          DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
        return SDValue();
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      if (!DAG.MaskedValueIsZero(N->getOperand(0),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
          !DAG.MaskedValueIsZero(N->getOperand(1),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)))
        return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
                                             : SDValue());
    } else {
      // This is neither a signed nor an unsigned comparison, just make sure
      // that the high bits are equal.
      KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
      KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));

      // We don't really care about what is known about the first bit (if
      // anything), so pretend that it is known zero for both to ensure they
      // can be compared as constants.
      Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
      Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);

      if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
          Op1Known.getConstant() != Op2Known.getConstant())
        return SDValue();
    }
  }

  // We now know that the higher-order bits are irrelevant, we just need to
  // make sure that all of the intermediate operations are bit operations, and
  // all inputs are extensions.
  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR  &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
      N->getOperand(1).getOpcode() != ISD::AND &&
      N->getOperand(1).getOpcode() != ISD::OR  &&
      N->getOperand(1).getOpcode() != ISD::XOR &&
      N->getOperand(1).getOpcode() != ISD::SELECT &&
      N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps, PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  for (unsigned i = 0; i < 2; ++i) {
    if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
          N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
        isa<ConstantSDNode>(N->getOperand(i)))
      Inputs.push_back(N->getOperand(i));
    else
      BinOps.push_back(N->getOperand(i));

    if (N->getOpcode() == ISD::TRUNCATE)
      break;
  }

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by extensions.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.pop_back_val();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
           BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
                 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
                 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not an extension or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (const SDNode *User : Inputs[i].getNode()->uses()) {
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i] ||
            User->getOperand(1) == Inputs[i])
          return SDValue();
      }
    }
  }

  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (const SDNode *User : PromOps[i].getNode()->uses()) {
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i] ||
            User->getOperand(1) == PromOps[i])
          return SDValue();
      }
    }
  }

  // Replace all inputs with the extension operand.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constants may have users outside the cluster of to-be-promoted nodes,
    // and so we need to replace those as we do the promotions.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
  }

  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all operations (these are all the same, but have a different
  // (i1) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first. Any intermediate truncations or
  // extensions disappear.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    if (PromOp.getOpcode() == ISD::TRUNCATE ||
        PromOp.getOpcode() == ISD::SIGN_EXTEND ||
        PromOp.getOpcode() == ISD::ZERO_EXTEND ||
        PromOp.getOpcode() == ISD::ANY_EXTEND) {
      if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
          PromOp.getOperand(0).getValueType() != MVT::i1) {
        // The operand is not yet ready (see comment below).
        PromOpHandles.emplace_front(PromOp);
        continue;
      }

      SDValue RepValue = PromOp.getOperand(0);
      if (isa<ConstantSDNode>(RepValue))
        RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);

      DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
      continue;
    }

    unsigned C;
    switch (PromOp.getOpcode()) {
    default:             C = 0; break;
    case ISD::SELECT:    C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != MVT::i1) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If there are any constant inputs, make sure they're replaced now.
    for (unsigned i = 0; i < 2; ++i)
      if (isa<ConstantSDNode>(Ops[C+i]))
        Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
  }

  // Now we're left with the initial truncation itself.
  if (N->getOpcode() == ISD::TRUNCATE)
    return N->getOperand(0);

  // Otherwise, this is a comparison. The operands to be compared have just
  // changed type (to i1), but everything else is the same.
  return SDValue(N, 0);
}

SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  // If we're tracking CR bits, we need to be careful that we don't have:
  //   zext(binary-ops(trunc(x), trunc(y)))
  // or
  //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
  // such that we're unnecessarily moving things into CR bits that can more
  // efficiently stay in GPRs. Note that if we're not certain that the high
  // bits are set as required by the final extension, we still may need to do
  // some masking to get the proper behavior.

  // This same functionality is important on PPC64 when dealing with
  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
  // the return values of functions. Because it is so similar, it is handled
  // here as well.
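  //
  // For example (illustrative), on PPC64:
  //   %tx = trunc i64 %x to i32
  //   %ty = trunc i64 %y to i32
  //   %a  = and i32 %tx, %ty
  //   %z  = zext i32 %a to i64
  // can perform the 'and' directly on %x and %y in 64-bit GPRs, masking at the
  // end only if the high bits are not already known to be zero.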

  if (N->getValueType(0) != MVT::i32 &&
      N->getValueType(0) != MVT::i64)
    return SDValue();

  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
        (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
    return SDValue();

  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR  &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC)
    return SDValue();

  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by truncations.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.pop_back_val();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not a truncation or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // The operands of a select that must be truncated when the select is
  // promoted because the operand is actually part of the to-be-promoted set.
  DenseMap<SDNode *, EVT> SelectTruncOp[2];

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (SDNode *User : Inputs[i].getNode()->uses()) {
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) or SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == Inputs[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }

  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode *User : PromOps[i].getNode()->uses()) {
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) or SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == PromOps[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }

  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
  bool ReallyNeedsExt = false;
  if (N->getOpcode() != ISD::ANY_EXTEND) {
    // If all of the inputs are not already sign/zero extended, then
    // we'll still need to do that at the end.
    for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
      if (isa<ConstantSDNode>(Inputs[i]))
        continue;

      unsigned OpBits =
        Inputs[i].getOperand(0).getValueSizeInBits();
      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");

      if ((N->getOpcode() == ISD::ZERO_EXTEND &&
           !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
                                  APInt::getHighBitsSet(OpBits,
                                                        OpBits-PromBits))) ||
          (N->getOpcode() == ISD::SIGN_EXTEND &&
           DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
             (OpBits-(PromBits-1)))) {
        ReallyNeedsExt = true;
        break;
      }
    }
  }

  // Replace all inputs, either with the truncation operand, or a
  // truncation or extension to the final output type.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constant inputs need to be replaced with the to-be-promoted nodes that
    // use them because they might have users outside of the cluster of
    // promoted nodes.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    SDValue InSrc = Inputs[i].getOperand(0);
    if (Inputs[i].getValueType() == N->getValueType(0))
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
    else if (N->getOpcode() == ISD::SIGN_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else if (N->getOpcode() == ISD::ZERO_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
  }

  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all operations (these are all the same, but have a different
  // (promoted) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    unsigned C;
    switch (PromOp.getOpcode()) {
    default:             C = 0; break;
    case ISD::SELECT:    C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    // For SELECT and SELECT_CC nodes, we do a similar check for any
    // to-be-promoted comparison inputs.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      if ((SelectTruncOp[0].count(PromOp.getNode()) &&
           PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
          (SelectTruncOp[1].count(PromOp.getNode()) &&
           PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
        PromOpHandles.emplace_front(PromOp);
        continue;
      }
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If this node has constant inputs, then they'll need to be promoted here.
    for (unsigned i = 0; i < 2; ++i) {
      if (!isa<ConstantSDNode>(Ops[C+i]))
        continue;
      if (Ops[C+i].getValueType() == N->getValueType(0))
        continue;

      if (N->getOpcode() == ISD::SIGN_EXTEND)
        Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else if (N->getOpcode() == ISD::ZERO_EXTEND)
        Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else
        Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
    }

    // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
    // truncate them again to the original value type.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
      if (SI0 != SelectTruncOp[0].end())
        Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
      auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
      if (SI1 != SelectTruncOp[1].end())
        Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
    }

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
  }

  // Now we're left with the initial extension itself.
  if (!ReallyNeedsExt)
    return N->getOperand(0);

  // To zero extend, just mask off everything except for the first bit (in the
  // i1 case).
  if (N->getOpcode() == ISD::ZERO_EXTEND)
    return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
                       DAG.getConstant(APInt::getLowBitsSet(
                                         N->getValueSizeInBits(0), PromBits),
                                       dl, N->getValueType(0)));

  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
         "Invalid extension type");
  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
  SDValue ShiftCst =
      DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
  return DAG.getNode(
      ISD::SRA, dl, N->getValueType(0),
      DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
      ShiftCst);
}

SDValue PPCTargetLowering::combineSetCC(SDNode *N,
                                        DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::SETCC &&
         "Should be called with a SETCC node");

  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  if (CC == ISD::SETNE || CC == ISD::SETEQ) {
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);

    // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
    if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
        LHS.hasOneUse())
      std::swap(LHS, RHS);

    // x == 0-y --> x+y == 0
    // x != 0-y --> x+y != 0
    if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
        RHS.hasOneUse()) {
      SDLoc DL(N);
      SelectionDAG &DAG = DCI.DAG;
      EVT VT = N->getValueType(0);
      EVT OpVT = LHS.getValueType();
      SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
      return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
    }
  }

  return DAGCombineTruncBoolExt(N, DCI);
}

// Is this an extending load from an f32 to an f64?
static bool isFPExtLoad(SDValue Op) {
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
    return LD->getExtensionType() == ISD::EXTLOAD &&
           Op.getValueType() == MVT::f64;
  return false;
}

/// Reduces the number of fp-to-int conversions when building a vector.
///
/// If this vector is built out of floating to integer conversions,
/// transform it to a vector built out of floating point values followed by a
/// single floating to integer conversion of the vector.
/// Namely  (build_vector (fptosi $A), (fptosi $B), ...)
/// becomes (fptosi (build_vector ($A, $B, ...)))
SDValue PPCTargetLowering::
combineElementTruncationToVectorTruncation(SDNode *N,
                                           DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  SDValue FirstInput = N->getOperand(0);
  assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
         "The input operand must be an fp-to-int conversion.");

  // This combine happens after legalization so the fp_to_[su]i nodes are
  // already converted to PPCSISD nodes.
  unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
  if (FirstConversion == PPCISD::FCTIDZ ||
      FirstConversion == PPCISD::FCTIDUZ ||
      FirstConversion == PPCISD::FCTIWZ ||
      FirstConversion == PPCISD::FCTIWUZ) {
    bool IsSplat = true;
    bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
      FirstConversion == PPCISD::FCTIWUZ;
    EVT SrcVT = FirstInput.getOperand(0).getValueType();
    SmallVector<SDValue, 4> Ops;
    EVT TargetVT = N->getValueType(0);
    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
      SDValue NextOp = N->getOperand(i);
      if (NextOp.getOpcode() != PPCISD::MFVSR)
        return SDValue();
      unsigned NextConversion = NextOp.getOperand(0).getOpcode();
      if (NextConversion != FirstConversion)
        return SDValue();
      // If we are converting to 32-bit integers, we need to add an FP_ROUND.
      // This is not valid if the input was originally double precision. It is
      // also not profitable to do unless this is an extending load in which
      // case doing this combine will allow us to combine consecutive loads.
      if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
        return SDValue();
      if (N->getOperand(i) != FirstInput)
        IsSplat = false;
    }

    // If this is a splat, we leave it as-is since there will be only a single
    // fp-to-int conversion followed by a splat of the integer. This is better
    // for 32-bit and smaller ints and neutral for 64-bit ints.
    if (IsSplat)
      return SDValue();

    // Now that we know we have the right type of node, get its operands
    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
      SDValue In = N->getOperand(i).getOperand(0);
      if (Is32Bit) {
        // For 32-bit values, we need to add an FP_ROUND node (if we made it
        // here, we know that all inputs are extending loads so this is safe).
        if (In.isUndef())
          Ops.push_back(DAG.getUNDEF(SrcVT));
        else {
          SDValue Trunc =
              DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
                          DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
          Ops.push_back(Trunc);
        }
      } else
        Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
    }

    unsigned Opcode;
    if (FirstConversion == PPCISD::FCTIDZ ||
        FirstConversion == PPCISD::FCTIWZ)
      Opcode = ISD::FP_TO_SINT;
    else
      Opcode = ISD::FP_TO_UINT;

    EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
    SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
    return DAG.getNode(Opcode, dl, TargetVT, BV);
  }
  return SDValue();
}

/// Reduce the number of loads when building a vector.
///
/// Building a vector out of multiple loads can be converted to a load
/// of the vector type if the loads are consecutive. If the loads are
/// consecutive but in descending order, a shuffle is added at the end
/// to reorder the vector.
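///
/// For example (illustrative), for a <4 x i32> result:
///   (build_vector (load p), (load p+4), (load p+8), (load p+12))
/// becomes a single v4i32 load from p; if the elements arrive in descending
/// address order, the same wide load is followed by a vector_shuffle with
/// mask <3,2,1,0>.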
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SDLoc dl(N);

  // Return early for non byte-sized type, as they can't be consecutive.
  if (!N->getValueType(0).getVectorElementType().isByteSized())
    return SDValue();

  bool InputsAreConsecutiveLoads = true;
  bool InputsAreReverseConsecutive = true;
  unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
  SDValue FirstInput = N->getOperand(0);
  bool IsRoundOfExtLoad = false;
  LoadSDNode *FirstLoad = nullptr;

  if (FirstInput.getOpcode() == ISD::FP_ROUND &&
      FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
    FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
    IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
  }
  // Not a build vector of (possibly fp_rounded) loads.
  if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
      N->getNumOperands() == 1)
    return SDValue();

  if (!IsRoundOfExtLoad)
    FirstLoad = cast<LoadSDNode>(FirstInput);

  SmallVector<LoadSDNode *, 4> InputLoads;
  InputLoads.push_back(FirstLoad);
  for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
    // If any inputs are fp_round(extload), they all must be.
    if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
      return SDValue();

    SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
      N->getOperand(i);
    if (NextInput.getOpcode() != ISD::LOAD)
      return SDValue();

    SDValue PreviousInput =
      IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
    LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
    LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);

    // If any inputs are fp_round(extload), they all must be.
    if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
      return SDValue();

    // We only care about regular loads. The PPC-specific load intrinsics
    // will not lead to a merge opportunity.
    if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
      InputsAreConsecutiveLoads = false;
    if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
      InputsAreReverseConsecutive = false;

    // Exit early if the loads are neither consecutive nor reverse consecutive.
    if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
      return SDValue();
    InputLoads.push_back(LD2);
  }

  assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
         "The loads cannot be both consecutive and reverse consecutive.");

  SDValue WideLoad;
  SDValue ReturnSDVal;
  if (InputsAreConsecutiveLoads) {
    assert(FirstLoad && "Input needs to be a LoadSDNode.");
    WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
                           FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
                           FirstLoad->getAlign());
    ReturnSDVal = WideLoad;
  } else if (InputsAreReverseConsecutive) {
    LoadSDNode *LastLoad = InputLoads.back();
    assert(LastLoad && "Input needs to be a LoadSDNode.");
    WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
                           LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
                           LastLoad->getAlign());
    SmallVector<int, 16> Ops;
    for (int i = N->getNumOperands() - 1; i >= 0; i--)
      Ops.push_back(i);

    ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
                                       DAG.getUNDEF(N->getValueType(0)), Ops);
  } else
    return SDValue();

  for (auto *LD : InputLoads)
    DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
  return ReturnSDVal;
}

// This function adds the required vector_shuffle needed to get
// the elements of the vector extract in the correct position
// as specified by the CorrectElems encoding.
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
                                      SDValue Input, uint64_t Elems,
                                      uint64_t CorrectElems) {
  SDLoc dl(N);

  unsigned NumElems = Input.getValueType().getVectorNumElements();
  SmallVector<int, 16> ShuffleMask(NumElems, -1);

  // Knowing the element indices being extracted from the original
  // vector and the order in which they're being inserted, just put
  // them at element indices required for the instruction.
  for (unsigned i = 0; i < N->getNumOperands(); i++) {
    if (DAG.getDataLayout().isLittleEndian())
      ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
    else
      ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
    CorrectElems = CorrectElems >> 8;
    Elems = Elems >> 8;
  }

  SDValue Shuffle =
      DAG.getVectorShuffle(Input.getValueType(), dl, Input,
                           DAG.getUNDEF(Input.getValueType()), ShuffleMask);

  EVT VT = N->getValueType(0);
  SDValue Conv = DAG.getBitcast(VT, Shuffle);

  EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               Input.getValueType().getVectorElementType(),
                               VT.getVectorNumElements());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
                     DAG.getValueType(ExtVT));
}

// Look for build vector patterns where input operands come from sign
// extended vector_extract elements of specific indices. If the correct indices
// aren't used, add a vector shuffle to fix up the indices and create
// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
// during instruction selection.
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
  // This array encodes the indices that the vector sign extend instructions
  // extract from when extending from one type to another for both BE and LE.
  // The right nibble of each byte corresponds to the LE indices,
  // and the left nibble of each byte corresponds to the BE indices.
  // For example: 0x3074B8FC  byte->word
  // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
  // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
  // For example: 0x000070F8  byte->double word
  // For LE: the allowed indices are: 0x0,0x8
  // For BE: the allowed indices are: 0x7,0xF
  uint64_t TargetElems[] = {
      0x3074B8FC, // b->w
      0x000070F8, // b->d
      0x10325476, // h->w
      0x00003074, // h->d
      0x00001032, // w->d
  };

  uint64_t Elems = 0;
  int Index;
  SDValue Input;

  auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
    if (!Op)
      return false;
    if (Op.getOpcode() != ISD::SIGN_EXTEND &&
        Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
      return false;

    // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
    // of the right width.
    SDValue Extract = Op.getOperand(0);
    if (Extract.getOpcode() == ISD::ANY_EXTEND)
      Extract = Extract.getOperand(0);
    if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return false;

    ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
    if (!ExtOp)
      return false;

    Index = ExtOp->getZExtValue();
    if (Input && Input != Extract.getOperand(0))
      return false;

    if (!Input)
      Input = Extract.getOperand(0);

    Elems = Elems << 8;
    Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
    Elems |= Index;

    return true;
  };

  // If the build vector operands aren't sign extended vector extracts
  // of the same input vector, then return.
  for (unsigned i = 0; i < N->getNumOperands(); i++) {
    if (!isSExtOfVecExtract(N->getOperand(i))) {
      return SDValue();
    }
  }

  // If the vector extract indices are not correct, add the appropriate
  // vector_shuffle.
  int TgtElemArrayIdx;
  int InputSize = Input.getValueType().getScalarSizeInBits();
  int OutputSize = N->getValueType(0).getScalarSizeInBits();
  if (InputSize + OutputSize == 40)
    TgtElemArrayIdx = 0;
  else if (InputSize + OutputSize == 72)
    TgtElemArrayIdx = 1;
  else if (InputSize + OutputSize == 48)
    TgtElemArrayIdx = 2;
  else if (InputSize + OutputSize == 80)
    TgtElemArrayIdx = 3;
  else if (InputSize + OutputSize == 96)
    TgtElemArrayIdx = 4;
  else
    return SDValue();

  uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
  CorrectElems = DAG.getDataLayout().isLittleEndian()
                     ? CorrectElems & 0x0F0F0F0F0F0F0F0F
                     : CorrectElems & 0xF0F0F0F0F0F0F0F0;
  if (Elems != CorrectElems) {
    return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
  }

  // Regular lowering will catch cases where a shuffle is not needed.
  return SDValue();
}

// Look for the pattern of a load from a narrow width to i128, feeding
// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
// (LXVRZX). This node represents a zero extending load that will be matched
// to the Load VSX Vector Rightmost instructions.
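//
// For example (illustrative):
//   (v1i128 (build_vector (i128 (zextload i32 from p))))
// becomes a PPCISD::LXVRZX memory node that loads 32 bits into the rightmost
// element and zeroes the rest of the vector register.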
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);

  // This combine is only eligible for a BUILD_VECTOR of v1i128.
  if (N->getValueType(0) != MVT::v1i128)
    return SDValue();

  SDValue Operand = N->getOperand(0);
  // Proceed with the transformation if the operand to the BUILD_VECTOR
  // is a load instruction.
  if (Operand.getOpcode() != ISD::LOAD)
    return SDValue();

  auto *LD = cast<LoadSDNode>(Operand);
  EVT MemoryType = LD->getMemoryVT();

  // This transformation is only valid if we are loading either a byte,
  // halfword, word, or doubleword.
  bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
                     MemoryType == MVT::i32 || MemoryType == MVT::i64;

  // Ensure that the load from the narrow width is being zero extended to i128.
  if (!ValidLDType ||
      (LD->getExtensionType() != ISD::ZEXTLOAD &&
       LD->getExtensionType() != ISD::EXTLOAD))
    return SDValue();

  SDValue LoadOps[] = {
      LD->getChain(), LD->getBasePtr(),
      DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};

  return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
                                 DAG.getVTList(MVT::v1i128, MVT::Other),
                                 LoadOps, MemoryType, LD->getMemOperand());
}

SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  if (!Subtarget.hasVSX())
    return SDValue();

  // The target independent DAG combiner will leave a build_vector of
  // float-to-int conversions intact. We can generate MUCH better code for
  // a float-to-int conversion of a vector of floats.
  SDValue FirstInput = N->getOperand(0);
  if (FirstInput.getOpcode() == PPCISD::MFVSR) {
    SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
    if (Reduced)
      return Reduced;
  }

  // If we're building a vector out of consecutive loads, just load that
  // vector type.
  SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
  if (Reduced)
    return Reduced;

  // If we're building a vector out of extended elements from another vector
  // we have P9 vector integer extend instructions. The code assumes legal
  // input types (i.e. it can't handle things like v4i16) so do not run before
  // legalization.
  if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
    Reduced = combineBVOfVecSExt(N, DAG);
    if (Reduced)
      return Reduced;
  }

  // On Power10, the Load VSX Vector Rightmost instructions can be utilized
  // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
  // is a load from <valid narrow width> to i128.
  if (Subtarget.isISA3_1()) {
    SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
    if (BVOfZLoad)
      return BVOfZLoad;
  }

  if (N->getValueType(0) != MVT::v2f64)
    return SDValue();

  // Looking for:
  // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
  if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
      FirstInput.getOpcode() != ISD::UINT_TO_FP)
    return SDValue();
  if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
      N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
    return SDValue();
  if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
    return SDValue();

  SDValue Ext1 = FirstInput.getOperand(0);
  SDValue Ext2 = N->getOperand(1).getOperand(0);
  if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();

  ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
  ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
  if (!Ext1Op || !Ext2Op)
    return SDValue();
  if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
      Ext1.getOperand(0) != Ext2.getOperand(0))
    return SDValue();

  int FirstElem = Ext1Op->getZExtValue();
  int SecondElem = Ext2Op->getZExtValue();
  int SubvecIdx;
  if (FirstElem == 0 && SecondElem == 1)
    SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
  else if (FirstElem == 2 && SecondElem == 3)
    SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
  else
    return SDValue();

  SDValue SrcVec = Ext1.getOperand(0);
  auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
    PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
  return DAG.getNode(NodeType, dl, MVT::v2f64,
                     SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
}

SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
          N->getOpcode() == ISD::UINT_TO_FP) &&
         "Need an int -> FP conversion node here");

  if (useSoftFloat() || !Subtarget.has64BitSupport())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Op(N, 0);

  // Don't handle ppc_fp128 here or conversions that are out-of-range capable
  // from the hardware.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();
  if (!Op.getOperand(0).getValueType().isSimple())
    return SDValue();
  if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
      Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
    return SDValue();

  SDValue FirstOperand(Op.getOperand(0));
  bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
    (FirstOperand.getValueType() == MVT::i8 ||
     FirstOperand.getValueType() == MVT::i16);
  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
    bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
    bool DstDouble = Op.getValueType() == MVT::f64;
    unsigned ConvOp = Signed ?
      (DstDouble ? PPCISD::FCFID  : PPCISD::FCFIDS) :
      (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
    SDValue WidthConst =
      DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
                            dl, false);
    LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
    SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
    SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
                                         DAG.getVTList(MVT::f64, MVT::Other),
                                         Ops, MVT::i8, LDN->getMemOperand());
    DAG.makeEquivalentMemoryOrdering(LDN, Ld);

    // For signed conversion, we need to sign-extend the value in the VSR.
    if (Signed) {
      SDValue ExtOps[] = { Ld, WidthConst };
      SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
    } else
      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
  }

  // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value undefined. Within the set of
  // scalar instructions, we have no method for zero- or sign-extending the
  // value. Thus, we cannot handle i32 intermediate values here.
  if (Op.getOperand(0).getValueType() == MVT::i32)
    return SDValue();

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;

  // If we're converting from a float, to an int, and back to a float again,
  // then we don't need the store/load pair at all.
15193 if ((Op
.getOperand(0).getOpcode() == ISD::FP_TO_UINT
&&
15194 Subtarget
.hasFPCVT()) ||
15195 (Op
.getOperand(0).getOpcode() == ISD::FP_TO_SINT
)) {
15196 SDValue Src
= Op
.getOperand(0).getOperand(0);
15197 if (Src
.getValueType() == MVT::f32
) {
15198 Src
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Src
);
15199 DCI
.AddToWorklist(Src
.getNode());
15200 } else if (Src
.getValueType() != MVT::f64
) {
15201 // Make sure that we don't pick up a ppc_fp128 source value.
15206 Op
.getOperand(0).getOpcode() == ISD::FP_TO_SINT
? PPCISD::FCTIDZ
:
15209 SDValue Tmp
= DAG
.getNode(FCTOp
, dl
, MVT::f64
, Src
);
15210 SDValue FP
= DAG
.getNode(FCFOp
, dl
, FCFTy
, Tmp
);
15212 if (Op
.getValueType() == MVT::f32
&& !Subtarget
.hasFPCVT()) {
15213 FP
= DAG
.getNode(ISD::FP_ROUND
, dl
, MVT::f32
, FP
,
15214 DAG
.getIntPtrConstant(0, dl
, /*isTarget=*/true));
15215 DCI
.AddToWorklist(FP
.getNode());
// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
// builtins) into loads with swaps.
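// For example, on a little-endian subtarget a vanilla v4i32 VSX load ends up
// (roughly) as
//   (v4i32 (bitcast (xxswapd (lxvd2x addr))))
// so that the element numbering seen by the rest of the DAG stays correct.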
SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  // Delay VSX load for LE combine until after LegalizeOps to prioritize other
  // combines.
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  MachineMemOperand *MMO;

  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX load");
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    Chain = LD->getChain();
    Base = LD->getBasePtr();
    MMO = LD->getMemOperand();
    // If the MMO suggests this isn't a load of a full vector, leave
    // things alone. For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
    Chain = Intrin->getChain();
    // Similarly to the store case below, Intrin->getBasePtr() doesn't get
    // us what we want. Get operand 2 instead.
    Base = Intrin->getOperand(2);
    MMO = Intrin->getMemOperand();
    break;
  }
  }

  MVT VecTy = N->getValueType(0).getSimpleVT();

  SDValue LoadOps[] = { Chain, Base };
  SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
                                         DAG.getVTList(MVT::v2f64, MVT::Other),
                                         LoadOps, MVT::v2f64, MMO);

  DCI.AddToWorklist(Load.getNode());
  Chain = Load.getValue(1);
  SDValue Swap = DAG.getNode(
      PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
  DCI.AddToWorklist(Swap.getNode());

  // Add a bitcast if the resulting load type doesn't match v2f64.
  if (VecTy != MVT::v2f64) {
    SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
    DCI.AddToWorklist(N.getNode());
    // Package {bitcast value, swap's chain} to match Load's shape.
    return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
                       N, Swap.getValue(1));
  }

  return Swap;
}
// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
// builtins) into stores with swaps.
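// The mirror image of the load case above: a little-endian VSX vector store
// is emitted (roughly) as
//   (stxvd2x (xxswapd %value), addr)
// so the in-memory layout matches big-endian element numbering.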
SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  // Delay VSX store for LE combine until after LegalizeOps to prioritize other
  // combines.
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  unsigned SrcOpnd;
  MachineMemOperand *MMO;

  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX store");
  case ISD::STORE: {
    StoreSDNode *ST = cast<StoreSDNode>(N);
    Chain = ST->getChain();
    Base = ST->getBasePtr();
    MMO = ST->getMemOperand();
    SrcOpnd = 1;
    // If the MMO suggests this isn't a store of a full vector, leave
    // things alone. For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_VOID: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
    Chain = Intrin->getChain();
    // Intrin->getBasePtr() oddly does not get what we want.
    Base = Intrin->getOperand(3);
    MMO = Intrin->getMemOperand();
    SrcOpnd = 2;
    break;
  }
  }

  SDValue Src = N->getOperand(SrcOpnd);
  MVT VecTy = Src.getValueType().getSimpleVT();

  // All stores are done as v2f64 and a possible bit cast.
  if (VecTy != MVT::v2f64) {
    Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
    DCI.AddToWorklist(Src.getNode());
  }

  SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
                             DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
  DCI.AddToWorklist(Swap.getNode());
  Chain = Swap.getValue(1);
  SDValue StoreOps[] = { Chain, Swap, Base };
  SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
                                          DAG.getVTList(MVT::Other),
                                          StoreOps, VecTy, MMO);
  DCI.AddToWorklist(Store.getNode());
  return Store;
}
// Handle DAG combine for STORE (FP_TO_INT F).
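// For example, with VSX and FPCVT available,
//   (store (fp_to_sint f64 %x), %ptr)
// is turned into a single PPCISD::ST_VSR_SCAL_INT node so the converted value
// is stored directly from the VSR instead of bouncing through a GPR.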
SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  unsigned Opcode = N->getOperand(1).getOpcode();
  bool Strict = N->getOperand(1)->isStrictFPOpcode();

  assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
          Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
         && "Not a FP_TO_INT Instruction!");

  SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
  EVT Op1VT = N->getOperand(1).getValueType();
  EVT ResVT = Val.getValueType();

  if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
    return SDValue();

  // Only perform combine for conversion to i64/i32 or power9 i16/i8.
  bool ValidTypeForStoreFltAsInt =
      (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
       (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));

  // TODO: Lower conversion from f128 on all VSX targets
  if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
    return SDValue();

  if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
      cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
    return SDValue();

  Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);

  // Set number of bytes being converted.
  unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
  SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
                   DAG.getIntPtrConstant(ByteSize, dl, false),
                   DAG.getValueType(Op1VT)};

  Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
                                DAG.getVTList(MVT::Other), Ops,
                                cast<StoreSDNode>(N)->getMemoryVT(),
                                cast<StoreSDNode>(N)->getMemOperand());
  return Val;
}
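
// Returns true if Mask alternates between the two source vectors on every
// step, e.g. <0, 8, 1, 9, 2, 10, 3, 11> or <8, 0, 9, 1, ...> for v8i16.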
static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
  // Check that the source of the element keeps flipping
  // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
  bool PrevElemFromFirstVec = Mask[0] < NumElts;
  for (int i = 1, e = Mask.size(); i < e; i++) {
    if (PrevElemFromFirstVec && Mask[i] < NumElts)
      return false;
    if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
      return false;
    PrevElemFromFirstVec = !PrevElemFromFirstVec;
  }
  return true;
}
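
// Returns true if Op is a BUILD_VECTOR whose defined operands are all the same
// value, i.e. a splat that may also contain undef elements.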
static bool isSplatBV(SDValue Op) {
  if (Op.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  SDValue FirstOp;

  // Find first non-undef input.
  for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
    FirstOp = Op.getOperand(i);
    if (!FirstOp.isUndef())
      break;
  }

  // All inputs are undef or the same as the first non-undef input.
  for (int i = 1, e = Op.getNumOperands(); i < e; i++)
    if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
      return false;
  return true;
}
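
// Returns the SCALAR_TO_VECTOR node feeding Op (looking through at most one
// BITCAST), or an empty SDValue if there is none.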
static SDValue isScalarToVec(SDValue Op) {
  if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
    return Op;
  if (Op.getOpcode() != ISD::BITCAST)
    return SDValue();
  Op = Op.getOperand(0);
  if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
    return Op;
  return SDValue();
}
// Fix up the shuffle mask to account for the fact that the result of
// scalar_to_vector is not in lane zero. This just takes all values in
// the ranges specified by the min/max indices and adds the number of
// elements required to ensure each element comes from the respective
// position in the valid lane.
// On little endian, that's just the corresponding element in the other
// half of the vector. On big endian, it is in the same half but right
// justified rather than left justified in that half.
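// For example, with v4i32 inputs on little endian (HalfVec == 2), a mask
// entry of 0 that refers to the permuted scalar_to_vector operand becomes 2,
// the element in the other half of the vector where the value actually lives.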
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
                                            int LHSMaxIdx, int RHSMinIdx,
                                            int RHSMaxIdx, int HalfVec,
                                            unsigned ValidLaneWidth,
                                            const PPCSubtarget &Subtarget) {
  for (int i = 0, e = ShuffV.size(); i < e; i++) {
    int Idx = ShuffV[i];
    if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
      ShuffV[i] +=
          Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
  }
}
// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
// the original is:
// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
// In such a case, just change the shuffle mask to extract the element
// from the permuted index.
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
                               const PPCSubtarget &Subtarget) {
  SDLoc dl(OrigSToV);
  EVT VT = OrigSToV.getValueType();
  assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
         "Expecting a SCALAR_TO_VECTOR here");
  SDValue Input = OrigSToV.getOperand(0);

  if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
    SDValue OrigVector = Input.getOperand(0);

    // Can't handle non-const element indices or different vector types
    // for the input to the extract and the output of the scalar_to_vector.
    if (Idx && VT == OrigVector.getValueType()) {
      unsigned NumElts = VT.getVectorNumElements();
      assert(NumElts > 1 &&
             "Cannot produce a permuted scalar_to_vector for one element vector");
      SmallVector<int, 16> NewMask(NumElts, -1);
      unsigned ResultInElt = NumElts / 2;
      ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
      NewMask[ResultInElt] = Idx->getZExtValue();
      return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
    }
  }
  return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
                     OrigSToV.getOperand(0));
}
// On little endian subtargets, combine shuffles such as:
// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
// into:
// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
// because the latter can be matched to a single instruction merge.
// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
// to put the value into element zero. Adjust the shuffle mask so that the
// vector can remain in permuted form (to prevent a swap prior to a shuffle).
// On big endian targets, this is still useful for SCALAR_TO_VECTOR
// nodes with elements smaller than doubleword because all the ways
// of getting scalar data into a vector register put the value in the
// rightmost element of the left half of the vector.
SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
                                                SelectionDAG &DAG) const {
  SDValue LHS = SVN->getOperand(0);
  SDValue RHS = SVN->getOperand(1);
  auto Mask = SVN->getMask();
  int NumElts = LHS.getValueType().getVectorNumElements();
  SDValue Res(SVN, 0);
  SDLoc dl(SVN);
  bool IsLittleEndian = Subtarget.isLittleEndian();

  // On big endian targets this is only useful for subtargets with direct moves.
  // On little endian targets it would be useful for all subtargets with VSX.
  // However adding special handling for LE subtargets without direct moves
  // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
  // which includes direct moves.
  if (!Subtarget.hasDirectMove())
    return Res;

  // If this is not a shuffle of a shuffle and the first element comes from
  // the second vector, canonicalize to the commuted form. This will make it
  // more likely to match one of the single instruction patterns.
  if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
      RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
    std::swap(LHS, RHS);
    Res = DAG.getCommutedVectorShuffle(*SVN);
    Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
  }

  // Adjust the shuffle mask if either input vector comes from a
  // SCALAR_TO_VECTOR and keep the respective input vector in permuted
  // form (to prevent the need for a swap).
  SmallVector<int, 16> ShuffV(Mask);
  SDValue SToVLHS = isScalarToVec(LHS);
  SDValue SToVRHS = isScalarToVec(RHS);
  if (SToVLHS || SToVRHS) {
    // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
    // same type and have differing element sizes, then do not perform
    // the following transformation. The current transformation for
    // SCALAR_TO_VECTOR assumes that both input vectors have the same
    // element size. This will be updated in the future to account for
    // differing sizes of the LHS and RHS.
    if (SToVLHS && SToVRHS &&
        (SToVLHS.getValueType().getScalarSizeInBits() !=
         SToVRHS.getValueType().getScalarSizeInBits()))
      return Res;

    int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
                            : SToVRHS.getValueType().getVectorNumElements();
    int NumEltsOut = ShuffV.size();
    // The width of the "valid lane" (i.e. the lane that contains the value that
    // is vectorized) needs to be expressed in terms of the number of elements
    // of the shuffle. It is thereby the ratio of the values before and after
    // any necessary bitcasts.
    unsigned ValidLaneWidth =
        SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
                      LHS.getValueType().getScalarSizeInBits()
                : SToVRHS.getValueType().getScalarSizeInBits() /
                      RHS.getValueType().getScalarSizeInBits();

    // Initially assume that neither input is permuted. These will be adjusted
    // accordingly if either input is.
    int LHSMaxIdx = -1;
    int RHSMinIdx = -1;
    int RHSMaxIdx = -1;
    int HalfVec = LHS.getValueType().getVectorNumElements() / 2;

    // Get the permuted scalar to vector nodes for the source(s) that come from
    // ISD::SCALAR_TO_VECTOR.
    // On big endian systems, this only makes sense for element sizes smaller
    // than 64 bits since for 64-bit elements, all instructions already put
    // the value into element zero. Since scalar size of LHS and RHS may differ
    // after isScalarToVec, this should be checked using their own sizes.
    if (SToVLHS) {
      if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
        return Res;
      // Set up the values for the shuffle vector fixup.
      LHSMaxIdx = NumEltsOut / NumEltsIn;
      SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
      if (SToVLHS.getValueType() != LHS.getValueType())
        SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
      LHS = SToVLHS;
    }
    if (SToVRHS) {
      if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
        return Res;
      RHSMinIdx = NumEltsOut;
      RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
      SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
      if (SToVRHS.getValueType() != RHS.getValueType())
        SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
      RHS = SToVRHS;
    }

    // Fix up the shuffle mask to reflect where the desired element actually is.
    // The minimum and maximum indices that correspond to element zero for both
    // the LHS and RHS are computed and will control which shuffle mask entries
    // are to be changed. For example, if the RHS is permuted, any shuffle mask
    // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
    fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
                                    HalfVec, ValidLaneWidth, Subtarget);
    Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);

    // We may have simplified away the shuffle. We won't be able to do anything
    // further with it here.
    if (!isa<ShuffleVectorSDNode>(Res))
      return Res;
    Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
  }

  SDValue TheSplat = IsLittleEndian ? RHS : LHS;
  // The common case after we commuted the shuffle is that the RHS is a splat
  // and we have elements coming in from the splat at indices that are not
  // conducive to using a merge.
  // Example:
  // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
  if (!isSplatBV(TheSplat))
    return Res;

  // We are looking for a mask such that all even elements are from
  // one vector and all odd elements from the other.
  if (!isAlternatingShuffMask(Mask, NumElts))
    return Res;

  // Adjust the mask so we are pulling in the same index from the splat
  // as the index from the interesting vector in consecutive elements.
  if (IsLittleEndian) {
    // Example (even elements from first vector):
    // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
    if (Mask[0] < NumElts)
      for (int i = 1, e = Mask.size(); i < e; i += 2) {
        // If element from non-splat is undef, pick first element from splat.
        ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
      }
    // Example (odd elements from first vector):
    // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
    else
      for (int i = 0, e = Mask.size(); i < e; i += 2) {
        // If element from non-splat is undef, pick first element from splat.
        ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
      }
  } else {
    // Example (even elements from first vector):
    // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
    if (Mask[0] < NumElts)
      for (int i = 0, e = Mask.size(); i < e; i += 2) {
        // If element from non-splat is undef, pick first element from splat.
        ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
      }
    // Example (odd elements from first vector):
    // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
    else
      for (int i = 1, e = Mask.size(); i < e; i += 2) {
        // If element from non-splat is undef, pick first element from splat.
        ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
      }
  }

  // If the RHS has undefs, we need to remove them since we may have created
  // a shuffle that adds those instead of the splat value.
  SDValue SplatVal =
      cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
  TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);

  if (IsLittleEndian)
    RHS = TheSplat;
  else
    LHS = TheSplat;
  return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
}
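
// Rewrites an element-reversing shuffle that is fed by (or feeds) a normal
// vector load (or store) into a single byte-reversed memory access, e.g.
//   (vector_shuffle<3,2,1,0> (load %p))  ->  (PPCISD::LOAD_VEC_BE %p)
// on little-endian Power9 and later, so no explicit swap is needed.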
SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
                                                LSBaseSDNode *LSBase,
                                                DAGCombinerInfo &DCI) const {
  assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
         "Not a reverse memop pattern!");

  auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
    auto Mask = SVN->getMask();
    int i = 0;
    auto I = Mask.rbegin();
    auto E = Mask.rend();

    for (; I != E; ++I) {
      if (*I != i)
        return false;
      i++;
    }
    return true;
  };

  SelectionDAG &DAG = DCI.DAG;
  EVT VT = SVN->getValueType(0);

  if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
    return SDValue();

  // Before P9, we have PPCVSXSwapRemoval pass to hack the element order.
  // See comment in PPCVSXSwapRemoval.cpp.
  // It conflicts with the PPCVSXSwapRemoval optimization, so we don't do it.
  if (!Subtarget.hasP9Vector())
    return SDValue();

  if (!IsElementReverse(SVN))
    return SDValue();

  if (LSBase->getOpcode() == ISD::LOAD) {
    // If result 0 of the load has any user other than the shufflevector
    // instruction, it is not profitable to replace the shufflevector with
    // a reverse load.
    for (SDNode::use_iterator UI = LSBase->use_begin(), UE = LSBase->use_end();
         UI != UE; ++UI)
      if (UI.getUse().getResNo() == 0 && UI->getOpcode() != ISD::VECTOR_SHUFFLE)
        return SDValue();

    SDLoc dl(LSBase);
    SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
    return DAG.getMemIntrinsicNode(
        PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
        LSBase->getMemoryVT(), LSBase->getMemOperand());
  }

  if (LSBase->getOpcode() == ISD::STORE) {
    // If there are other uses of the shuffle, the swap cannot be avoided.
    // Forcing the use of an X-Form (since swapped stores only have
    // X-Forms) without removing the swap is unprofitable.
    if (!SVN->hasOneUse())
      return SDValue();

    SDLoc dl(LSBase);
    SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
                          LSBase->getBasePtr()};
    return DAG.getMemIntrinsicNode(
        PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
        LSBase->getMemoryVT(), LSBase->getMemOperand());
  }

  llvm_unreachable("Expected a load or store node here");
}
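
// Maps a store-conditional intrinsic (st[bhwd]cx.) to the width in bytes of
// the store it performs; returns false for any other intrinsic.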
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
  unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
  if (IntrinsicID == Intrinsic::ppc_stdcx)
    StoreWidth = 8;
  else if (IntrinsicID == Intrinsic::ppc_stwcx)
    StoreWidth = 4;
  else if (IntrinsicID == Intrinsic::ppc_sthcx)
    StoreWidth = 2;
  else if (IntrinsicID == Intrinsic::ppc_stbcx)
    StoreWidth = 1;
  else
    return false;
  return true;
}
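
// Main target DAG-combine hook: dispatch on the opcode of N and try the
// PPC-specific combines implemented above.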
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default: break;
  case ISD::ADD:
    return combineADD(N, DCI);
  case ISD::AND: {
    // We don't want (and (zext (shift...)), C) if C fits in the width of the
    // original input as that will prevent us from selecting optimal rotates.
    // This only matters if the input to the extend is i32 widened to i64.
    SDValue Op1 = N->getOperand(0);
    SDValue Op2 = N->getOperand(1);
    if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
         Op1.getOpcode() != ISD::ANY_EXTEND) ||
        !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
        Op1.getOperand(0).getValueType() != MVT::i32)
      break;
    SDValue NarrowOp = Op1.getOperand(0);
    if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
        NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
      break;

    uint64_t Imm = Op2->getAsZExtVal();
    // Make sure that the constant is narrow enough to fit in the narrow type.
    if (!isUInt<32>(Imm))
      break;
    SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
    SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
    return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
  }
  case ISD::SHL:
    return combineSHL(N, DCI);
  case ISD::SRA:
    return combineSRA(N, DCI);
  case ISD::SRL:
    return combineSRL(N, DCI);
  case ISD::MUL:
    return combineMUL(N, DCI);
  case ISD::FMA:
  case PPCISD::FNMSUB:
    return combineFMALike(N, DCI);
  case PPCISD::SHL:
    if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
      return N->getOperand(0);
    break;
  case PPCISD::SRL:
    if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
      return N->getOperand(0);
    break;
  case PPCISD::SRA:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isZero() ||   // 0 >>s V -> 0.
          C->isAllOnes())  // -1 >>s V -> -1.
        return N->getOperand(0);
    }
    break;
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
    return DAGCombineExtBoolTrunc(N, DCI);
  case ISD::TRUNCATE:
    return combineTRUNCATE(N, DCI);
  case ISD::SETCC:
    if (SDValue CSCC = combineSetCC(N, DCI))
      return CSCC;
    break;
  case ISD::SELECT_CC:
    return DAGCombineTruncBoolExt(N, DCI);
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    return combineFPToIntToFP(N, DCI);
  case ISD::VECTOR_SHUFFLE:
    if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
      LSBaseSDNode *LSBase = cast<LSBaseSDNode>(N->getOperand(0));
      return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
    }
    return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
  case ISD::STORE: {
    EVT Op1VT = N->getOperand(1).getValueType();
    unsigned Opcode = N->getOperand(1).getOpcode();

    if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
        Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
      SDValue Val = combineStoreFPToInt(N, DCI);
      if (Val)
        return Val;
    }

    if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
      ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
      SDValue Val = combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
      if (Val)
        return Val;
    }

    // Turn STORE (BSWAP) -> sthbrx/stwbrx.
    if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
        N->getOperand(1).getNode()->hasOneUse() &&
        (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
         (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {

      // STBRX can only handle simple types and it makes no sense to store less
      // than two bytes in byte-reversed order.
      EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
      if (mVT.isExtended() || mVT.getSizeInBits() < 16)
        break;

      SDValue BSwapOp = N->getOperand(1).getOperand(0);
      // Do an any-extend to 32-bits if this is a half-word input.
      if (BSwapOp.getValueType() == MVT::i16)
        BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);

      // If the type of the BSWAP operand is wider than the stored memory width
      // it needs to be shifted right before STBRX.
      if (Op1VT.bitsGT(mVT)) {
        int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
        BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
                              DAG.getConstant(Shift, dl, MVT::i32));
        // Need to truncate if this is a bswap of i64 stored as i32/i16.
        if (Op1VT == MVT::i64)
          BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
      }

      SDValue Ops[] = {
        N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
      };
      return
        DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
                                Ops, cast<StoreSDNode>(N)->getMemoryVT(),
                                cast<StoreSDNode>(N)->getMemOperand());
    }

    // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
    // So it can increase the chance of CSE constant construction.
    if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
        isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
      // Need to sign-extend to 64 bits to handle negative values.
      EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
      uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
                                    MemVT.getSizeInBits());
      SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);

      // DAG.getTruncStore() can't be used here because it doesn't accept
      // the general (base + offset) addressing mode.
      // So we use UpdateNodeOperands and setTruncatingStore instead.
      DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
                             N->getOperand(3));
      cast<StoreSDNode>(N)->setTruncatingStore(true);
      return SDValue(N, 0);
    }

    // For little endian, VSX stores require generating xxswapd/lxvd2x.
    // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
    if (Op1VT.isSimple()) {
      MVT StoreVT = Op1VT.getSimpleVT();
      if (Subtarget.needsSwapsForVSXMemOps() &&
          (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
           StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
        return expandVSXStoreForLE(N, DCI);
    }
    break;
  }
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT VT = LD->getValueType(0);

    // For little endian, VSX loads require generating lxvd2x/xxswapd.
    // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
    if (VT.isSimple()) {
      MVT LoadVT = VT.getSimpleVT();
      if (Subtarget.needsSwapsForVSXMemOps() &&
          (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
           LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
        return expandVSXLoadForLE(N, DCI);
    }

    // We sometimes end up with a 64-bit integer load, from which we extract
    // two single-precision floating-point numbers. This happens with
    // std::complex<float>, and other similar structures, because of the way we
    // canonicalize structure copies. However, if we lack direct moves,
    // then the final bitcasts from the extracted integer values to the
    // floating-point numbers turn into store/load pairs. Even with direct moves,
    // just loading the two floating-point numbers is likely better.
    auto ReplaceTwoFloatLoad = [&]() {
      if (VT != MVT::i64)
        return false;

      if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
          LD->isVolatile())
        return false;

      // We're looking for a sequence like this:
      // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
      //     t16: i64 = srl t13, Constant:i32<32>
      //   t17: i32 = truncate t16
      // t18: f32 = bitcast t17
      //   t19: i32 = truncate t13
      // t20: f32 = bitcast t19

      if (!LD->hasNUsesOfValue(2, 0))
        return false;

      auto UI = LD->use_begin();
      while (UI.getUse().getResNo() != 0) ++UI;
      SDNode *Trunc = *UI++;
      while (UI.getUse().getResNo() != 0) ++UI;
      SDNode *RightShift = *UI;
      if (Trunc->getOpcode() != ISD::TRUNCATE)
        std::swap(Trunc, RightShift);

      if (Trunc->getOpcode() != ISD::TRUNCATE ||
          Trunc->getValueType(0) != MVT::i32 ||
          !Trunc->hasOneUse())
        return false;
      if (RightShift->getOpcode() != ISD::SRL ||
          !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
          RightShift->getConstantOperandVal(1) != 32 ||
          !RightShift->hasOneUse())
        return false;

      SDNode *Trunc2 = *RightShift->use_begin();
      if (Trunc2->getOpcode() != ISD::TRUNCATE ||
          Trunc2->getValueType(0) != MVT::i32 ||
          !Trunc2->hasOneUse())
        return false;

      SDNode *Bitcast = *Trunc->use_begin();
      SDNode *Bitcast2 = *Trunc2->use_begin();

      if (Bitcast->getOpcode() != ISD::BITCAST ||
          Bitcast->getValueType(0) != MVT::f32)
        return false;
      if (Bitcast2->getOpcode() != ISD::BITCAST ||
          Bitcast2->getValueType(0) != MVT::f32)
        return false;

      if (Subtarget.isLittleEndian())
        std::swap(Bitcast, Bitcast2);

      // Bitcast has the second float (in memory-layout order) and Bitcast2
      // has the first one.

      SDValue BasePtr = LD->getBasePtr();
      if (LD->isIndexed()) {
        assert(LD->getAddressingMode() == ISD::PRE_INC &&
               "Non-pre-inc AM on PPC?");
        BasePtr =
            DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                        LD->getOffset());
      }

      auto MMOFlags =
          LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
      SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
                                      LD->getPointerInfo(), LD->getAlign(),
                                      MMOFlags, LD->getAAInfo());
      SDValue AddPtr =
          DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
                      BasePtr, DAG.getIntPtrConstant(4, dl));
      SDValue FloatLoad2 = DAG.getLoad(
          MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
          LD->getPointerInfo().getWithOffset(4),
          commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());

      if (LD->isIndexed()) {
        // Note that DAGCombine should re-form any pre-increment load(s) from
        // what is produced here if that makes sense.
        DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
      }

      DCI.CombineTo(Bitcast2, FloatLoad);
      DCI.CombineTo(Bitcast, FloatLoad2);

      DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
                                    SDValue(FloatLoad2.getNode(), 1));
      return true;
    };

    if (ReplaceTwoFloatLoad())
      return SDValue(N, 0);
    EVT MemVT = LD->getMemoryVT();
    Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
    Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
    if (LD->isUnindexed() && VT.isVector() &&
        ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
          // P8 and later hardware should just use LOAD.
          !Subtarget.hasP8Vector() &&
          (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
           VT == MVT::v4f32))) &&
        LD->getAlign() < ABIAlignment) {
      // This is a type-legal unaligned Altivec load.
      SDValue Chain = LD->getChain();
      SDValue Ptr = LD->getBasePtr();
      bool isLittleEndian = Subtarget.isLittleEndian();

      // This implements the loading of unaligned vectors as described in
      // the venerable Apple Velocity Engine overview. Specifically:
      // https://developer.apple.com/hardwaredrivers/ve/alignment.html
      // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
      //
      // The general idea is to expand a sequence of one or more unaligned
      // loads into an alignment-based permutation-control instruction (lvsl
      // or lvsr), a series of regular vector loads (which always truncate
      // their input address to an aligned address), and a series of
      // permutations. The results of these permutations are the requested
      // loaded values. The trick is that the last "extra" load is not taken
      // from the address you might suspect (sizeof(vector) bytes after the
      // last requested load), but rather sizeof(vector) - 1 bytes after the
      // last requested vector. The point of this is to avoid a page fault if
      // the base address happened to be aligned. This works because if the
      // base address is aligned, then adding less than a full vector length
      // will cause the last vector in the sequence to be (re)loaded.
      // Otherwise, the next vector will be fetched as you might suspect was
      // necessary.

      // We might be able to reuse the permutation generation from
      // a different base address offset from this one by an aligned amount.
      // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
      // optimization later.
      Intrinsic::ID Intr, IntrLD, IntrPerm;
      MVT PermCntlTy, PermTy, LDTy;
      Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
                            : Intrinsic::ppc_altivec_lvsl;
      IntrLD = Intrinsic::ppc_altivec_lvx;
      IntrPerm = Intrinsic::ppc_altivec_vperm;
      PermCntlTy = MVT::v16i8;
      PermTy = MVT::v4i32;
      LDTy = MVT::v4i32;

      SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);

      // Create the new MMO for the new base load. It is like the original MMO,
      // but represents an area in memory almost twice the vector size centered
      // on the original address. If the address is unaligned, we might start
      // reading up to (sizeof(vector)-1) bytes below the address of the
      // original unaligned load.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineMemOperand *BaseMMO =
        MF.getMachineMemOperand(LD->getMemOperand(),
                                -(int64_t)MemVT.getStoreSize()+1,
                                2*MemVT.getStoreSize()-1);

      // Create the new base load.
      SDValue LDXIntID =
          DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
      SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
      SDValue BaseLoad =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
                                DAG.getVTList(PermTy, MVT::Other),
                                BaseLoadOps, LDTy, BaseMMO);

      // Note that the value of IncOffset (which is provided to the next
      // load's pointer info offset value, and thus used to calculate the
      // alignment), and the value of IncValue (which is actually used to
      // increment the pointer value) are different! This is because we
      // require the next load to appear to be aligned, even though it
      // is actually offset from the base pointer by a lesser amount.
      int IncOffset = VT.getSizeInBits() / 8;
      int IncValue = IncOffset;

      // Walk (both up and down) the chain looking for another load at the real
      // (aligned) offset (the alignment of the other load does not matter in
      // this case). If found, then do not use the offset reduction trick, as
      // that will prevent the loads from being later combined (as they would
      // otherwise be duplicates).
      if (!findConsecutiveLoad(LD, DAG))
        IncValue -= 1;

      SDValue Increment =
        DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);

      MachineMemOperand *ExtraMMO =
        MF.getMachineMemOperand(LD->getMemOperand(),
                                1, 2*MemVT.getStoreSize()-1);
      SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
      SDValue ExtraLoad =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
                                DAG.getVTList(PermTy, MVT::Other),
                                ExtraLoadOps, LDTy, ExtraMMO);

      SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                               BaseLoad.getValue(1), ExtraLoad.getValue(1));

      // Because vperm has a big-endian bias, we must reverse the order
      // of the input vectors and complement the permute control vector
      // when generating little endian code. We have already handled the
      // latter by using lvsr instead of lvsl, so just reverse BaseLoad
      // and ExtraLoad here.
      SDValue Perm;
      if (isLittleEndian)
        Perm = BuildIntrinsicOp(IntrPerm,
                                ExtraLoad, BaseLoad, PermCntl, DAG, dl);
      else
        Perm = BuildIntrinsicOp(IntrPerm,
                                BaseLoad, ExtraLoad, PermCntl, DAG, dl);

      if (VT != PermTy)
        Perm = Subtarget.hasAltivec()
                   ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
                   : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
                                 DAG.getTargetConstant(1, dl, MVT::i64));
                     // second argument is 1 because this rounding
                     // is always exact.

      // The output of the permutation is our loaded result, the TokenFactor is
      // the new chain.
      DCI.CombineTo(N, Perm, TF);
      return SDValue(N, 0);
    }
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    bool isLittleEndian = Subtarget.isLittleEndian();
    unsigned IID = N->getConstantOperandVal(0);
    Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
                                         : Intrinsic::ppc_altivec_lvsl);
    if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
      SDValue Add = N->getOperand(1);

      int Bits = 4 /* 16 byte alignment */;

      if (DAG.MaskedValueIsZero(Add->getOperand(1),
                                APInt::getAllOnes(Bits /* alignment */)
                                    .zext(Add.getScalarValueSizeInBits()))) {
        SDNode *BasePtr = Add->getOperand(0).getNode();
        for (SDNode *U : BasePtr->uses()) {
          if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
              U->getConstantOperandVal(0) == IID) {
            // We've found another LVSL/LVSR, and this address is an aligned
            // multiple of that one. The results will be the same, so use the
            // one we've just found instead.
            return SDValue(U, 0);
          }
        }
      }

      if (isa<ConstantSDNode>(Add->getOperand(1))) {
        SDNode *BasePtr = Add->getOperand(0).getNode();
        for (SDNode *U : BasePtr->uses()) {
          if (U->getOpcode() == ISD::ADD &&
              isa<ConstantSDNode>(U->getOperand(1)) &&
              (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
                      (1ULL << Bits) == 0) {
            SDNode *OtherAdd = U;
            for (SDNode *V : OtherAdd->uses()) {
              if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
                  V->getConstantOperandVal(0) == IID) {
                return SDValue(V, 0);
              }
            }
          }
        }
      }
    }

    // Combine vmaxsw/h/b(a, a's negation) to abs(a)
    // Expose the vabsduw/h/b opportunity for down stream
    if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
        (IID == Intrinsic::ppc_altivec_vmaxsw ||
         IID == Intrinsic::ppc_altivec_vmaxsh ||
         IID == Intrinsic::ppc_altivec_vmaxsb)) {
      SDValue V1 = N->getOperand(1);
      SDValue V2 = N->getOperand(2);
      if ((V1.getSimpleValueType() == MVT::v4i32 ||
           V1.getSimpleValueType() == MVT::v8i16 ||
           V1.getSimpleValueType() == MVT::v16i8) &&
          V1.getSimpleValueType() == V2.getSimpleValueType()) {
        // (0-a, a)
        if (V1.getOpcode() == ISD::SUB &&
            ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
            V1.getOperand(1) == V2) {
          return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
        }
        // (a, 0-a)
        if (V2.getOpcode() == ISD::SUB &&
            ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
            V2.getOperand(1) == V1) {
          return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
        }
        // (x-y, y-x)
        if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
            V1.getOperand(0) == V2.getOperand(1) &&
            V1.getOperand(1) == V2.getOperand(0)) {
          return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
        }
      }
    }
    break;
  }
  case ISD::INTRINSIC_W_CHAIN:
    switch (N->getConstantOperandVal(1)) {
    default:
      break;
    case Intrinsic::ppc_altivec_vsum4sbs:
    case Intrinsic::ppc_altivec_vsum4shs:
    case Intrinsic::ppc_altivec_vsum4ubs: {
      // These sum-across intrinsics only have a chain due to the side effect
      // that they may set the SAT bit. If we know the SAT bit will not be set
      // for some inputs, we can replace any uses of their chain with the
      // input chain.
      if (BuildVectorSDNode *BVN =
              dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
        APInt APSplatBits, APSplatUndef;
        unsigned SplatBitSize;
        bool HasAnyUndefs;
        bool BVNIsConstantSplat = BVN->isConstantSplat(
            APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
            !Subtarget.isLittleEndian());
        // If the constant splat vector is 0, the SAT bit will not be set.
        if (BVNIsConstantSplat && APSplatBits == 0)
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
      }
      return SDValue(N, 0);
    }
    case Intrinsic::ppc_vsx_lxvw4x:
    case Intrinsic::ppc_vsx_lxvd2x:
      // For little endian, VSX loads require generating lxvd2x/xxswapd.
      // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
      if (Subtarget.needsSwapsForVSXMemOps())
        return expandVSXLoadForLE(N, DCI);
      break;
    }
    break;
  case ISD::INTRINSIC_VOID:
    // For little endian, VSX stores require generating xxswapd/stxvd2x.
    // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
    if (Subtarget.needsSwapsForVSXMemOps()) {
      switch (N->getConstantOperandVal(1)) {
      default:
        break;
      case Intrinsic::ppc_vsx_stxvw4x:
      case Intrinsic::ppc_vsx_stxvd2x:
        return expandVSXStoreForLE(N, DCI);
      }
    }
    break;
  case ISD::BSWAP: {
    // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
    // For subtargets without LDBRX, we can still do better than the default
    // expansion even for 64-bit BSWAP (LOAD).
    bool Is64BitBswapOn64BitTgt =
        Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
    bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
                               N->getOperand(0).hasOneUse();
    if (IsSingleUseNormalLd &&
        (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
         (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
      SDValue Load = N->getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(Load);
      // Create the byte-swapping load.
      SDValue Ops[] = {
        LD->getChain(),                       // Chain
        LD->getBasePtr(),                     // Ptr
        DAG.getValueType(N->getValueType(0))  // VT
      };
      SDValue BSLoad =
        DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
                                DAG.getVTList(N->getValueType(0) == MVT::i64 ?
                                              MVT::i64 : MVT::i32, MVT::Other),
                                Ops, LD->getMemoryVT(), LD->getMemOperand());

      // If this is an i16 load, insert the truncate.
      SDValue ResVal = BSLoad;
      if (N->getValueType(0) == MVT::i16)
        ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);

      // First, combine the bswap away. This makes the value produced by the
      // load dead.
      DCI.CombineTo(N, ResVal);

      // Next, combine the load away, we give it a bogus result value but a real
      // chain result. The result value is dead because the bswap is dead.
      DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));

      // Return N so it doesn't get rechecked!
      return SDValue(N, 0);
    }

    // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
    // before legalization so that the BUILD_PAIR is handled correctly.
    if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
        !IsSingleUseNormalLd)
      return SDValue();
    LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));

    // Can't split volatile or atomic loads.
    if (!LD->isSimple())
      return SDValue();
    SDValue BasePtr = LD->getBasePtr();
    SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
                             LD->getPointerInfo(), LD->getAlign());
    Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
    BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                          DAG.getIntPtrConstant(4, dl));
    MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
        LD->getMemOperand(), 4, 4);
    SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
    Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
    SDValue Res;
    if (Subtarget.isLittleEndian())
      Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
    else
      Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
    SDValue TF =
        DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                    Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
    DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
    return Res;
  }
  case PPCISD::VCMP:
    // If a VCMP_rec node already exists with exactly the same operands as this
    // node, use its result instead of this node (VCMP_rec computes both a CR6
    // and a normal output).
    if (!N->getOperand(0).hasOneUse() &&
        !N->getOperand(1).hasOneUse() &&
        !N->getOperand(2).hasOneUse()) {

      // Scan all of the users of the LHS, looking for VCMP_rec's that match.
      SDNode *VCMPrecNode = nullptr;

      SDNode *LHSN = N->getOperand(0).getNode();
      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
           UI != E; ++UI)
        if (UI->getOpcode() == PPCISD::VCMP_rec &&
            UI->getOperand(1) == N->getOperand(1) &&
            UI->getOperand(2) == N->getOperand(2) &&
            UI->getOperand(0) == N->getOperand(0)) {
          VCMPrecNode = *UI;
          break;
        }

      // If there is no VCMP_rec node, or if the flag value has a single use,
      // don't transform this.
      if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
        break;

      // Look at the (necessarily single) use of the flag value. If it has a
      // chain, this transformation is more complex. Note that multiple things
      // could use the value result, which we should ignore.
      SDNode *FlagUser = nullptr;
      for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
           FlagUser == nullptr; ++UI) {
        assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
        SDNode *User = *UI;
        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
          if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
            FlagUser = User;
            break;
          }
        }
      }

      // If the user is a MFOCRF instruction, we know this is safe.
      // Otherwise we give up for right now.
      if (FlagUser->getOpcode() == PPCISD::MFOCRF)
        return SDValue(VCMPrecNode, 0);
    }
    break;
  case ISD::BR_CC: {
    // If this is a branch on an altivec predicate comparison, lower this so
    // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
    // lowering is done pre-legalize, because the legalizer lowers the predicate
    // compare down to code that is difficult to reassemble.
    // This code also handles branches that depend on the result of a store
    // conditional.
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
    SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);

    int CompareOpc;
    bool isDot;

    if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
      break;

    // Since we are doing this pre-legalize, the RHS can be a constant of
    // arbitrary bitwidth which may cause issues when trying to get the value
    // from the underlying APInt.
    auto RHSAPInt = RHS->getAsAPIntVal();
    if (!RHSAPInt.isIntN(64))
      break;

    unsigned Val = RHSAPInt.getZExtValue();
    auto isImpossibleCompare = [&]() {
      // If this is a comparison against something other than 0/1, then we know
      // that the condition is never/always true.
      if (Val != 0 && Val != 1) {
        if (CC == ISD::SETEQ)   // Cond never true, remove branch.
          return N->getOperand(0);
        // Always !=, turn it into an unconditional branch.
        return DAG.getNode(ISD::BR, dl, MVT::Other,
                           N->getOperand(0), N->getOperand(4));
      }
      return SDValue();
    };
    // Combine branches fed by store conditional instructions (st[bhwd]cx).
    unsigned StoreWidth = 0;
    if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        isStoreConditional(LHS, StoreWidth)) {
      if (SDValue Impossible = isImpossibleCompare())
        return Impossible;
      PPC::Predicate CompOpc;
      if (Val == 0)
        CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
      else
        CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;

      SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
                       DAG.getConstant(StoreWidth, dl, MVT::i32)};
      auto *MemNode = cast<MemSDNode>(LHS);
      SDValue ConstSt = DAG.getMemIntrinsicNode(
          PPCISD::STORE_COND, dl,
          DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
          MemNode->getMemoryVT(), MemNode->getMemOperand());

      SDValue InChain;
      // Unchain the branch from the original store conditional.
      if (N->getOperand(0) == LHS.getValue(1))
        InChain = LHS.getOperand(0);
      else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
        SmallVector<SDValue, 4> InChains;
        SDValue InTF = N->getOperand(0);
        for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
          if (InTF.getOperand(i) != LHS.getValue(1))
            InChains.push_back(InTF.getOperand(i));
        InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
      }

      return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
                         DAG.getConstant(CompOpc, dl, MVT::i32),
                         DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
                         ConstSt.getValue(2));
    }

    if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
      assert(isDot && "Can't compare against a vector result!");

      if (SDValue Impossible = isImpossibleCompare())
        return Impossible;

      bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
      // Create the PPCISD altivec 'dot' comparison node.
      SDValue Ops[] = {
        LHS.getOperand(2),  // LHS of compare
        LHS.getOperand(3),  // RHS of compare
        DAG.getConstant(CompareOpc, dl, MVT::i32)
      };
      EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
      SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);

      // Unpack the result based on how the target uses it.
      PPC::Predicate CompOpc;
      switch (LHS.getConstantOperandVal(1)) {
      default:  // Can't happen, don't crash on invalid number though.
      case 0:   // Branch on the value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
        break;
      case 1:   // Branch on the inverted value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
        break;
      case 2:   // Branch on the value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
        break;
      case 3:   // Branch on the inverted value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
        break;
      }

      return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
                         DAG.getConstant(CompOpc, dl, MVT::i32),
                         DAG.getRegister(PPC::CR6, MVT::i32),
                         N->getOperand(4), CompNode.getValue(1));
    }
    break;
  }
  case ISD::BUILD_VECTOR:
    return DAGCombineBuildVector(N, DCI);
  }

  return SDValue();
}
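
// Lower a signed divide by a power of two into a shift/add-with-carry
// sequence, e.g. sdiv i32 %x, 4 becomes the PPCISD::SRA_ADDZE node built
// below (srawi + addze), with an extra negation when the divisor is a
// negated power of two.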
SDValue
PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                 SelectionDAG &DAG,
                                 SmallVectorImpl<SDNode *> &Created) const {
  // fold (sdiv X, pow2)
  EVT VT = N->getValueType(0);
  if (VT == MVT::i64 && !Subtarget.isPPC64())
    return SDValue();
  if ((VT != MVT::i32 && VT != MVT::i64) ||
      !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
    return SDValue();

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);

  bool IsNegPow2 = Divisor.isNegatedPowerOf2();
  unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
  SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);

  SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
  Created.push_back(Op.getNode());

  if (IsNegPow2) {
    Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
    Created.push_back(Op.getNode());
  }

  return Op;
}
//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//
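
// For example, a 16-bit PPCISD::LBRX (lhbrx) can only produce values in
// [0, 0xFFFF], so its upper 16 bits are reported as known zero below; the
// dot-form AltiVec compare intrinsics likewise only ever produce 0 or 1.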
void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                      KnownBits &Known,
                                                      const APInt &DemandedElts,
                                                      const SelectionDAG &DAG,
                                                      unsigned Depth) const {
  Known.resetAll();
  switch (Op.getOpcode()) {
  default: break;
  case PPCISD::LBRX: {
    // lhbrx is known to have the top bits cleared out.
    if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
      Known.Zero = 0xFFFF0000;
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (Op.getConstantOperandVal(0)) {
    default: break;
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpequd_p:
    case Intrinsic::ppc_altivec_vcmpequq_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtsd_p:
    case Intrinsic::ppc_altivec_vcmpgtsq_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
    case Intrinsic::ppc_altivec_vcmpgtud_p:
    case Intrinsic::ppc_altivec_vcmpgtuq_p:
      Known.Zero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    switch (Op.getConstantOperandVal(1)) {
    default:
      break;
    case Intrinsic::ppc_load2r:
      // Top bits are cleared for load2r (which is the same as lhbrx).
      Known.Zero = 0xFFFF0000;
      break;
    }
    break;
  }
  }
}
PPCTargetLowering::getPrefLoopAlignment(MachineLoop
*ML
) const {
16661 switch (Subtarget
.getCPUDirective()) {
16664 case PPC::DIR_PWR4
:
16665 case PPC::DIR_PWR5
:
16666 case PPC::DIR_PWR5X
:
16667 case PPC::DIR_PWR6
:
16668 case PPC::DIR_PWR6X
:
16669 case PPC::DIR_PWR7
:
16670 case PPC::DIR_PWR8
:
16671 case PPC::DIR_PWR9
:
16672 case PPC::DIR_PWR10
:
16673 case PPC::DIR_PWR11
:
16674 case PPC::DIR_PWR_FUTURE
: {
16678 if (!DisableInnermostLoopAlign32
) {
16679 // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
16680 // so that we can decrease cache misses and branch-prediction misses.
16681 // Actual alignment of the loop will depend on the hotness check and other
16682 // logic in alignBlocks.
16683 if (ML
->getLoopDepth() > 1 && ML
->getSubLoops().empty())
16687 const PPCInstrInfo
*TII
= Subtarget
.getInstrInfo();
16689 // For small loops (between 5 and 8 instructions), align to a 32-byte
16690 // boundary so that the entire loop fits in one instruction-cache line.
16691 uint64_t LoopSize
= 0;
16692 for (auto I
= ML
->block_begin(), IE
= ML
->block_end(); I
!= IE
; ++I
)
16693 for (const MachineInstr
&J
: **I
) {
16694 LoopSize
+= TII
->getInstSizeInBytes(J
);
16699 if (LoopSize
> 16 && LoopSize
<= 32)
16706 return TargetLowering::getPrefLoopAlignment(ML
);
/// getConstraintType - Given a constraint, return the type of
/// constraint it is for this target.
PPCTargetLowering::ConstraintType
PPCTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
      return C_RegisterClass;
      // FIXME: While Z does indicate a memory constraint, it specifically
      // indicates an r+r address (used in conjunction with the 'y' modifier
      // in the replacement string). Currently, we're forcing the base
      // register to be r0 in the asm printer (which is interpreted as zero)
      // and forming the complete address in the second register. This is
  } else if (Constraint == "wc") { // individual CR bits.
    return C_RegisterClass;
  } else if (Constraint == "wa" || Constraint == "wd" ||
             Constraint == "wf" || Constraint == "ws" ||
             Constraint == "wi" || Constraint == "ww") {
    return C_RegisterClass; // VSX registers.
  return TargetLowering::getConstraintType(Constraint);

/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
PPCTargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
  Type *type = CallOperandVal->getType();

  // Look at the constraint type.
  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
    return CW_Register; // an individual CR bit.
  else if ((StringRef(constraint) == "wa" ||
            StringRef(constraint) == "wd" ||
            StringRef(constraint) == "wf") &&
           type->isVectorTy())
    return CW_Register;
  else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
    return CW_Register; // just holds 64-bit integer data.
  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
    return CW_Register;
  else if (StringRef(constraint) == "ww" && type->isFloatTy())
    return CW_Register;

  switch (*constraint) {
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    if (type->isIntegerTy())
      weight = CW_Register;
    if (type->isFloatTy())
      weight = CW_Register;
    if (type->isDoubleTy())
      weight = CW_Register;
    if (type->isVectorTy())
      weight = CW_Register;
    weight = CW_Register;
    weight = CW_Memory;
std::pair<unsigned, const TargetRegisterClass *>
PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                StringRef Constraint,
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b': // R1-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
      return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
    case 'r': // R0-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RCRegClass);
      return std::make_pair(0U, &PPC::GPRCRegClass);
    // 'd' and 'f' constraints are both defined to be "the floating point
    // registers", where one is for 32-bit and the other for 64-bit. We don't
    // really care overly much here so just give them all the same reg classes.
      if (Subtarget.hasSPE()) {
        if (VT == MVT::f32 || VT == MVT::i32)
          return std::make_pair(0U, &PPC::GPRCRegClass);
        if (VT == MVT::f64 || VT == MVT::i64)
          return std::make_pair(0U, &PPC::SPERCRegClass);
        if (VT == MVT::f32 || VT == MVT::i32)
          return std::make_pair(0U, &PPC::F4RCRegClass);
        if (VT == MVT::f64 || VT == MVT::i64)
          return std::make_pair(0U, &PPC::F8RCRegClass);
      if (Subtarget.hasAltivec() && VT.isVector())
        return std::make_pair(0U, &PPC::VRRCRegClass);
      else if (Subtarget.hasVSX())
        // Scalars in Altivec registers only make sense with VSX.
        return std::make_pair(0U, &PPC::VFRCRegClass);
      return std::make_pair(0U, &PPC::CRRCRegClass);
  } else if (Constraint == "wc" && Subtarget.useCRBits()) {
    // An individual CR bit.
    return std::make_pair(0U, &PPC::CRBITRCRegClass);
  } else if ((Constraint == "wa" || Constraint == "wd" ||
              Constraint == "wf" || Constraint == "wi") &&
             Subtarget.hasVSX()) {
    // A VSX register for either a scalar (FP) or vector. There is no
    // support for single precision scalars on subtargets prior to Power8.
      return std::make_pair(0U, &PPC::VSRCRegClass);
    if (VT == MVT::f32 && Subtarget.hasP8Vector())
      return std::make_pair(0U, &PPC::VSSRCRegClass);
    return std::make_pair(0U, &PPC::VSFRCRegClass);
  } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
    if (VT == MVT::f32 && Subtarget.hasP8Vector())
      return std::make_pair(0U, &PPC::VSSRCRegClass);
    return std::make_pair(0U, &PPC::VSFRCRegClass);
  } else if (Constraint == "lr") {
    if (VT == MVT::i64)
      return std::make_pair(0U, &PPC::LR8RCRegClass);
    return std::make_pair(0U, &PPC::LRRCRegClass);

  // Handle special cases of physical registers that are not properly handled
  // by the base class.
  if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
    // If we name a VSX register, we can't defer to the base class because it
    // will not recognize the correct register (their names will be VSL{0-31}
    // and V{0-31} so they won't match). So we match them here.
    if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
      int VSNum = atoi(Constraint.data() + 3);
      assert(VSNum >= 0 && VSNum <= 63 &&
             "Attempted to access a vsr out of range");
        return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
      return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);

    // For float registers, we can't defer to the base class as it will match
    // the SPILLTOVSRRC class.
    if (Constraint.size() > 3 && Constraint[1] == 'f') {
      int RegNum = atoi(Constraint.data() + 2);
      if (RegNum > 31 || RegNum < 0)
        report_fatal_error("Invalid floating point register number");
      if (VT == MVT::f32 || VT == MVT::i32)
        return Subtarget.hasSPE()
                   ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
                   : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
      if (VT == MVT::f64 || VT == MVT::i64)
        return Subtarget.hasSPE()
                   ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
                   : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);

  std::pair<unsigned, const TargetRegisterClass *> R =
      TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);

  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
      PPC::GPRCRegClass.contains(R.first))
    return std::make_pair(TRI->getMatchingSuperReg(R.first,
                                                   PPC::sub_32,
                                                   &PPC::G8RCRegClass),
                          &PPC::G8RCRegClass);

  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
  if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
    R.first = PPC::CR0;
    R.second = &PPC::CRRCRegClass;

  // FIXME: This warning should ideally be emitted in the front end.
  const auto &TM = getTargetMachine();
  if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
    if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
         (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
        (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
      errs() << "warning: vector registers 20 to 32 are reserved in the "
                "default AIX AltiVec ABI and cannot be used\n";
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     StringRef Constraint,
                                                     std::vector<SDValue> &Ops,
                                                     SelectionDAG &DAG) const {
  // Only support length 1 constraints.
  if (Constraint.size() > 1)

  char Letter = Constraint[0];

    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
    if (!CST) return; // Must be an immediate to match.
    int64_t Value = CST->getSExtValue();
    EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
                         // numbers are printed as such.
    default: llvm_unreachable("Unknown constraint letter!");
    case 'I': // "I" is a signed 16-bit constant.
      if (isInt<16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
    case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
      if (isShiftedUInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
    case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
      if (isShiftedInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
    case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
      if (isUInt<16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
    case 'M': // "M" is a constant that is greater than 31.
        Result = DAG.getTargetConstant(Value, dl, TCVT);
    case 'N': // "N" is a positive constant that is an exact power of two.
      if (Value > 0 && isPowerOf2_64(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
    case 'O': // "O" is the constant zero.
        Result = DAG.getTargetConstant(Value, dl, TCVT);
    case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
      if (isInt<16>(-Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);

  if (Result.getNode()) {
    Ops.push_back(Result);

  // Handle standard constraint letters.
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);

void PPCTargetLowering::CollectTargetIntrinsicOperands(
    const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
  if (I.getNumOperands() <= 1)
  if (!isa<ConstantSDNode>(Ops[1].getNode()))
  auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
  if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
      IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)

  if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
    Ops.push_back(DAG.getMDNode(MDN));

// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                              const AddrMode &AM, Type *Ty,
                                              Instruction *I) const {
  // Vector type r+i form is supported since power9 as DQ form. We don't check
  // the offset matching DQ form requirement (off % 16 == 0), because on
  // PowerPC, imm form is preferred and the offset can be adjusted to use imm
  // form later in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it
  // uses min and max offset to check legal addressing mode, so we should be a
  // little aggressive to contain other offsets for that LSRUse.
  if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())

  // PPC allows a sign-extended 16-bit immediate field.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16) - 1)

  // No global is ever allowed as a base.

  // PPC only supports r+r,
  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
    // Otherwise we have r+r or r+i.
    if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
    // Allow 2*r as r+r.
    // No other scales are supported.
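  // For example (illustrative): for a simple i32 access,
  //   [r3 + 32000]  is accepted (the offset fits the signed 16-bit field),
  //   [r3 + 70000]  is rejected (the offset must be materialized separately),
  //   [r3 + r4]     is accepted (r+r form),
  //   [r3 + 2*r4]   is rejected (PPC has no scaled register addressing).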
SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))

  unsigned Depth = Op.getConstantOperandVal(0);

  // Make sure the function does not optimize away the store of the RA to
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setLRStoreRequired();
  bool isPPC64 = Subtarget.isPPC64();
  auto PtrVT = getPointerTy(MF.getDataLayout());

    // The link register (return address) is saved in the caller's frame
    // not the callee's stack frame. So we must get the caller's frame
    // address and load the return address at the LR offset from there.
    SDValue FrameAddr =
        DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
                    LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
        DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
                        isPPC64 ? MVT::i64 : MVT::i32);
    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
                       MachinePointerInfo());

  // Just load the return address off the stack.
  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
                     MachinePointerInfo());

SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
                                          SelectionDAG &DAG) const {
  unsigned Depth = Op.getConstantOperandVal(0);

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;

  // Naked functions never have a frame pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
    FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
    FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;

  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
    FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
                            FrameAddr, MachinePointerInfo());

// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
                                              const MachineFunction &MF) const {
  bool isPPC64 = Subtarget.isPPC64();

  bool is64Bit = isPPC64 && VT == LLT::scalar(64);
  if (!is64Bit && VT != LLT::scalar(32))
    report_fatal_error("Invalid register global variable type");

  Register Reg = StringSwitch<Register>(RegName)
                     .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
                     .Case("r2", isPPC64 ? Register() : PPC::R2)
                     .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
                     .Default(Register());

  report_fatal_error("Invalid register name global variable");

bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
  // The 32-bit SVR4 ABI accesses everything as got-indirect.
  if (Subtarget.is32BitELFABI())

  // AIX accesses everything indirectly through the TOC, which is similar to
  if (Subtarget.isAIXABI())

  CodeModel::Model CModel = getTargetMachine().getCodeModel();
  // If it is the small or large code model, module locals are accessed
  // indirectly by loading their address from .toc/.got.
  if (CModel == CodeModel::Small || CModel == CodeModel::Large)

  // JumpTable and BlockAddress are accessed as got-indirect.
  if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))

  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
    return Subtarget.isGVIndirectSymbol(G->getGlobal());

PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The PowerPC target isn't yet aware of offsets.
bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           MachineFunction &MF,
                                           unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::ppc_atomicrmw_xchg_i128:
  case Intrinsic::ppc_atomicrmw_add_i128:
  case Intrinsic::ppc_atomicrmw_sub_i128:
  case Intrinsic::ppc_atomicrmw_nand_i128:
  case Intrinsic::ppc_atomicrmw_and_i128:
  case Intrinsic::ppc_atomicrmw_or_i128:
  case Intrinsic::ppc_atomicrmw_xor_i128:
  case Intrinsic::ppc_cmpxchg_i128:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i128;
    Info.ptrVal = I.getArgOperand(0);
    Info.align = Align(16);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
  case Intrinsic::ppc_atomic_load_i128:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i128;
    Info.ptrVal = I.getArgOperand(0);
    Info.align = Align(16);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
  case Intrinsic::ppc_atomic_store_i128:
    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = MVT::i128;
    Info.ptrVal = I.getArgOperand(2);
    Info.align = Align(16);
    Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
  case Intrinsic::ppc_altivec_lvebx:
  case Intrinsic::ppc_altivec_lvehx:
  case Intrinsic::ppc_altivec_lvewx:
  case Intrinsic::ppc_vsx_lxvd2x:
  case Intrinsic::ppc_vsx_lxvw4x:
  case Intrinsic::ppc_vsx_lxvd2x_be:
  case Intrinsic::ppc_vsx_lxvw4x_be:
  case Intrinsic::ppc_vsx_lxvl:
  case Intrinsic::ppc_vsx_lxvll: {
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_lvebx:
    case Intrinsic::ppc_altivec_lvehx:
    case Intrinsic::ppc_altivec_lvewx:
    case Intrinsic::ppc_vsx_lxvd2x:
    case Intrinsic::ppc_vsx_lxvd2x_be:

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = -VT.getStoreSize() + 1;
    Info.size = 2 * VT.getStoreSize() - 1;
    Info.align = Align(1);
    Info.flags = MachineMemOperand::MOLoad;
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
  case Intrinsic::ppc_altivec_stvebx:
  case Intrinsic::ppc_altivec_stvehx:
  case Intrinsic::ppc_altivec_stvewx:
  case Intrinsic::ppc_vsx_stxvd2x:
  case Intrinsic::ppc_vsx_stxvw4x:
  case Intrinsic::ppc_vsx_stxvd2x_be:
  case Intrinsic::ppc_vsx_stxvw4x_be:
  case Intrinsic::ppc_vsx_stxvl:
  case Intrinsic::ppc_vsx_stxvll: {
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_stvebx:
    case Intrinsic::ppc_altivec_stvehx:
    case Intrinsic::ppc_altivec_stvewx:
    case Intrinsic::ppc_vsx_stxvd2x:
    case Intrinsic::ppc_vsx_stxvd2x_be:

    Info.opc = ISD::INTRINSIC_VOID;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = -VT.getStoreSize() + 1;
    Info.size = 2 * VT.getStoreSize() - 1;
    Info.align = Align(1);
    Info.flags = MachineMemOperand::MOStore;
  case Intrinsic::ppc_stdcx:
  case Intrinsic::ppc_stwcx:
  case Intrinsic::ppc_sthcx:
  case Intrinsic::ppc_stbcx: {
    auto Alignment = Align(8);
    switch (Intrinsic) {
    case Intrinsic::ppc_stdcx:
    case Intrinsic::ppc_stwcx:
      Alignment = Align(4);
    case Intrinsic::ppc_sthcx:
      Alignment = Align(2);
    case Intrinsic::ppc_stbcx:
      Alignment = Align(1);
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.ptrVal = I.getArgOperand(0);
    Info.align = Alignment;
    Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(
    const MemOp &Op, const AttributeList &FuncAttributes) const {
  if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
    // We should use Altivec/VSX loads and stores when available. For unaligned
    // addresses, unaligned VSX loads are only fast starting with the P8.
    if (Subtarget.hasAltivec() && Op.size() >= 16) {
      if (Op.isMemset() && Subtarget.hasVSX()) {
        uint64_t TailSize = Op.size() % 16;
        // For memset lowering, EXTRACT_VECTOR_ELT tries to return constant
        // element if vector element type matches tail store. For tail size
        // 3/4, the tail store is i32, v4i32 cannot be used, need a legal one.
        if (TailSize > 2 && TailSize <= 4) {
      if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())

  if (Subtarget.isPPC64()) {

/// Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  return !(BitSize == 0 || BitSize > 64);

bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;

bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (!VT1.isInteger() || !VT2.isInteger())
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
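// For example (illustrative): truncating an i64 value to i32 is reported as
// free because it simply uses the low 32 bits of the 64-bit GPR, whereas an
// i64 -> i16 truncation is not.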
bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Generally speaking, zexts are not free, but they are free when they can be
  // folded with other operations.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))

  // FIXME: Add other cases...
  //  - 32-bit shifts with a zext to i64
  //  - zext after ctlz, bswap, etc.
  //  - zext after and by a constant mask

  return TargetLowering::isZExtFree(Val, VT2);

bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
  assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
         "invalid fpext types");
  // Extending to float128 is not free.
  if (DestVT == MVT::f128)

bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<16>(Imm) || isUInt<16>(Imm);

bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<16>(Imm) || isUInt<16>(Imm);

bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
                                                       MachineMemOperand::Flags,
                                                       unsigned *Fast) const {
  if (DisablePPCUnaligned)

  // PowerPC supports unaligned memory access for simple non-vector types.
  // Although accessing unaligned addresses is not as efficient as accessing
  // aligned addresses, it is generally more efficient than manual expansion,
  // and generally only traps for software emulation when crossing page
  if (!VT.isSimple())

  if (VT.isFloatingPoint() && !VT.isVector() &&
      !Subtarget.allowsUnalignedFPAccess())

  if (VT.getSimpleVT().isVector()) {
    if (Subtarget.hasVSX()) {
      if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
          VT != MVT::v4f32 && VT != MVT::v4i32)

  if (VT == MVT::ppcf128)
bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
  // Check integral scalar types.
  if (!VT.isScalarInteger())
  if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
    if (!ConstNode->getAPIntValue().isSignedIntN(64))
    // This transformation will generate >= 2 operations. But the following
    // cases will generate <= 2 instructions during ISEL. So exclude them.
    // 1. If the constant multiplier fits 16 bits, it can be handled by one
    //    HW instruction, i.e. MULLI.
    // 2. If the multiplier after shifting fits 16 bits, one extra shift
    //    instruction is needed compared to case 1, i.e. MULLI and RLDICR.
    int64_t Imm = ConstNode->getSExtValue();
    unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
    if (isInt<16>(Imm))
    uint64_t UImm = static_cast<uint64_t>(Imm);
    if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
        isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))

bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
  return isFMAFasterThanFMulAndFAdd(
      MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));

bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
  if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
  switch (Ty->getScalarType()->getTypeID()) {
  case Type::FloatTyID:
  case Type::DoubleTyID:
  case Type::FP128TyID:
    return Subtarget.hasP9Vector();

// FIXME: add more patterns which are not profitable to hoist.
bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
  if (!I->hasOneUse())

  Instruction *User = I->user_back();
  assert(User && "A single use instruction with no uses.");

  switch (I->getOpcode()) {
  case Instruction::FMul: {
    // Don't break FMA, PowerPC prefers FMA.
    if (User->getOpcode() != Instruction::FSub &&
        User->getOpcode() != Instruction::FAdd)

    const TargetOptions &Options = getTargetMachine().Options;
    const Function *F = I->getFunction();
    const DataLayout &DL = F->getDataLayout();
    Type *Ty = User->getOperand(0)->getType();

            isFMAFasterThanFMulAndFAdd(*F, Ty) &&
            isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
            (Options.AllowFPOpFusion == FPOpFusion::Fast ||
             Options.UnsafeFPMath));
  case Instruction::Load: {
    // Don't break the "store (load float*)" pattern; it will be combined
    // to "store (load int32)" in a later InstCombine pass. See function
    // combineLoadToOperationType. On PowerPC, loading a floating-point value
    // takes more cycles than loading a 32-bit integer.
    LoadInst *LI = cast<LoadInst>(I);
    // For the loads that combineLoadToOperationType does nothing with, like
    // ordered loads, it should be profitable to hoist them.
    // A swifterror load can only be used for pointer-to-pointer types, so the
    // later type check should get rid of this case.
    if (!LI->isUnordered())

    if (User->getOpcode() != Instruction::Store)

    if (I->getType()->getTypeID() != Type::FloatTyID)

PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
  // LR is a callee-save register, but we must treat it as clobbered by any call
  // site. Hence we include LR in the scratch registers, which are in turn added
  // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
  // to CTR, which is used by any indirect call.
  static const MCPhysReg ScratchRegs[] = {
    PPC::X12, PPC::LR8, PPC::CTR8, 0
  return ScratchRegs;

Register PPCTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;

Register PPCTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
    EVT VT, unsigned DefinedValues) const {
  if (VT == MVT::v2i64)
    return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves

  if (Subtarget.hasVSX())

  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);

Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
  if (DisableILPPref || Subtarget.enableMachineScheduler())
    return TargetLowering::getSchedulingPreference(N);

// Create a fast isel object.
PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) const {
  return PPC::createFastISel(FuncInfo, LibInfo);

// 'Inverted' means the FMA opcode after negating one multiplicand.
// For example, (fma -a b c) = (fnmsub a b c)
static unsigned invertFMAOpcode(unsigned Opc) {
    llvm_unreachable("Invalid FMA opcode for PowerPC!");
    return PPCISD::FNMSUB;
  case PPCISD::FNMSUB:

SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                                bool LegalOps, bool OptForSize,
                                                NegatibleCost &Cost,
                                                unsigned Depth) const {
  if (Depth > SelectionDAG::MaxRecursionDepth)

  unsigned Opc = Op.getOpcode();
  EVT VT = Op.getValueType();
  SDNodeFlags Flags = Op.getNode()->getFlags();

  case PPCISD::FNMSUB:
    if (!Op.hasOneUse() || !isTypeLegal(VT))

    const TargetOptions &Options = getTargetMachine().Options;
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    SDValue N2 = Op.getOperand(2);

    NegatibleCost N2Cost = NegatibleCost::Expensive;
        getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);

    // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
    // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
    // These transformations may change sign of zeroes. For example,
    // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
    if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
      // Try and choose the cheaper one to negate.
      NegatibleCost N0Cost = NegatibleCost::Expensive;
      SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
                                           N0Cost, Depth + 1);

      NegatibleCost N1Cost = NegatibleCost::Expensive;
      SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
                                           N1Cost, Depth + 1);

      if (NegN0 && N0Cost <= N1Cost) {
        Cost = std::min(N0Cost, N2Cost);
        return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
      } else if (NegN1) {
        Cost = std::min(N1Cost, N2Cost);
        return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);

    // (fneg (fnmsub a b c)) => (fma a b (fneg c))
    if (isOperationLegal(ISD::FMA, VT)) {
      return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);

  return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
// Override to enable LOAD_STACK_GUARD lowering on Linux.
bool PPCTargetLowering::useLoadStackGuardNode() const {
  if (!Subtarget.isTargetLinux())
    return TargetLowering::useLoadStackGuardNode();

// Override to disable global variable loading on Linux and insert AIX canary
// word declaration.
void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
  if (Subtarget.isAIXABI()) {
    M.getOrInsertGlobal(AIXSSPCanaryWordName,
                        PointerType::getUnqual(M.getContext()));

  if (!Subtarget.isTargetLinux())
    return TargetLowering::insertSSPDeclarations(M);

Value *PPCTargetLowering::getSDagStackGuard(const Module &M) const {
  if (Subtarget.isAIXABI())
    return M.getGlobalVariable(AIXSSPCanaryWordName);
  return TargetLowering::getSDagStackGuard(M);

bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                     bool ForCodeSize) const {
  if (!VT.isSimple() || !Subtarget.hasVSX())

  switch (VT.getSimpleVT().SimpleTy) {
    // For FP types that are currently not supported by the PPC backend, return
    // false. Examples: f16, f80.
    if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
      // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
    APSInt IntResult(16, false);
    // The rounding mode doesn't really matter because we only care about floats
    // that can be converted to integers exactly.
    Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
    // For exact values in the range [-16, 15] we can materialize the float.
    if (IsExact && IntResult <= 15 && IntResult >= -16)
    return Imm.isZero();
    return Imm.isPosZero();
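// For example (illustrative): with VSX but without prefixed instructions,
// +4.0 is legal here because it converts exactly to an integer in [-16, 15],
// while 0.3 does not and is loaded from the constant pool instead. With
// prefixed instructions and Power10 vector support, any immediate can be
// materialized (e.g. via XXSPLTIDP).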
// For vector shift operation op, fold
// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
                                  SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();
  unsigned Opcode = N->getOpcode();
  unsigned TargetOpcode;

    llvm_unreachable("Unexpected shift operation");
    TargetOpcode = PPCISD::SHL;
    TargetOpcode = PPCISD::SRL;
    TargetOpcode = PPCISD::SRA;

  if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
      N1->getOpcode() == ISD::AND)
    if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
      if (Mask->getZExtValue() == OpSizeInBits - 1)
        return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
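// For example (illustrative): with v4i32 operands,
//   (shl x, (and y, splat(31)))  ->  (PPCISD::SHL x, y)
// because the vector shift instructions already interpret the shift amount
// modulo the element width, making the masking AND redundant.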
SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))

  SDValue N0 = N->getOperand(0);
  ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
      N0.getOpcode() != ISD::SIGN_EXTEND ||
      N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
      N->getValueType(0) != MVT::i64)

  // We can't save an operation here if the value is already extended, and
  // the existing shift is easier to combine.
  SDValue ExtsSrc = N0.getOperand(0);
  if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
      ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)

  SDValue ShiftBy = SDValue(CN1, 0);
  // We want the shift amount to be i32 on the extswli, but the shift could
  if (ShiftBy.getValueType() == MVT::i64)
    ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);

  return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),

SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))

SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
// Transform (add X, (zext(sete  Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
// When C is zero, the equation (addi Z, -C) can be simplified to Z.
// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
                                 const PPCSubtarget &Subtarget) {
  if (!Subtarget.isPPC64())

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  auto isZextOfCompareWithConstant = [](SDValue Op) {
    if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
        Op.getValueType() != MVT::i64)

    SDValue Cmp = Op.getOperand(0);
    if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
        Cmp.getOperand(0).getValueType() != MVT::i64)

    if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
      int64_t NegConstant = 0 - Constant->getSExtValue();
      // Due to the limitations of the addi instruction,
      // -C is required to be [-32768, 32767].
      return isInt<16>(NegConstant);

  bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
  bool RHSHasPattern = isZextOfCompareWithConstant(RHS);

  // If there is a pattern, canonicalize a zext operand to the RHS.
  if (LHSHasPattern && !RHSHasPattern)
    std::swap(LHS, RHS);
  else if (!LHSHasPattern && !RHSHasPattern)

  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
  SDValue Cmp = RHS.getOperand(0);
  SDValue Z = Cmp.getOperand(0);
  auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
  int64_t NegConstant = 0 - Constant->getSExtValue();

  switch (cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
    //                            --> addze X, (addic Z, -1).carry
    // add X, (zext(setne Z, C))--
    //                            \  when -32768 <= -C <= 32767 && C != 0
    //                            --> addze X, (addic (addi Z, -C), -1).carry
    SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
                              DAG.getConstant(NegConstant, DL, MVT::i64));
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    SDValue Addc =
        DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
                    AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
    return DAG.getNode(ISD::ADDE, DL, VTs, LHS,
                       DAG.getConstant(0, DL, MVT::i64),
                       SDValue(Addc.getNode(), 1));
    //                           --> addze X, (subfic Z, 0).carry
    // add X, (zext(sete Z, C))--
    //                           \  when -32768 <= -C <= 32767 && C != 0
    //                           --> addze X, (subfic (addi Z, -C), 0).carry
    SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
                              DAG.getConstant(NegConstant, DL, MVT::i64));
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    SDValue Subc =
        DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
                    DAG.getConstant(0, DL, MVT::i64), AddOrZ);
    return DAG.getNode(ISD::ADDE, DL, VTs, LHS,
                       DAG.getConstant(0, DL, MVT::i64),
                       SDValue(Subc.getNode(), 1));
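  // Worked example (illustrative): for C == 7,
  //   add X, (zext (setne Z, 7))
  //     -> addze X, carry-of (addic (addi Z, -7), -1)
  // The addic of -1 produces a carry exactly when (Z - 7) != 0, so the addze
  // adds 1 to X precisely when Z != 7, matching the original zext(setne).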
// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
// In this case both C1 and C2 must be known constants.
// C1+C2 must fit into a 34 bit signed integer.
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
                                          const PPCSubtarget &Subtarget) {
  if (!Subtarget.isUsingPCRelativeCalls())

  // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
  // If we find that node try to cast the Global Address and the Constant.
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
    std::swap(LHS, RHS);

  if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)

  // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
  GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
  ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(RHS);

  // Check that both casts succeeded.
  if (!GSDN || !ConstNode)

  int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();

  // The signed int offset needs to fit in 34 bits.
  if (!isInt<34>(NewOffset))

  // The new global address is a copy of the old global address except
  // that it has the updated Offset.
      DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
                                 NewOffset, GSDN->getTargetFlags());
      DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);

SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
  if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))

  if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
// Detect TRUNCATE operations on bitcasts of float128 values.
// What we are looking for here is the situation where we extract a subset
// of bits from a 128 bit float.
// This can be of two forms:
// 1) BITCAST of f128 feeding TRUNCATE
// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
// The reason this is required is because we do not have a legal i128 type
// and so we want to prevent having to store the f128 and then reload part
SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
                                           DAGCombinerInfo &DCI) const {
  // If we are using CRBits then try that first.
  if (Subtarget.useCRBits()) {
    // Check if CRBits did anything and return that if it did.
    if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
      return CRTruncValue;

  SDValue Op0 = N->getOperand(0);

  // Looking for a truncate of i128 to i64.
  if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)

  int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;

  // SRL feeding TRUNCATE.
  if (Op0.getOpcode() == ISD::SRL) {
    ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
    // The right shift has to be by 64 bits.
    if (!ConstNode || ConstNode->getZExtValue() != 64)

    // Switch the element number to extract.
    EltToExtract = EltToExtract ? 0 : 1;
    // Update Op0 past the SRL.
    Op0 = Op0.getOperand(0);

  // BITCAST feeding a TRUNCATE possibly via SRL.
  if (Op0.getOpcode() == ISD::BITCAST &&
      Op0.getValueType() == MVT::i128 &&
      Op0.getOperand(0).getValueType() == MVT::f128) {
    SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
    return DCI.DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
        DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
  if (!ConstOpOrElement)

  // An imul is usually smaller than the alternative sequence for legal type.
  if (DAG.getMachineFunction().getFunction().hasMinSize() &&
      isOperationLegal(ISD::MUL, N->getValueType(0)))

  auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
    switch (this->Subtarget.getCPUDirective()) {
    // TODO: enhance the condition for subtarget before pwr8
    case PPC::DIR_PWR8:
      //  type        mul     add    shl
    case PPC::DIR_PWR9:
    case PPC::DIR_PWR10:
    case PPC::DIR_PWR11:
    case PPC::DIR_PWR_FUTURE:
      //  type        mul     add    shl
      // The cycle ratios of the related operations are shown in the table
      // above. Because mul is 5 (scalar) / 7 (vector) and add/sub/shl are all
      // 2 for both scalar and vector types, the 2-instruction patterns
      // (add/sub + shl, cost 4) are always profitable; but the 3-instruction
      // pattern (mul x, -(2^N + 1)) => -(add (shl x, N), x), i.e.
      // sub + add + shl, costs 6, so we should only do it for vector types.
      return IsAddOne && IsNeg ? VT.isVector() : true;

  EVT VT = N->getValueType(0);

  const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
  bool IsNeg = MulAmt.isNegative();
  APInt MulAmtAbs = MulAmt.abs();

  if ((MulAmtAbs - 1).isPowerOf2()) {
    // (mul x, 2^N + 1) => (add (shl x, N), x)
    // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
    if (!IsProfitable(IsNeg, true, VT))

    SDValue Op0 = N->getOperand(0);
        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                    DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
    SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);

    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
  } else if ((MulAmtAbs + 1).isPowerOf2()) {
    // (mul x, 2^N - 1) => (sub (shl x, N), x)
    // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
    if (!IsProfitable(IsNeg, false, VT))

    SDValue Op0 = N->getOperand(0);
        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                    DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));

    return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
    return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
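  // Worked example (illustrative): with an i64 x,
  //   (mul x, 9)  -> (add (shl x, 3), x)   ; 9 == 2^3 + 1
  //   (mul x, 7)  -> (sub (shl x, 3), x)   ; 7 == 2^3 - 1
  //   (mul x, -7) -> (sub x, (shl x, 3))
  // while (mul x, 10) is left alone because neither 9 nor 11 is a power of 2.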
// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
// in combiner since we need to check SD flags and other subtarget features.
SDValue PPCTargetLowering::combineFMALike(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDNodeFlags Flags = N->getFlags();
  EVT VT = N->getValueType(0);
  SelectionDAG &DAG = DCI.DAG;
  const TargetOptions &Options = getTargetMachine().Options;
  unsigned Opc = N->getOpcode();
  bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
  bool LegalOps = !DCI.isBeforeLegalizeOps();

  if (!isOperationLegal(ISD::FMA, VT))

  // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
  // since (fnmsub a b c)=-0 while c-ab=+0.
  if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)

  // (fma (fneg a) b c) => (fnmsub a b c)
  // (fnmsub (fneg a) b c) => (fma a b c)
  if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
    return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);

  // (fma a (fneg b) c) => (fnmsub a b c)
  // (fnmsub a (fneg b) c) => (fma a b c)
  if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
    return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);

bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
  if (!Subtarget.is64BitELFABI())

  // If not a tail call then no need to proceed.
  if (!CI->isTailCall())

  // If sibling calls have been disabled and tail-calls aren't guaranteed
  // there is no reason to duplicate.
  auto &TM = getTargetMachine();
  if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)

  // Can't tail call a function called indirectly, or if it has variadic args.
  const Function *Callee = CI->getCalledFunction();
  if (!Callee || Callee->isVarArg())

  // Make sure the callee and caller calling conventions are eligible for tco.
  const Function *Caller = CI->getParent()->getParent();
  if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
                                           CI->getCallingConv()))

  // If the function is local then we have a good chance at tail-calling it
  return getTargetMachine().shouldAssumeDSOLocal(Callee);

bool PPCTargetLowering::
isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
  const Value *Mask = AndI.getOperand(1);
  // If the mask is suitable for andi. or andis. we should sink the and.
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
    // Can't handle constants wider than 64 bits.
    if (CI->getBitWidth() > 64)
    int64_t ConstVal = CI->getZExtValue();
    return isUInt<16>(ConstVal) ||
           (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));

  // For non-constant masks, we can always use the record-form and.
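  // For example (illustrative): a mask of 0x0000FFFF fits andi. and 0xFFFF0000
  // fits andis., so for both the and is considered worth sinking next to the
  // compare with zero; a mask such as 0x00010001 fits neither form.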
/// getAddrModeForFlags - Based on the set of address flags, select the most
/// optimal instruction format to match by.
PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
  // This is not a node we should be handling here.
  if (Flags == PPC::MOF_None)
    return PPC::AM_None;
  // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
  for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
    if ((Flags & FlagSet) == FlagSet)
      return PPC::AM_DForm;
  for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
    if ((Flags & FlagSet) == FlagSet)
      return PPC::AM_DSForm;
  for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
    if ((Flags & FlagSet) == FlagSet)
      return PPC::AM_DQForm;
  for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
    if ((Flags & FlagSet) == FlagSet)
      return PPC::AM_PrefixDForm;
  // If no other forms are selected, return an X-Form as it is the most
  // general addressing mode.
  return PPC::AM_XForm;

/// Set alignment flags based on whether or not the Frame Index is aligned.
/// Utilized when computing flags for address computation when selecting
/// load and store instructions.
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
                               SelectionDAG &DAG) {
  bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
  FrameIndexSDNode *FI =
      dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);

  const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
  // If this is (add $FI, $S16Imm), the alignment flags are already set
  // based on the immediate. We just need to clear the alignment flags
  // if the FI alignment is weaker.
  if ((FrameIndexAlign % 4) != 0)
    FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
  if ((FrameIndexAlign % 16) != 0)
    FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
  // If the address is a plain FrameIndex, set alignment flags based on
  if ((FrameIndexAlign % 4) == 0)
    FlagSet |= PPC::MOF_RPlusSImm16Mult4;
  if ((FrameIndexAlign % 16) == 0)
    FlagSet |= PPC::MOF_RPlusSImm16Mult16;
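// For example (illustrative): a frame object with only 8-byte alignment keeps
// MOF_RPlusSImm16Mult4 (8 % 4 == 0) but has MOF_RPlusSImm16Mult16 cleared
// (8 % 16 != 0), so a D/DS-form offset can still be used while a DQ-form
// access (which needs a 16-byte multiple) cannot.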
/// Given a node, compute flags that are used for address computation when
/// selecting load and store instructions. The flags computed are stored in
/// FlagSet. This function takes into account whether the node is a constant,
/// an ADD, an OR, or something else, and computes the address flags
/// accordingly.
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
                                              SelectionDAG &DAG) {
  // Set the alignment flags for the node depending on if the node is
  // 4-byte or 16-byte aligned.
  auto SetAlignFlagsForImm = [&](uint64_t Imm) {
    if ((Imm & 0x3) == 0)
      FlagSet |= PPC::MOF_RPlusSImm16Mult4;
    if ((Imm & 0xf) == 0)
      FlagSet |= PPC::MOF_RPlusSImm16Mult16;

  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // All 32-bit constants can be computed as LIS + Disp.
    const APInt &ConstImm = CN->getAPIntValue();
    if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
      FlagSet |= PPC::MOF_AddrIsSImm32;
      SetAlignFlagsForImm(ConstImm.getZExtValue());
      setAlignFlagsForFI(N, FlagSet, DAG);
    if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
      FlagSet |= PPC::MOF_RPlusSImm34;
    else // Let constant materialization handle large constants.
      FlagSet |= PPC::MOF_NotAddNorCst;
  } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
    // This address can be represented as an addition of:
    //  - Register + Imm16 (possibly a multiple of 4/16)
    //  - Register + Imm34
    //  - Register + PPCISD::Lo
    //  - Register + Register
    // In any case, we won't have to match this as Base + Zero.
    SDValue RHS = N.getOperand(1);
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
      const APInt &ConstImm = CN->getAPIntValue();
      if (ConstImm.isSignedIntN(16)) {
        FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
        SetAlignFlagsForImm(ConstImm.getZExtValue());
        setAlignFlagsForFI(N, FlagSet, DAG);
      if (ConstImm.isSignedIntN(34))
        FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
        FlagSet |= PPC::MOF_RPlusR; // Register.
    } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
      FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
      FlagSet |= PPC::MOF_RPlusR;
  } else { // The address computation is not a constant or an addition.
    setAlignFlagsForFI(N, FlagSet, DAG);
    FlagSet |= PPC::MOF_NotAddNorCst;

static bool isPCRelNode(SDValue N) {
  return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
          isValidPCRelNode<ConstantPoolSDNode>(N) ||
          isValidPCRelNode<GlobalAddressSDNode>(N) ||
          isValidPCRelNode<JumpTableSDNode>(N) ||
          isValidPCRelNode<BlockAddressSDNode>(N));

/// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
/// the address flags of the load/store instruction that is to be matched.
unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
                                           SelectionDAG &DAG) const {
  unsigned FlagSet = PPC::MOF_None;

  // Compute subtarget flags.
  if (!Subtarget.hasP9Vector())
    FlagSet |= PPC::MOF_SubtargetBeforeP9;
    FlagSet |= PPC::MOF_SubtargetP9;

  if (Subtarget.hasPrefixInstrs())
    FlagSet |= PPC::MOF_SubtargetP10;

  if (Subtarget.hasSPE())
    FlagSet |= PPC::MOF_SubtargetSPE;

  // Check if we have a PCRel node and return early.
  if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))

  // If the node is the paired load/store intrinsics, compute flags for
  // address computation and return early.
  unsigned ParentOp = Parent->getOpcode();
  if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
                               (ParentOp == ISD::INTRINSIC_VOID))) {
    unsigned ID = Parent->getConstantOperandVal(1);
    if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
      SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
                             ? Parent->getOperand(2)
                             : Parent->getOperand(3);
      computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
      FlagSet |= PPC::MOF_Vector;

  // Mark this as something we don't want to handle here if it is atomic
  // or pre-increment instruction.
  if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
    if (LSB->isIndexed())
      return PPC::MOF_None;

  // Compute in-memory type flags. This is based on if there are scalars,
  // floats or vectors.
  const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
  assert(MN && "Parent should be a MemSDNode!");
  EVT MemVT = MN->getMemoryVT();
  unsigned Size = MemVT.getSizeInBits();
  if (MemVT.isScalarInteger()) {
    assert(Size <= 128 &&
           "Not expecting scalar integers larger than 16 bytes!");
      FlagSet |= PPC::MOF_SubWordInt;
    else if (Size == 32)
      FlagSet |= PPC::MOF_WordInt;
      FlagSet |= PPC::MOF_DoubleWordInt;
  } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
      FlagSet |= PPC::MOF_Vector;
    else if (Size == 256) {
      assert(Subtarget.pairedVectorMemops() &&
             "256-bit vectors are only available when paired vector memops is "
      FlagSet |= PPC::MOF_Vector;
      llvm_unreachable("Not expecting illegal vectors!");
  } else { // Floating point type: can be scalar, f128 or vector types.
    if (Size == 32 || Size == 64)
      FlagSet |= PPC::MOF_ScalarFloat;
    else if (MemVT == MVT::f128 || MemVT.isVector())
      FlagSet |= PPC::MOF_Vector;
      llvm_unreachable("Not expecting illegal scalar floats!");

  // Compute flags for address computation.
  computeFlagsForAddressComputation(N, FlagSet, DAG);

  // Compute type extension flags.
  if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
    switch (LN->getExtensionType()) {
    case ISD::SEXTLOAD:
      FlagSet |= PPC::MOF_SExt;
    case ISD::ZEXTLOAD:
      FlagSet |= PPC::MOF_ZExt;
    case ISD::NON_EXTLOAD:
      FlagSet |= PPC::MOF_NoExt;
    FlagSet |= PPC::MOF_NoExt;

  // For integers, no extension is the same as zero extension.
  // We set the extension mode to zero extension so we don't have
  // to add separate entries in AddrModesMap for loads and stores.
  if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
    FlagSet |= PPC::MOF_ZExt;
    FlagSet &= ~PPC::MOF_NoExt;

  // If we don't have prefixed instructions, 34-bit constants should be
  // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
  bool IsNonP1034BitConst =
      ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
       FlagSet) == PPC::MOF_RPlusSImm34;
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
      IsNonP1034BitConst)
    FlagSet |= PPC::MOF_NotAddNorCst;
/// SelectForceXFormMode - Given the specified address, force it to be
/// represented as an indexed [r+r] operation (an XForm instruction).
PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
                                                      SDValue &Base,
                                                      SelectionDAG &DAG) const {
  PPC::AddrMode Mode = PPC::AM_XForm;
  int16_t ForceXFormImm = 0;
  if (provablyDisjointOr(DAG, N) &&
      !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
    Disp = N.getOperand(0);
    Base = N.getOperand(1);
    return Mode;
  }

  // If the address is the result of an add, we will utilize the fact that the
  // address calculation includes an implicit add. However, we can reduce
  // register pressure if we do not materialize a constant just for use as the
  // index register. We only get rid of the add if it is not an add of a
  // value and a 16-bit signed constant and both have a single use.
  if (N.getOpcode() == ISD::ADD &&
      (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
       !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
    Disp = N.getOperand(0);
    Base = N.getOperand(1);
    return Mode;
  }

  // Otherwise, use R0 as the base register.
  Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                         N.getValueType());
  Base = N;
  return Mode;
}

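// splitValueIntoRegisterParts - Only the case of splitting a small scalar
// integer into f64 parts is handled here; all other cases return false so
// that the generic splitting logic is used instead.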
bool PPCTargetLowering::splitValueIntoRegisterParts(
    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
    unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
  EVT ValVT = Val.getValueType();
  // If we are splitting a scalar integer into f64 parts (i.e. so they
  // can be placed into VFRC registers), we need to zero extend and
  // bitcast the values. This will ensure the value is placed into a
  // VSR using direct moves or stack operations as needed.
  if (PartVT == MVT::f64 &&
      (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
    Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
    Parts[0] = Val;
    return true;
  }
  return false;
}

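// lowerToLibCall - Lower Op to a call to the named library function, passing
// the operands of Op as the call arguments and emitting the call as a tail
// call when it is in tail call position.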
SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
                                          SelectionDAG &DAG) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  TargetLowering::CallLoweringInfo CLI(DAG);
  EVT RetVT = Op.getValueType();
  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  SDValue Callee =
      DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
  bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, false);
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  for (const SDValue &N : Op->op_values()) {
    EVT ArgVT = N.getValueType();
    Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
    Entry.Node = N;
    Entry.Ty = ArgTy;
    Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, SignExtend);
    Entry.IsZExt = !Entry.IsSExt;
    Args.push_back(Entry);
  }

  SDValue InChain = DAG.getEntryNode();
  SDValue TCChain = InChain;
  const Function &F = DAG.getMachineFunction().getFunction();
  bool isTailCall =
      TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
      (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
  if (isTailCall)
    InChain = TCChain;
  CLI.setDebugLoc(SDLoc(Op))
      .setChain(InChain)
      .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
      .setTailCall(isTailCall)
      .setSExtResult(SignExtend)
      .setZExtResult(!SignExtend)
      .setIsPostTypeLegalization(true);
  return TLI.LowerCallTo(CLI).first;
}

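// lowerLibCallBasedOnType - Pick the f32 or f64 variant of a library function
// based on the value type of Op.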
SDValue PPCTargetLowering::lowerLibCallBasedOnType(
    const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
    SelectionDAG &DAG) const {
  if (Op.getValueType() == MVT::f32)
    return lowerToLibCall(LibCallFloatName, Op, DAG);

  if (Op.getValueType() == MVT::f64)
    return lowerToLibCall(LibCallDoubleName, Op, DAG);

  return SDValue();
}

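// isLowringToMASSFiniteSafe - Return true if Op may be lowered to one of the
// "finite" MASS entry points, i.e. MASS lowering is safe and the operation is
// known to have no signed zeros, NaNs or infinities.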
bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
  SDNodeFlags Flags = Op.getNode()->getFlags();
  return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
         Flags.hasNoNaNs() && Flags.hasNoInfs();
}

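// isLowringToMASSSafe - MASS lowering is only performed for operations marked
// with the approximate-functions fast-math flag.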
bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
  return Op.getNode()->getFlags().hasApproximateFuncs();
}

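// isScalarMASSConversionEnabled - Scalar math operations are only converted to
// MASS entries when PPCGenScalarMASSEntries is set in the target options.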
bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
  return getTargetMachine().Options.PPCGenScalarMASSEntries;
}

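// lowerLibCallBase - Common helper for lowering a scalar math operation to a
// MASS library call. Chooses between the regular and "finite" entry points
// based on the fast-math flags, then between the f32 and f64 variants based
// on the type of Op.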
SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
                                            const char *LibCallFloatName,
                                            const char *LibCallDoubleNameFinite,
                                            const char *LibCallFloatNameFinite,
                                            SDValue Op,
                                            SelectionDAG &DAG) const {
  if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
    return SDValue();

  if (!isLowringToMASSFiniteSafe(Op))
    return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
                                   DAG);

  return lowerLibCallBasedOnType(LibCallFloatNameFinite,
                                 LibCallDoubleNameFinite, Op, DAG);
}

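// Entry points for the individual scalar math operations that can be lowered
// to MASS (__xl_*) library calls.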
SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
  return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
                          "__xl_powf_finite", Op, DAG);
}

SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
  return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
                          "__xl_sinf_finite", Op, DAG);
}

SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
  return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
                          "__xl_cosf_finite", Op, DAG);
}

SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
  return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
                          "__xl_logf_finite", Op, DAG);
}

SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
  return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
                          "__xl_log10f_finite", Op, DAG);
}

SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
  return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
                          "__xl_expf_finite", Op, DAG);
}

// If we happen to match to an aligned D-Form, check if the Frame Index is
// adequately aligned. If it is not, reset the mode to match to X-Form.
static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
                                   PPC::AddrMode &Mode) {
  if (!isa<FrameIndexSDNode>(N))
    return;
  if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
      (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
    Mode = PPC::AM_XForm;
}

/// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
/// compute the address flags of the node, get the optimal address mode based
/// on the flags, and set the Base and Disp based on the address mode.
PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
                                                       SDValue N, SDValue &Disp,
                                                       SDValue &Base,
                                                       SelectionDAG &DAG,
                                                       MaybeAlign Align) const {
  SDLoc DL(Parent);

  // Compute the address flags.
  unsigned Flags = computeMOFlags(Parent, N, DAG);

  // Get the optimal address mode based on the Flags.
  PPC::AddrMode Mode = getAddrModeForFlags(Flags);

  // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
  // Select an X-Form load if it is not.
  setXFormForUnalignedFI(N, Flags, Mode);

  // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
  if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
    assert(Subtarget.isUsingPCRelativeCalls() &&
           "Must be using PC-Relative calls when a valid PC-Relative node is "
           "present!");
    Mode = PPC::AM_PCRel;
  }

  // Set Base and Disp accordingly depending on the address mode.
  switch (Mode) {
  case PPC::AM_DForm:
  case PPC::AM_DSForm:
  case PPC::AM_DQForm: {
    // This is a register plus a 16-bit immediate. The base will be the
    // register and the displacement will be the immediate unless it
    // isn't sufficiently aligned.
    if (Flags & PPC::MOF_RPlusSImm16) {
      SDValue Op0 = N.getOperand(0);
      SDValue Op1 = N.getOperand(1);
      int16_t Imm = Op1->getAsZExtVal();
      if (!Align || isAligned(*Align, Imm)) {
        Disp = DAG.getTargetConstant(Imm, DL, N.getValueType());
        Base = Op0;
        if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
        }
        break;
      }
    }
    // This is a register plus the @lo relocation. The base is the register
    // and the displacement is the global address.
    else if (Flags & PPC::MOF_RPlusLo) {
      Disp = N.getOperand(1).getOperand(0); // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      break;
    }
    // This is a constant address at most 32 bits. The base will be
    // zero or load-immediate-shifted and the displacement will be
    // the low 16 bits of the address.
    else if (Flags & PPC::MOF_AddrIsSImm32) {
      auto *CN = cast<ConstantSDNode>(N);
      EVT CNType = CN->getValueType(0);
      uint64_t CNImm = CN->getZExtValue();
      // If this address fits entirely in a 16-bit sext immediate field, codegen
      // this as "d, 0".
      int16_t Imm;
      if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
        Disp = DAG.getTargetConstant(Imm, DL, CNType);
        Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                               CNType);
        break;
      }
      // Handle 32-bit sext immediate with LIS + Addr mode.
      if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
          (!Align || isAligned(*Align, CNImm))) {
        int32_t Addr = (int32_t)CNImm;
        // Otherwise, break this down into LIS + Disp.
        Disp = DAG.getTargetConstant((int16_t)Addr, DL, MVT::i32);
        Base =
            DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32);
        uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
        Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
        break;
      }
    }
    // Otherwise, the PPC:MOF_NotAdd flag is set. Load/Store is Non-foldable.
    Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
    } else
      Base = N;
    break;
  }
  case PPC::AM_PrefixDForm: {
    int64_t Imm34 = 0;
    unsigned Opcode = N.getOpcode();
    if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
        (isIntS34Immediate(N.getOperand(1), Imm34))) {
      // N is an Add/OR Node, and its operand is a 34-bit signed immediate.
      Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      else
        Base = N.getOperand(0);
    } else if (isIntS34Immediate(N, Imm34)) {
      // The address is a 34-bit signed immediate.
      Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
      Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
    }
    break;
  }
  case PPC::AM_PCRel: {
    // When selecting PC-Relative instructions, "Base" is not utilized as
    // we select the address as [PC+imm].
    Disp = N;
    break;
  }
  case PPC::AM_None:
    break;
  default: { // By default, X-Form is always available to be selected.
    // When a frame index is not aligned, we also match by XForm.
    FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
    Base = FI ? N : N.getOperand(1);
    Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                                N.getValueType())
              : N.getOperand(0);
    break;
  }
  }
  return Mode;
}

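// ccAssignFnForCall - Return the calling convention assignment function to use
// for the given calling convention: cold calls use a dedicated return-value
// convention, everything else uses the standard PPC64 ELF convention.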
CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
                                                 bool Return,
                                                 bool IsVarArg) const {
  switch (CC) {
  case CallingConv::Cold:
    return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
  default:
    return CC_PPC64_ELF;
  }
}

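// Quadword (128-bit) atomics are only inlined on 64-bit subtargets that
// support quadword atomic instructions.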
bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
  return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
}

TargetLowering::AtomicExpansionKind
PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (shouldInlineQuadwordAtomics() && Size == 128)
    return AtomicExpansionKind::MaskedIntrinsic;

  switch (AI->getOperation()) {
  case AtomicRMWInst::UIncWrap:
  case AtomicRMWInst::UDecWrap:
    return AtomicExpansionKind::CmpXChg;
  default:
    return TargetLowering::shouldExpandAtomicRMWInIR(AI);
  }

  llvm_unreachable("unreachable atomicrmw operation");
}

TargetLowering::AtomicExpansionKind
PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
  unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
  if (shouldInlineQuadwordAtomics() && Size == 128)
    return AtomicExpansionKind::MaskedIntrinsic;
  return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
}

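// Map an atomicrmw binary operation to the corresponding PPC quadword
// atomicrmw intrinsic.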
static Intrinsic::ID
getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Xchg:
    return Intrinsic::ppc_atomicrmw_xchg_i128;
  case AtomicRMWInst::Add:
    return Intrinsic::ppc_atomicrmw_add_i128;
  case AtomicRMWInst::Sub:
    return Intrinsic::ppc_atomicrmw_sub_i128;
  case AtomicRMWInst::And:
    return Intrinsic::ppc_atomicrmw_and_i128;
  case AtomicRMWInst::Or:
    return Intrinsic::ppc_atomicrmw_or_i128;
  case AtomicRMWInst::Xor:
    return Intrinsic::ppc_atomicrmw_xor_i128;
  case AtomicRMWInst::Nand:
    return Intrinsic::ppc_atomicrmw_nand_i128;
  }
}

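// emitMaskedAtomicRMWIntrinsic - Expand a 128-bit atomicrmw by splitting the
// incoming value into 64-bit halves, calling the quadword intrinsic, and
// reassembling the returned {lo, hi} pair into an i128 result.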
Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Type *ValTy = Incr->getType();
  assert(ValTy->getPrimitiveSizeInBits() == 128);
  Function *RMW = Intrinsic::getDeclaration(
      M, getIntrinsicForAtomicRMWBinOp128(AI->getOperation()));
  Type *Int64Ty = Type::getInt64Ty(M->getContext());
  Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
  Value *IncrHi =
      Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
  Value *LoHi = Builder.CreateCall(RMW, {AlignedAddr, IncrLo, IncrHi});
  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
  return Builder.CreateOr(
      Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
}

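// emitMaskedAtomicCmpXchgIntrinsic - Expand a 128-bit cmpxchg the same way:
// split the compare and new values into 64-bit halves, call the
// ppc_cmpxchg_i128 intrinsic between the leading and trailing fences, and
// reassemble the returned halves into an i128 result.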
Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Type *ValTy = CmpVal->getType();
  assert(ValTy->getPrimitiveSizeInBits() == 128);
  Function *IntCmpXchg =
      Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
  Type *Int64Ty = Type::getInt64Ty(M->getContext());
  Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
  Value *CmpHi =
      Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
  Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
  Value *NewHi =
      Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
  emitLeadingFence(Builder, CI, Ord);
  Value *LoHi =
      Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
  emitTrailingFence(Builder, CI, Ord);
  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
  return Builder.CreateOr(
      Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");