//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the SystemZTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SystemZISelLowering.h"
#include "SystemZCallingConv.h"
#include "SystemZConstantPoolValue.h"
#include "SystemZMachineFunctionInfo.h"
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include <cctype>

using namespace llvm;

#define DEBUG_TYPE "systemz-lower"

namespace {

// Represents information about a comparison.
struct Comparison {
  Comparison(SDValue Op0In, SDValue Op1In)
      : Op0(Op0In), Op1(Op1In), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}

  // The operands to the comparison.
  SDValue Op0, Op1;

  // The opcode that should be used to compare Op0 and Op1.
  unsigned Opcode;

  // A SystemZICMP value.  Only used for integer comparisons.
  unsigned ICmpType;

  // The mask of CC values that Opcode can produce.
  unsigned CCValid;

  // The mask of CC values for which the original condition is true.
  unsigned CCMask;
};

} // end anonymous namespace
// Classify VT as either 32 or 64 bit.
static bool is32Bit(EVT VT) {
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::i32:
    return true;
  case MVT::i64:
    return false;
  default:
    llvm_unreachable("Unsupported type");
  }
}
// Return a version of MachineOperand that can be safely used before the
// final use.
static MachineOperand earlyUseOperand(MachineOperand Op) {
  if (Op.isReg())
    Op.setIsKill(false);
  return Op;
}
SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
                                             const SystemZSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize(0));

  // Set up the register classes.
  if (Subtarget.hasHighWord())
    addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
  else
    addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
  if (Subtarget.hasVector()) {
    addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
    addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
  } else {
    addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
    addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
  }
  if (Subtarget.hasVectorEnhancements1())
    addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
  else
    addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);

  if (Subtarget.hasVector()) {
    addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget.getRegisterInfo());

  // Set up special registers.
  setStackPointerRegisterToSaveRestore(SystemZ::R15D);

  // TODO: It may be better to default to latency-oriented scheduling; however,
  // LLVM's current latency-oriented scheduler can't handle physreg definitions
  // such as SystemZ has with CC, so set this to the register-pressure
  // scheduler, because it can.
  setSchedulingPreference(Sched::RegPressure);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  // Instructions are strings of 2-byte aligned 2-byte values.
  setMinFunctionAlignment(2);
  // For performance reasons we prefer 16-byte alignment.
  setPrefFunctionAlignment(4);
  // Handle operations that are handled in a similar way for all types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Lower SET_CC into an IPM-based sequence.
      setOperationAction(ISD::SETCC, VT, Custom);

      // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
      setOperationAction(ISD::SELECT, VT, Expand);

      // Lower SELECT_CC and BR_CC into separate comparisons and branches.
      setOperationAction(ISD::SELECT_CC, VT, Custom);
      setOperationAction(ISD::BR_CC, VT, Custom);
    }
  }

  // Expand jump table branches as address arithmetic followed by an
  // indirect jump.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // Expand BRCOND into a BR_CC (see above).
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);
  // Handle integer types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_INTEGER_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Expand individual DIV and REMs into DIVREMs.
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Custom);
      setOperationAction(ISD::UDIVREM, VT, Custom);

      // Support addition/subtraction with overflow.
      setOperationAction(ISD::SADDO, VT, Custom);
      setOperationAction(ISD::SSUBO, VT, Custom);

      // Support addition/subtraction with carry.
      setOperationAction(ISD::UADDO, VT, Custom);
      setOperationAction(ISD::USUBO, VT, Custom);

      // Support carry in as value rather than glue.
      setOperationAction(ISD::ADDCARRY, VT, Custom);
      setOperationAction(ISD::SUBCARRY, VT, Custom);

      // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
      // stores, putting a serialization instruction after the stores.
      setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
      setOperationAction(ISD::ATOMIC_STORE, VT, Custom);

      // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
      // available, or if the operand is constant.
      setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);

      // Use POPCNT on z196 and above.
      if (Subtarget.hasPopulationCount())
        setOperationAction(ISD::CTPOP, VT, Custom);
      else
        setOperationAction(ISD::CTPOP, VT, Expand);

      // No special instructions for these.
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Use *MUL_LOHI where possible instead of MULH*.
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Custom);
      setOperationAction(ISD::UMUL_LOHI, VT, Custom);

      // Only z196 and above have native support for conversions to unsigned.
      // On z10, promoting to i64 doesn't generate an inexact condition for
      // values that are outside the i32 range but in the i64 range, so use
      // the default expansion.
      if (!Subtarget.hasFPExtension())
        setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    }
  }
  // Type legalization will convert 8- and 16-bit atomic operations into
  // forms that operate on i32s (but still keeping the original memory VT).
  // Lower them into full i32 operations.
  setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);

  // Even though i128 is not a legal type, we still need to custom lower
  // the atomic operations in order to exploit SystemZ instructions.
  setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);

  // We can use the CC result of compare-and-swap to implement
  // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  // Traps are legal, as we will convert them to "j .+2".
  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  // z10 has instructions for signed but not unsigned FP conversion.
  // Handle unsigned 32-bit types as signed 64-bit types.
  if (!Subtarget.hasFPExtension()) {
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
  }

  // We have native support for a 64-bit CTLZ, via FLOGR.
  setOperationAction(ISD::CTLZ, MVT::i32, Promote);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
  setOperationAction(ISD::CTLZ, MVT::i64, Legal);

  // Give LowerOperation the chance to replace 64-bit ORs with subregs.
  setOperationAction(ISD::OR, MVT::i64, Custom);

  // FIXME: Can we support these natively?
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);

  // We have native instructions for i8, i16 and i32 extensions, but not i1.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
  }
  // Handle the various types of symbolic address.
  setOperationAction(ISD::ConstantPool, PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
  setOperationAction(ISD::BlockAddress, PtrVT, Custom);
  setOperationAction(ISD::JumpTable, PtrVT, Custom);

  // We need to handle dynamic allocations specially because of the
  // 160-byte area at the bottom of the stack.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);

  // Use custom expanders so that we can force the function to use
  // a frame pointer.
  setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);

  // Handle prefetches with PFD or PFDRL.
  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
  for (MVT VT : MVT::vector_valuetypes()) {
    // Assume by default that all vector operations need to be expanded.
    for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
      if (getOperationAction(Opcode, VT) == Legal)
        setOperationAction(Opcode, VT, Expand);

    // Likewise all truncating stores and extending loads.
    for (MVT InnerVT : MVT::vector_valuetypes()) {
      setTruncStoreAction(VT, InnerVT, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
    }

    if (isTypeLegal(VT)) {
      // These operations are legal for anything that can be stored in a
      // vector register, even if there is no native support for the format
      // as such.  In particular, we can do these for v4f32 even though there
      // are no specific instructions for that format.
      setOperationAction(ISD::LOAD, VT, Legal);
      setOperationAction(ISD::STORE, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      // Likewise, except that we need to replace the nodes with something
      // more specific.
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
  }
  // Handle integer vector types.
  for (MVT VT : MVT::integer_vector_valuetypes()) {
    if (isTypeLegal(VT)) {
      // These operations have direct equivalents.
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);
      if (VT != MVT::v2i64)
        setOperationAction(ISD::MUL, VT, Legal);
      setOperationAction(ISD::AND, VT, Legal);
      setOperationAction(ISD::OR, VT, Legal);
      setOperationAction(ISD::XOR, VT, Legal);
      if (Subtarget.hasVectorEnhancements1())
        setOperationAction(ISD::CTPOP, VT, Legal);
      else
        setOperationAction(ISD::CTPOP, VT, Custom);
      setOperationAction(ISD::CTTZ, VT, Legal);
      setOperationAction(ISD::CTLZ, VT, Legal);

      // Convert a GPR scalar to a vector by inserting it into element 0.
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);

      // Use a series of unpacks for extensions.
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);

      // Detect shifts by a scalar amount and convert them into
      // V*_BY_SCALAR.
      setOperationAction(ISD::SHL, VT, Custom);
      setOperationAction(ISD::SRA, VT, Custom);
      setOperationAction(ISD::SRL, VT, Custom);

      // At present ROTL isn't matched by DAGCombiner.  ROTR should be
      // converted into ROTL.
      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
      // and inverting the result as necessary.
      setOperationAction(ISD::SETCC, VT, Custom);
    }
  }
  if (Subtarget.hasVector()) {
    // There should be no need to check for float types other than v2f64
    // since <2 x f32> isn't a legal type.
    setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);
  }
  // Handle floating-point types.
  for (unsigned I = MVT::FIRST_FP_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // We can use FI for FRINT.
      setOperationAction(ISD::FRINT, VT, Legal);

      // We can use the extended form of FI for other rounding operations.
      if (Subtarget.hasFPExtension()) {
        setOperationAction(ISD::FNEARBYINT, VT, Legal);
        setOperationAction(ISD::FFLOOR, VT, Legal);
        setOperationAction(ISD::FCEIL, VT, Legal);
        setOperationAction(ISD::FTRUNC, VT, Legal);
        setOperationAction(ISD::FROUND, VT, Legal);
      }

      // No special instructions for these.
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FSINCOS, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
    }
  }
  // Handle floating-point vector types.
  if (Subtarget.hasVector()) {
    // Scalar-to-vector conversion is just a subreg.
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);

    // Some insertions and extractions can be done directly but others
    // need to go via integers.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);

    // These operations have direct equivalents.
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FMA, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FABS, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
  }
  // The vector enhancements facility 1 has instructions for these.
  if (Subtarget.hasVectorEnhancements1()) {
    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::FABS, MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f64, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);
  }
  // We have fused multiply-addition for f32 and f64 but not f128
  // (unless the vector enhancements facility 1 is available).
  setOperationAction(ISD::FMA, MVT::f32, Legal);
  setOperationAction(ISD::FMA, MVT::f64, Legal);
  if (Subtarget.hasVectorEnhancements1())
    setOperationAction(ISD::FMA, MVT::f128, Legal);
  else
    setOperationAction(ISD::FMA, MVT::f128, Expand);

  // We don't have a copysign instruction on vector registers.
  if (Subtarget.hasVectorEnhancements1())
    setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);

  // Needed so that we don't try to implement f128 constant loads using
  // a load-and-extend of an f80 constant (in cases where the constant
  // would fit in an f80).
  for (MVT VT : MVT::fp_valuetypes())
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);

  // We don't have an extending load instruction on vector registers.
  if (Subtarget.hasVectorEnhancements1()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
  }

  // Floating-point truncation and stores need to be done separately.
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f64, Expand);

  // We have 64-bit FPR<->GPR moves, but need special handling for
  // 32-bit forms.
  if (!Subtarget.hasVector()) {
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::f32, Custom);
  }
  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
  // structure, but VAEND is a no-op.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  // Codes for which we want to perform some z-specific combinations.
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_EXTEND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::SDIV);
  setTargetDAGCombine(ISD::UDIV);
  setTargetDAGCombine(ISD::SREM);
  setTargetDAGCombine(ISD::UREM);

  // Handle intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We want to use MVC in preference to even a single load/store pair.
  MaxStoresPerMemcpy = 0;
  MaxStoresPerMemcpyOptSize = 0;

  // The main memset sequence is a byte store followed by an MVC.
  // Two STC or MV..I stores win over that, but the kind of fused stores
  // generated by target-independent code don't win when the byte value is
  // variable.  E.g.  "STC <reg>;MHI <reg>,257;STH <reg>" is not better
  // than "STC;MVC".  Handle the choice in target-specific code instead.
  MaxStoresPerMemset = 0;
  MaxStoresPerMemsetOptSize = 0;
}
EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
                                              LLVMContext &, EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  case MVT::f128:
    return Subtarget.hasVectorEnhancements1();
  default:
    break;
  }

  return false;
}
// Return true if Imm can be generated with a vector instruction, such as VGM.
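// (Roughly speaking, VGM sets a contiguous -- possibly wrapping -- run of one
// bits in each element, so the immediate qualifies when its bit pattern forms
// such a run.)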
bool SystemZTargetLowering::
analyzeFPImm(const APFloat &Imm, unsigned BitWidth, unsigned &Start,
             unsigned &End, const SystemZInstrInfo *TII) {
  APInt IntImm = Imm.bitcastToAPInt();
  if (IntImm.getActiveBits() > 64)
    return false;

  // See if this immediate could be generated with VGM.
  bool Success = TII->isRxSBGMask(IntImm.getZExtValue(), BitWidth, Start, End);
  if (!Success)
    return false;

  // isRxSBGMask returns the bit numbers for a full 64-bit value,
  // with 0 denoting 1 << 63 and 63 denoting 1.  Convert them to
  // bit numbers for a BitsPerElement value, so that 0 denotes
  // 1 << (BitsPerElement-1).
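  // For example, with BitWidth == 32, 64-bit bit numbers Start == 40 and
  // End == 47 become 8 and 15 after the adjustment below.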
  Start -= 64 - BitWidth;
  End -= 64 - BitWidth;
  return true;
}

bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
  if (Imm.isZero() || Imm.isNegZero())
    return true;

  if (!Subtarget.hasVector())
    return false;
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  unsigned Start, End;
  return analyzeFPImm(Imm, VT.getSizeInBits(), Start, End, TII);
}

bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  // We can use CGFI or CLGFI.
  return isInt<32>(Imm) || isUInt<32>(Imm);
}

bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  // We can use ALGFI or SLGFI.
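  // (For example, both 4094 and -4094 are legal, since either the value or
  // its negation fits in an unsigned 32-bit immediate, while an addend such
  // as 1ULL << 33 is not.)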
  return isUInt<32>(Imm) || isUInt<32>(-Imm);
}
bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
                                                           unsigned,
                                                           bool *Fast) const {
  // Unaligned accesses should never be slower than the expanded version.
  // We check specifically for aligned accesses in the few cases where
  // they are required.
  if (Fast)
    *Fast = true;
  return true;
}
// Information about the addressing mode for a memory access.
struct AddressingMode {
  // True if a long displacement is supported.
  bool LongDisplacement;

  // True if use of index register is supported.
  bool IndexReg;

  AddressingMode(bool LongDispl, bool IdxReg) :
    LongDisplacement(LongDispl), IndexReg(IdxReg) {}
};
// Return the desired addressing mode for a Load which has only one use (in
// the same block) which is a Store.
static AddressingMode getLoadStoreAddrMode(bool HasVector,
                                           Type *Ty) {
  // With vector support a Load->Store combination may be combined to either
  // an MVC or vector operations and it seems to work best to allow the
  // vector addressing mode.
  if (HasVector)
    return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);

  // Otherwise only the MVC case is special.
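  // (An i8 load feeding a store is likely to become an MVC, which only takes
  // a base register plus a 12-bit unsigned displacement and no index, hence
  // the more restrictive mode below.)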
  bool MVC = Ty->isIntegerTy(8);
  return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
}

// Return the addressing mode which seems most desirable given an LLVM
// Instruction pointer.
static AddressingMode
supportedAddressingMode(Instruction *I, bool HasVector) {
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default: break;
    case Intrinsic::memset:
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
    }
  }

  if (isa<LoadInst>(I) && I->hasOneUse()) {
    auto *SingleUser = dyn_cast<Instruction>(*I->user_begin());
    if (SingleUser->getParent() == I->getParent()) {
      if (isa<ICmpInst>(SingleUser)) {
        if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
          if (C->getBitWidth() <= 64 &&
              (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
            // Comparison of memory with 16 bit signed / unsigned immediate
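            // (presumably because the compare-memory-with-immediate forms
            //  only accept a base register and a short displacement)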
            return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
      } else if (isa<StoreInst>(SingleUser))
        // Load->Store
        return getLoadStoreAddrMode(HasVector, I->getType());
    }
  } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
    if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
      if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
        // Load->Store
        return getLoadStoreAddrMode(HasVector, LoadI->getType());
  }

  if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {

    // * Use LDE instead of LE/LEY for z13 to avoid partial register
    //   dependencies (LDE only supports small offsets).
    // * Utilize the vector registers to hold floating point
    //   values (vector load / store instructions only support small
    //   offsets).

    Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
                         I->getOperand(0)->getType());
    bool IsFPAccess = MemAccessTy->isFloatingPointTy();
    bool IsVectorAccess = MemAccessTy->isVectorTy();

    // A store of an extracted vector element will be combined into a VSTE type
    // instruction.
    if (!IsVectorAccess && isa<StoreInst>(I)) {
      Value *DataOp = I->getOperand(0);
      if (isa<ExtractElementInst>(DataOp))
        IsVectorAccess = true;
    }

    // A load which gets inserted into a vector element will be combined into a
    // VLE type instruction.
    if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
      User *LoadUser = *I->user_begin();
      if (isa<InsertElementInst>(LoadUser))
        IsVectorAccess = true;
    }

    if (IsFPAccess || IsVectorAccess)
      return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
  }

  return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
}
bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
       const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
  // Punt on globals for now, although they can be used in limited
  // RELATIVE LONG cases.
  if (AM.BaseGV)
    return false;

  // Require a 20-bit signed offset.
  if (!isInt<20>(AM.BaseOffs))
    return false;

  AddressingMode SupportedAM(true, true);
  if (I != nullptr)
    SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());

  if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
    return false;

  if (!SupportedAM.IndexReg)
    // No indexing allowed.
    return AM.Scale == 0;

  // Indexing is OK but no scale factor can be applied.
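  // For example, "base + index + disp" (Scale == 1) is fine, but a scaled
  // form such as "base + 4*index" is not, since SystemZ has no scaled-index
  // addressing.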
  return AM.Scale == 0 || AM.Scale == 1;
}

bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
  if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
    return false;
  unsigned FromBits = FromType->getPrimitiveSizeInBits();
  unsigned ToBits = ToType->getPrimitiveSizeInBits();
  return FromBits > ToBits;
}

bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
  if (!FromVT.isInteger() || !ToVT.isInteger())
    return false;
  unsigned FromBits = FromVT.getSizeInBits();
  unsigned ToBits = ToVT.getSizeInBits();
  return FromBits > ToBits;
}
//===----------------------------------------------------------------------===//
// Inline asm support
//===----------------------------------------------------------------------===//

TargetLowering::ConstraintType
SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'a': // Address register
    case 'd': // Data register (equivalent to 'r')
    case 'f': // Floating-point register
    case 'h': // High-part register
    case 'r': // General-purpose register
    case 'v': // Vector register
      return C_RegisterClass;

    case 'Q': // Memory with base and unsigned 12-bit displacement
    case 'R': // Likewise, plus an index
    case 'S': // Memory with base and signed 20-bit displacement
    case 'T': // Likewise, plus an index
    case 'm': // Equivalent to 'T'.
      return C_Memory;

    case 'I': // Unsigned 8-bit constant
    case 'J': // Unsigned 12-bit constant
    case 'K': // Signed 16-bit constant
    case 'L': // Signed 20-bit displacement (on all targets we support)
    case 'M': // 0x7fffffff
      return C_Other;

    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}
TargetLowering::ConstraintWeight SystemZTargetLowering::
getSingleConstraintMatchWeight(AsmOperandInfo &info,
                               const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();
  // Look at the constraint type.
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;

  case 'a': // Address register
  case 'd': // Data register (equivalent to 'r')
  case 'h': // High-part register
  case 'r': // General-purpose register
    if (CallOperandVal->getType()->isIntegerTy())
      weight = CW_Register;
    break;

  case 'f': // Floating-point register
    if (type->isFloatingPointTy())
      weight = CW_Register;
    break;

  case 'v': // Vector register
    if ((type->isVectorTy() || type->isFloatingPointTy()) &&
        Subtarget.hasVector())
      weight = CW_Register;
    break;

  case 'I': // Unsigned 8-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<8>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'J': // Unsigned 12-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<12>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'K': // Signed 16-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<16>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'L': // Signed 20-bit displacement (on all targets we support)
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<20>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'M': // 0x7fffffff
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (C->getZExtValue() == 0x7fffffff)
        weight = CW_Constant;
    break;
  }
  return weight;
}
// Parse a "{tNNN}" register constraint for which the register type "t"
// has already been verified.  RC is the register class associated with "t"
// and Map maps 0-based register numbers to LLVM register numbers.
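// For example (illustrative): the constraint "{r5}" parsed against
// SystemZMC::GR64Regs yields the pair (SystemZ::R5D, GR64BitRegClass).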
static std::pair<unsigned, const TargetRegisterClass *>
parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
                    const unsigned *Map, unsigned Size) {
  assert(*(Constraint.end()-1) == '}' && "Missing '}'");
  if (isdigit(Constraint[2])) {
    unsigned Index;
    bool Failed =
        Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
    if (!Failed && Index < Size && Map[Index])
      return std::make_pair(Map[Index], RC);
  }
  return std::make_pair(0U, nullptr);
}
std::pair<unsigned, const TargetRegisterClass *>
SystemZTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'd': // Data register (equivalent to 'r')
    case 'r': // General-purpose register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::GR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::GR128BitRegClass);
      return std::make_pair(0U, &SystemZ::GR32BitRegClass);

    case 'a': // Address register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
      return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);

    case 'h': // High-part register (an LLVM extension)
      return std::make_pair(0U, &SystemZ::GRH32BitRegClass);

    case 'f': // Floating-point register
      if (VT == MVT::f64)
        return std::make_pair(0U, &SystemZ::FP64BitRegClass);
      else if (VT == MVT::f128)
        return std::make_pair(0U, &SystemZ::FP128BitRegClass);
      return std::make_pair(0U, &SystemZ::FP32BitRegClass);

    case 'v': // Vector register
      if (Subtarget.hasVector()) {
        if (VT == MVT::f32)
          return std::make_pair(0U, &SystemZ::VR32BitRegClass);
        if (VT == MVT::f64)
          return std::make_pair(0U, &SystemZ::VR64BitRegClass);
        return std::make_pair(0U, &SystemZ::VR128BitRegClass);
      }
      break;
    }
  }
  if (Constraint.size() > 0 && Constraint[0] == '{') {
    // We need to override the default register parsing for GPRs and FPRs
    // because the interpretation depends on VT.  The internal names of
    // the registers are also different from the external names
    // (F0D and F0S instead of F0, etc.).
    if (Constraint[1] == 'r') {
      if (VT == MVT::i32)
        return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
                                   SystemZMC::GR32Regs, 16);
      if (VT == MVT::i128)
        return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
                                   SystemZMC::GR128Regs, 16);
      return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
                                 SystemZMC::GR64Regs, 16);
    }
    if (Constraint[1] == 'f') {
      if (VT == MVT::f32)
        return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
                                   SystemZMC::FP32Regs, 16);
      if (VT == MVT::f128)
        return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
                                   SystemZMC::FP128Regs, 16);
      return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
                                 SystemZMC::FP64Regs, 16);
    }
    if (Constraint[1] == 'v') {
      if (VT == MVT::f32)
        return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
                                   SystemZMC::VR32Regs, 32);
      if (VT == MVT::f64)
        return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
                                   SystemZMC::VR64Regs, 32);
      return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
                                 SystemZMC::VR128Regs, 32);
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
void SystemZTargetLowering::
LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                             std::vector<SDValue> &Ops,
                             SelectionDAG &DAG) const {
  // Only support length 1 constraints for now.
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'I': // Unsigned 8-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<8>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'J': // Unsigned 12-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<12>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'K': // Signed 16-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<16>(C->getSExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'L': // Signed 20-bit displacement (on all targets we support)
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<20>(C->getSExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'M': // 0x7fffffff
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0x7fffffff)
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
//===----------------------------------------------------------------------===//
// Calling conventions
//===----------------------------------------------------------------------===//

#include "SystemZGenCallingConv.inc"
const MCPhysReg *SystemZTargetLowering::getScratchRegisters(
    CallingConv::ID) const {
  static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
                                           SystemZ::R14D, 0 };
  return ScratchRegs;
}
bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
                                                     Type *ToType) const {
  return isTruncateFree(FromType, ToType);
}

bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  return CI->isTailCall();
}
// We do not yet support 128-bit single-element vector types.  If the user
// attempts to use such types as function argument or return type, prefer
// to error out instead of emitting code violating the ABI.
static void VerifyVectorType(MVT VT, EVT ArgVT) {
  if (ArgVT.isVector() && !VT.isVector())
    report_fatal_error("Unsupported vector argument or return type");
}

static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
  for (unsigned i = 0; i < Ins.size(); ++i)
    VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
}

static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
  for (unsigned i = 0; i < Outs.size(); ++i)
    VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
}
// Value is a value that has been passed to us in the location described by VA
// (and so has type VA.getLocVT()).  Convert Value to VA.getValVT(), chaining
// any loads onto Chain.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
                                   CCValAssign &VA, SDValue Chain,
                                   SDValue Value) {
  // If the argument has been promoted from a smaller type, insert an
  // assertion to capture this.
  if (VA.getLocInfo() == CCValAssign::SExt)
    Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));
  else if (VA.getLocInfo() == CCValAssign::ZExt)
    Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));

  if (VA.isExtInLoc())
    Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
  else if (VA.getLocInfo() == CCValAssign::BCvt) {
    // If this is a short vector argument loaded from the stack,
    // extend from i64 to full vector size and then bitcast.
    assert(VA.getLocVT() == MVT::i64);
    assert(VA.getValVT().isVector());
    Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
    Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
  } else
    assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
  return Value;
}
// Value is a value of type VA.getValVT() that we need to copy into
// the location described by VA.  Return a copy of Value converted to
// VA.getLocVT().  The caller is responsible for handling indirect values.
static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
                                   CCValAssign &VA, SDValue Value) {
  switch (VA.getLocInfo()) {
  case CCValAssign::SExt:
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::ZExt:
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::AExt:
    return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::BCvt:
    // If this is a short vector argument to be stored to the stack,
    // bitcast to v2i64 and then extract first element.
    assert(VA.getLocVT() == MVT::i64);
    assert(VA.getValVT().isVector());
    Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
                       DAG.getConstant(0, DL, MVT::i32));
  case CCValAssign::Full:
    return Value;
  default:
    llvm_unreachable("Unhandled getLocInfo()");
  }
}
SDValue SystemZTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SystemZMachineFunctionInfo *FuncInfo =
      MF.getInfo<SystemZMachineFunctionInfo>();
  auto *TFL =
      static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // Detect unsupported vector argument types.
  if (Subtarget.hasVector())
    VerifyVectorTypes(Ins);

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);

  unsigned NumFixedGPRs = 0;
  unsigned NumFixedFPRs = 0;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    SDValue ArgValue;
    CCValAssign &VA = ArgLocs[I];
    EVT LocVT = VA.getLocVT();
    if (VA.isRegLoc()) {
      // Arguments passed in registers
      const TargetRegisterClass *RC;
      switch (LocVT.getSimpleVT().SimpleTy) {
      default:
        // Integers smaller than i64 should be promoted to i64.
        llvm_unreachable("Unexpected argument type");
      case MVT::i32:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR32BitRegClass;
        break;
      case MVT::i64:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR64BitRegClass;
        break;
      case MVT::f32:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP32BitRegClass;
        break;
      case MVT::f64:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP64BitRegClass;
        break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        RC = &SystemZ::VR128BitRegClass;
        break;
      }

      unsigned VReg = MRI.createVirtualRegister(RC);
      MRI.addLiveIn(VA.getLocReg(), VReg);
      ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Create the frame index object for this incoming parameter.
      int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
                                     VA.getLocMemOffset(), true);

      // Create the SelectionDAG nodes corresponding to a load
      // from this parameter.  Unpromoted ints and floats are
      // passed as right-justified 8-byte values.
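      // (For example, an i32 stack argument occupies the high-addressed half
      // of its 8-byte slot on this big-endian target, hence the +4 below.)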
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
                          DAG.getIntPtrConstant(4, DL));
      ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
                             MachinePointerInfo::getFixedStack(MF, FI));
    }

    // Convert the value of the argument register into the value that's
    // being passed.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      // If the original argument was split (e.g. i128), we need
      // to load all parts of it here (using the same address).
      unsigned ArgIndex = Ins[I].OrigArgIndex;
      assert (Ins[I].PartOffset == 0);
      while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[I + 1];
        unsigned PartOffset = Ins[I + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++I;
      }
    } else
      InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
  }

  if (IsVarArg) {
    // Save the number of non-varargs registers for later use by va_start, etc.
    FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
    FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);

    // Likewise the address (in the form of a frame index) of where the
    // first stack vararg would be.  The 1-byte size here is arbitrary.
    int64_t StackSize = CCInfo.getNextStackOffset();
    FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));

    // ...and a similar frame index for the caller-allocated save area
    // that will be used to store the incoming registers.
    int64_t RegSaveOffset = TFL->getOffsetOfLocalArea();
    unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
    FuncInfo->setRegSaveFrameIndex(RegSaveIndex);

    // Store the FPR varargs in the reserved frame slots.  (We store the
    // GPRs as part of the prologue.)
    if (NumFixedFPRs < SystemZ::NumArgFPRs) {
      SDValue MemOps[SystemZ::NumArgFPRs];
      for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) {
        unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]);
        int FI = MFI.CreateFixedObject(8, RegSaveOffset + Offset, true);
        SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
        unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I],
                                     &SystemZ::FP64BitRegClass);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
        MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
                                 MachinePointerInfo::getFixedStack(MF, FI));
      }
      // Join the stores, which are independent of one another.
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                          makeArrayRef(&MemOps[NumFixedFPRs],
                                       SystemZ::NumArgFPRs-NumFixedFPRs));
    }
  }

  return Chain;
}
static bool canUseSiblingCall(const CCState &ArgCCInfo,
                              SmallVectorImpl<CCValAssign> &ArgLocs,
                              SmallVectorImpl<ISD::OutputArg> &Outs) {
  // Punt if there are any indirect or stack arguments, or if the call
  // needs the callee-saved argument register R6, or if the call uses
  // the callee-saved register arguments SwiftSelf and SwiftError.
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    if (VA.getLocInfo() == CCValAssign::Indirect)
      return false;
    if (!VA.isRegLoc())
      return false;
    unsigned Reg = VA.getLocReg();
    if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
      return false;
    if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
      return false;
  }
  return true;
}
SDValue
SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                 SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Detect unsupported vector argument and return types.
  if (Subtarget.hasVector()) {
    VerifyVectorTypes(Outs);
    VerifyVectorTypes(Ins);
  }

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);

  // We don't support GuaranteedTailCallOpt, only automatically-detected
  // sibling calls.
  if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
    IsTailCall = false;

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  // Mark the start of the call.
  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
  // Copy argument values to their designated locations.
  SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    SDValue ArgValue = OutVals[I];

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[I].ArgVT);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      // If the original argument was split (e.g. i128), we need
      // to store all parts of it here (and pass just one address).
      unsigned ArgIndex = Outs[I].OrigArgIndex;
      assert (Outs[I].PartOffset == 0);
      while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[I + 1];
        unsigned PartOffset = Outs[I + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI)));
        ++I;
      }
      ArgValue = SpillSlot;
    } else
      ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);

    if (VA.isRegLoc())
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Work out the address of the stack slot.  Unpromoted ints and
      // floats are passed as right-justified 8-byte values.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
      unsigned Offset = SystemZMC::CallFrameSize + VA.getLocMemOffset();
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        Offset += 4;
      SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                                    DAG.getIntPtrConstant(Offset, DL));

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }
  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  // Accept direct calls by converting symbolic call addresses to the
  // associated Target* opcodes.  Force %r1 to be used for indirect
  // tail calls.
  SDValue Glue;
  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
  } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
  } else if (IsTailCall) {
    Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
    Glue = Chain.getValue(1);
    Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
  }

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
                             RegsToPass[I].second, Glue);
    Glue = Chain.getValue(1);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
    Ops.push_back(DAG.getRegister(RegsToPass[I].first,
                                  RegsToPass[I].second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  if (IsTailCall)
    return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
  Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
  RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
    CCValAssign &VA = RetLocs[I];

    // Copy the value out, gluing the copy to the end of the call sequence.
    SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
                                          VA.getLocVT(), Glue);
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    // Convert the value of the return register into the value that's
    // being returned.
    InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
  }

  return Chain;
}
bool SystemZTargetLowering::
CanLowerReturn(CallingConv::ID CallConv,
               MachineFunction &MF, bool isVarArg,
               const SmallVectorImpl<ISD::OutputArg> &Outs,
               LLVMContext &Context) const {
  // Detect unsupported vector return types.
  if (Subtarget.hasVector())
    VerifyVectorTypes(Outs);

  // Special case that we cannot easily detect in RetCC_SystemZ since
  // i128 is not a legal type.
  for (auto &Out : Outs)
    if (Out.ArgVT == MVT::i128)
      return false;

  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
  return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
}

SDValue
SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                   bool IsVarArg,
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                                   const SmallVectorImpl<SDValue> &OutVals,
                                   const SDLoc &DL, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // Detect unsupported vector return types.
  if (Subtarget.hasVector())
    VerifyVectorTypes(Outs);

  // Assign locations to each returned value.
  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
  RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);

  // Quick exit for void returns
  if (RetLocs.empty())
    return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);

  // Copy the result values into the output registers.
  SDValue Glue;
  SmallVector<SDValue, 4> RetOps;
  RetOps.push_back(Chain);
  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
    CCValAssign &VA = RetLocs[I];
    SDValue RetValue = OutVals[I];

    // Make the return register live on exit.
    assert(VA.isRegLoc() && "Can only return in registers!");

    // Promote the value as required.
    RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);

    // Chain and glue the copies together.
    unsigned Reg = VA.getLocReg();
    Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
  }

  // Update chain and glue.
  RetOps[0] = Chain;
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
}

// Return true if Op is an intrinsic node with chain that returns the CC value
// as its only (other) argument.  Provide the associated SystemZISD opcode and
// the mask of valid CC values if so.
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
                                      unsigned &CCValid) {
  unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  switch (Id) {
  case Intrinsic::s390_tbegin:
    Opcode = SystemZISD::TBEGIN;
    CCValid = SystemZ::CCMASK_TBEGIN;
    return true;

  case Intrinsic::s390_tbegin_nofloat:
    Opcode = SystemZISD::TBEGIN_NOFLOAT;
    CCValid = SystemZ::CCMASK_TBEGIN;
    return true;

  case Intrinsic::s390_tend:
    Opcode = SystemZISD::TEND;
    CCValid = SystemZ::CCMASK_TEND;
    return true;

  default:
    return false;
  }
}

// Return true if Op is an intrinsic node without chain that returns the
// CC value as its final argument.  Provide the associated SystemZISD
// opcode and the mask of valid CC values if so.
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  switch (Id) {
  case Intrinsic::s390_vpkshs:
  case Intrinsic::s390_vpksfs:
  case Intrinsic::s390_vpksgs:
    Opcode = SystemZISD::PACKS_CC;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vpklshs:
  case Intrinsic::s390_vpklsfs:
  case Intrinsic::s390_vpklsgs:
    Opcode = SystemZISD::PACKLS_CC;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vceqbs:
  case Intrinsic::s390_vceqhs:
  case Intrinsic::s390_vceqfs:
  case Intrinsic::s390_vceqgs:
    Opcode = SystemZISD::VICMPES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vchbs:
  case Intrinsic::s390_vchhs:
  case Intrinsic::s390_vchfs:
  case Intrinsic::s390_vchgs:
    Opcode = SystemZISD::VICMPHS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vchlbs:
  case Intrinsic::s390_vchlhs:
  case Intrinsic::s390_vchlfs:
  case Intrinsic::s390_vchlgs:
    Opcode = SystemZISD::VICMPHLS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vtm:
    Opcode = SystemZISD::VTM;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfaebs:
  case Intrinsic::s390_vfaehs:
  case Intrinsic::s390_vfaefs:
    Opcode = SystemZISD::VFAE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfaezbs:
  case Intrinsic::s390_vfaezhs:
  case Intrinsic::s390_vfaezfs:
    Opcode = SystemZISD::VFAEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfeebs:
  case Intrinsic::s390_vfeehs:
  case Intrinsic::s390_vfeefs:
    Opcode = SystemZISD::VFEE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfeezbs:
  case Intrinsic::s390_vfeezhs:
  case Intrinsic::s390_vfeezfs:
    Opcode = SystemZISD::VFEEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfenebs:
  case Intrinsic::s390_vfenehs:
  case Intrinsic::s390_vfenefs:
    Opcode = SystemZISD::VFENE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfenezbs:
  case Intrinsic::s390_vfenezhs:
  case Intrinsic::s390_vfenezfs:
    Opcode = SystemZISD::VFENEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vistrbs:
  case Intrinsic::s390_vistrhs:
  case Intrinsic::s390_vistrfs:
    Opcode = SystemZISD::VISTR_CC;
    CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
    return true;

  case Intrinsic::s390_vstrcbs:
  case Intrinsic::s390_vstrchs:
  case Intrinsic::s390_vstrcfs:
    Opcode = SystemZISD::VSTRC_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vstrczbs:
  case Intrinsic::s390_vstrczhs:
  case Intrinsic::s390_vstrczfs:
    Opcode = SystemZISD::VSTRCZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfcedbs:
  case Intrinsic::s390_vfcesbs:
    Opcode = SystemZISD::VFCMPES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfchdbs:
  case Intrinsic::s390_vfchsbs:
    Opcode = SystemZISD::VFCMPHS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfchedbs:
  case Intrinsic::s390_vfchesbs:
    Opcode = SystemZISD::VFCMPHES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vftcidb:
  case Intrinsic::s390_vftcisb:
    Opcode = SystemZISD::VFTCI;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_tdc:
    Opcode = SystemZISD::TDC;
    CCValid = SystemZ::CCMASK_TDC;
    return true;

  default:
    return false;
  }
}

// Emit an intrinsic with chain and an explicit CC register result.
static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
                                           unsigned Opcode) {
  // Copy all operands except the intrinsic ID.
  unsigned NumOps = Op.getNumOperands();
  SmallVector<SDValue, 6> Ops;
  Ops.reserve(NumOps - 1);
  Ops.push_back(Op.getOperand(0));
  for (unsigned I = 2; I < NumOps; ++I)
    Ops.push_back(Op.getOperand(I));

  assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
  SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
  SDValue OldChain = SDValue(Op.getNode(), 1);
  SDValue NewChain = SDValue(Intr.getNode(), 1);
  DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
  return Intr.getNode();
}

// Emit an intrinsic with an explicit CC register result.
static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
                                   unsigned Opcode) {
  // Copy all operands except the intrinsic ID.
  unsigned NumOps = Op.getNumOperands();
  SmallVector<SDValue, 6> Ops;
  Ops.reserve(NumOps - 1);
  for (unsigned I = 1; I < NumOps; ++I)
    Ops.push_back(Op.getOperand(I));

  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
  return Intr.getNode();
}

// CC is a comparison that will be implemented using an integer or
// floating-point comparison.  Return the condition code mask for
// a branch on true.  In the integer case, CCMASK_CMP_UO is set for
// unsigned comparisons and clear for signed ones.  In the floating-point
// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
static unsigned CCMaskForCondCode(ISD::CondCode CC) {
#define CONV(X) \
  case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
  case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
  case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X

  switch (CC) {
  default:
    llvm_unreachable("Invalid integer condition!");

  CONV(EQ);
  CONV(NE);
  CONV(GT);
  CONV(GE);
  CONV(LT);
  CONV(LE);

  case ISD::SETO:  return SystemZ::CCMASK_CMP_O;
  case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
  }
#undef CONV
}
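
// For example, CCMaskForCondCode(ISD::SETOGT) is just SystemZ::CCMASK_CMP_GT,
// while CCMaskForCondCode(ISD::SETUGT) is CCMASK_CMP_UO | CCMASK_CMP_GT: the
// extra CCMASK_CMP_UO bit marks the comparison as unsigned for integers and
// means "unordered or greater" for floating point, as described above.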

// If C can be converted to a comparison against zero, adjust the operands
// as necessary.
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
  if (C.ICmpType == SystemZICMP::UnsignedOnly)
    return;

  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
  if (!ConstOp1)
    return;

  int64_t Value = ConstOp1->getSExtValue();
  if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
      (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
      (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
      (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
    C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
    C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
  }
}

// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
// adjust the operands as necessary.
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
                             Comparison &C) {
  // For us to make any changes, it must be a comparison between a single-use
  // load and a constant.
  if (!C.Op0.hasOneUse() ||
      C.Op0.getOpcode() != ISD::LOAD ||
      C.Op1.getOpcode() != ISD::Constant)
    return;

  // We must have an 8- or 16-bit load.
  auto *Load = cast<LoadSDNode>(C.Op0);
  unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits();
  if (NumBits != 8 && NumBits != 16)
    return;

  // The load must be an extending one and the constant must be within the
  // range of the unextended value.
  auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
  uint64_t Value = ConstOp1->getZExtValue();
  uint64_t Mask = (1 << NumBits) - 1;
  if (Load->getExtensionType() == ISD::SEXTLOAD) {
    // Make sure that ConstOp1 is in range of C.Op0.
    int64_t SignedValue = ConstOp1->getSExtValue();
    if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
      return;
    if (C.ICmpType != SystemZICMP::SignedOnly) {
      // Unsigned comparison between two sign-extended values is equivalent
      // to unsigned comparison between two zero-extended values.
      Value &= Mask;
    } else if (NumBits == 8) {
      // Try to treat the comparison as unsigned, so that we can use CLI.
      // Adjust CCMask and Value as necessary.
      if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
        // Test whether the high bit of the byte is set.
        Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
      else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
        // Test whether the high bit of the byte is clear.
        Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
      else
        // No instruction exists for this combination.
        return;
      C.ICmpType = SystemZICMP::UnsignedOnly;
    }
  } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
    if (Value > Mask)
      return;
    // If the constant is in range, we can use any comparison.
    C.ICmpType = SystemZICMP::Any;
  } else
    return;

  // Make sure that the first operand is an i32 of the right extension type.
  ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
                              ISD::SEXTLOAD :
                              ISD::ZEXTLOAD);
  if (C.Op0.getValueType() != MVT::i32 ||
      Load->getExtensionType() != ExtType) {
    C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
                           Load->getBasePtr(), Load->getPointerInfo(),
                           Load->getMemoryVT(), Load->getAlignment(),
                           Load->getMemOperand()->getFlags());
    // Update the chain uses.
    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
  }

  // Make sure that the second operand is an i32 with the right value.
  if (C.Op1.getValueType() != MVT::i32 ||
      Value != ConstOp1->getZExtValue())
    C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
}

// Return true if Op is either an unextended load, or a load suitable
// for integer register-memory comparisons of type ICmpType.
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
  auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
  if (Load) {
    // There are no instructions to compare a register with a memory byte.
    if (Load->getMemoryVT() == MVT::i8)
      return false;
    // Otherwise decide on extension type.
    switch (Load->getExtensionType()) {
    case ISD::NON_EXTLOAD:
      return true;
    case ISD::SEXTLOAD:
      return ICmpType != SystemZICMP::UnsignedOnly;
    case ISD::ZEXTLOAD:
      return ICmpType != SystemZICMP::SignedOnly;
    default:
      break;
    }
  }
  return false;
}

// Return true if it is better to swap the operands of C.
static bool shouldSwapCmpOperands(const Comparison &C) {
  // Leave f128 comparisons alone, since they have no memory forms.
  if (C.Op0.getValueType() == MVT::f128)
    return false;

  // Always keep a floating-point constant second, since comparisons with
  // zero can use LOAD TEST and comparisons with other constants make a
  // natural memory operand.
  if (isa<ConstantFPSDNode>(C.Op1))
    return false;

  // Never swap comparisons with zero since there are many ways to optimize
  // those later.
  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
  if (ConstOp1 && ConstOp1->getZExtValue() == 0)
    return false;

  // Also keep natural memory operands second if the loaded value is
  // only used here.  Several comparisons have memory forms.
  if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
    return false;

  // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
  // In that case we generally prefer the memory to be second.
  if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
    // The only exceptions are when the second operand is a constant and
    // we can use things like CHHSI.
    if (!ConstOp1)
      return true;
    // The unsigned memory-immediate instructions can handle 16-bit
    // unsigned integers.
    if (C.ICmpType != SystemZICMP::SignedOnly &&
        isUInt<16>(ConstOp1->getZExtValue()))
      return false;
    // The signed memory-immediate instructions can handle 16-bit
    // signed integers.
    if (C.ICmpType != SystemZICMP::UnsignedOnly &&
        isInt<16>(ConstOp1->getSExtValue()))
      return false;
    return true;
  }

  // Try to promote the use of CGFR and CLGFR.
  unsigned Opcode0 = C.Op0.getOpcode();
  if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
    return true;
  if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
    return true;
  if (C.ICmpType != SystemZICMP::SignedOnly &&
      Opcode0 == ISD::AND &&
      C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
    return true;

  return false;
}

// Return a version of comparison CC mask CCMask in which the LT and GT
// actions are swapped.
static unsigned reverseCCMask(unsigned CCMask) {
  return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
          (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
          (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
          (CCMask & SystemZ::CCMASK_CMP_UO));
}
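
// For example, reverseCCMask(SystemZ::CCMASK_CMP_LE), i.e. CMP_EQ | CMP_LT,
// yields CCMASK_CMP_EQ | CCMASK_CMP_GT, which is CCMASK_CMP_GE: the EQ and UO
// bits pass through unchanged while the LT and GT bits trade places.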

// Check whether C tests for equality between X and Y and whether X - Y
// or Y - X is also computed.  In that case it's better to compare the
// result of the subtraction against zero.
static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
                                 Comparison &C) {
  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
      C.CCMask == SystemZ::CCMASK_CMP_NE) {
    for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
      SDNode *N = *I;
      if (N->getOpcode() == ISD::SUB &&
          ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
           (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
        C.Op0 = SDValue(N, 0);
        C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
        return;
      }
    }
  }
}

// Check whether C compares a floating-point value with zero and if that
// floating-point value is also negated.  In this case we can use the
// negation to set CC, so avoiding separate LOAD AND TEST and
// LOAD (NEGATIVE/COMPLEMENT) instructions.
static void adjustForFNeg(Comparison &C) {
  auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
  if (C1 && C1->isZero()) {
    for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
      SDNode *N = *I;
      if (N->getOpcode() == ISD::FNEG) {
        C.Op0 = SDValue(N, 0);
        C.CCMask = reverseCCMask(C.CCMask);
        return;
      }
    }
  }
}

// Check whether C compares (shl X, 32) with 0 and whether X is
// also sign-extended.  In that case it is better to test the result
// of the sign extension using LTGFR.
//
// This case is important because InstCombine transforms a comparison
// with (sext (trunc X)) into a comparison with (shl X, 32).
static void adjustForLTGFR(Comparison &C) {
  // Check for a comparison between (shl X, 32) and 0.
  if (C.Op0.getOpcode() == ISD::SHL &&
      C.Op0.getValueType() == MVT::i64 &&
      C.Op1.getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
    auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
    if (C1 && C1->getZExtValue() == 32) {
      SDValue ShlOp0 = C.Op0.getOperand(0);
      // See whether X has any SIGN_EXTEND_INREG uses.
      for (auto I = ShlOp0->use_begin(), E = ShlOp0->use_end(); I != E; ++I) {
        SDNode *N = *I;
        if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
            cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
          C.Op0 = SDValue(N, 0);
          return;
        }
      }
    }
  }
}

// If C compares the truncation of an extending load, try to compare
// the untruncated value instead.  This exposes more opportunities to
// reuse CC.
static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
                               Comparison &C) {
  if (C.Op0.getOpcode() == ISD::TRUNCATE &&
      C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
      C.Op1.getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
    auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
    if (L->getMemoryVT().getStoreSizeInBits() <= C.Op0.getValueSizeInBits()) {
      unsigned Type = L->getExtensionType();
      if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
          (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
        C.Op0 = C.Op0.getOperand(0);
        C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
      }
    }
  }
}

// Return true if shift operation N has an in-range constant shift value.
// Store it in ShiftVal if so.
static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
  auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!Shift)
    return false;

  uint64_t Amount = Shift->getZExtValue();
  if (Amount >= N.getValueSizeInBits())
    return false;

  ShiftVal = Amount;
  return true;
}

// Check whether an AND with Mask is suitable for a TEST UNDER MASK
// instruction and whether the CC value is descriptive enough to handle
// a comparison of type Opcode between the AND result and CmpVal.
// CCMask says which comparison result is being tested and BitSize is
// the number of bits in the operands.  If TEST UNDER MASK can be used,
// return the corresponding CC mask, otherwise return 0.
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
                                     uint64_t Mask, uint64_t CmpVal,
                                     unsigned ICmpType) {
  assert(Mask != 0 && "ANDs with zero should have been removed by now");

  // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
  if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
      !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
    return 0;

  // Work out the masks for the lowest and highest bits.
  unsigned HighShift = 63 - countLeadingZeros(Mask);
  uint64_t High = uint64_t(1) << HighShift;
  uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);

  // Signed ordered comparisons are effectively unsigned if the sign
  // bit is clear.
  bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);

  // Check for equality comparisons with 0, or the equivalent.
  if (CmpVal == 0) {
    if (CCMask == SystemZ::CCMASK_CMP_EQ)
      return SystemZ::CCMASK_TM_ALL_0;
    if (CCMask == SystemZ::CCMASK_CMP_NE)
      return SystemZ::CCMASK_TM_SOME_1;
  }
  if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
    if (CCMask == SystemZ::CCMASK_CMP_LT)
      return SystemZ::CCMASK_TM_ALL_0;
    if (CCMask == SystemZ::CCMASK_CMP_GE)
      return SystemZ::CCMASK_TM_SOME_1;
  }
  if (EffectivelyUnsigned && CmpVal < Low) {
    if (CCMask == SystemZ::CCMASK_CMP_LE)
      return SystemZ::CCMASK_TM_ALL_0;
    if (CCMask == SystemZ::CCMASK_CMP_GT)
      return SystemZ::CCMASK_TM_SOME_1;
  }

  // Check for equality comparisons with the mask, or the equivalent.
  if (CmpVal == Mask) {
    if (CCMask == SystemZ::CCMASK_CMP_EQ)
      return SystemZ::CCMASK_TM_ALL_1;
    if (CCMask == SystemZ::CCMASK_CMP_NE)
      return SystemZ::CCMASK_TM_SOME_0;
  }
  if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
    if (CCMask == SystemZ::CCMASK_CMP_GT)
      return SystemZ::CCMASK_TM_ALL_1;
    if (CCMask == SystemZ::CCMASK_CMP_LE)
      return SystemZ::CCMASK_TM_SOME_0;
  }
  if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
    if (CCMask == SystemZ::CCMASK_CMP_GE)
      return SystemZ::CCMASK_TM_ALL_1;
    if (CCMask == SystemZ::CCMASK_CMP_LT)
      return SystemZ::CCMASK_TM_SOME_0;
  }

  // Check for ordered comparisons with the top bit.
  if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
    if (CCMask == SystemZ::CCMASK_CMP_LE)
      return SystemZ::CCMASK_TM_MSB_0;
    if (CCMask == SystemZ::CCMASK_CMP_GT)
      return SystemZ::CCMASK_TM_MSB_1;
  }
  if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
    if (CCMask == SystemZ::CCMASK_CMP_LT)
      return SystemZ::CCMASK_TM_MSB_0;
    if (CCMask == SystemZ::CCMASK_CMP_GE)
      return SystemZ::CCMASK_TM_MSB_1;
  }

  // If there are just two bits, we can do equality checks for Low and High
  // as well.
  if (Mask == Low + High) {
    if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
      return SystemZ::CCMASK_TM_MIXED_MSB_0;
    if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
      return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
    if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
      return SystemZ::CCMASK_TM_MIXED_MSB_1;
    if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
      return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
  }

  // Looks like we've exhausted our options.
  return 0;
}
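
// Worked example: for (and X, 0xff00) == 0 the mask is suitable for TMLL,
// Low is 0x100 and High is 0x8000, so the equality test against zero above
// maps to CCMASK_TM_ALL_0 (all selected bits clear), while the corresponding
// inequality maps to CCMASK_TM_SOME_1.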

// See whether C can be implemented as a TEST UNDER MASK instruction.
// Update the arguments with the TM version if so.
static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
                                   Comparison &C) {
  // Check that we have a comparison with a constant.
  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
  if (!ConstOp1)
    return;
  uint64_t CmpVal = ConstOp1->getZExtValue();

  // Check whether the nonconstant input is an AND with a constant mask.
  Comparison NewC(C);
  uint64_t MaskVal;
  ConstantSDNode *Mask = nullptr;
  if (C.Op0.getOpcode() == ISD::AND) {
    NewC.Op0 = C.Op0.getOperand(0);
    NewC.Op1 = C.Op0.getOperand(1);
    Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
    if (!Mask)
      return;
    MaskVal = Mask->getZExtValue();
  } else {
    // There is no instruction to compare with a 64-bit immediate
    // so use TMHH instead if possible.  We need an unsigned ordered
    // comparison with an i64 immediate.
    if (NewC.Op0.getValueType() != MVT::i64 ||
        NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
        NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
        NewC.ICmpType == SystemZICMP::SignedOnly)
      return;
    // Convert LE and GT comparisons into LT and GE.
    if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
        NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
      if (CmpVal == uint64_t(-1))
        return;
      CmpVal += 1;
      NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
    }
    // If the low N bits of Op1 are zero, then the low N bits of Op0 can
    // be masked off without changing the result.
    MaskVal = -(CmpVal & -CmpVal);
    NewC.ICmpType = SystemZICMP::UnsignedOnly;
  }
  if (!MaskVal)
    return;

  // Check whether the combination of mask, comparison value and comparison
  // type are suitable.
  unsigned BitSize = NewC.Op0.getValueSizeInBits();
  unsigned NewCCMask, ShiftVal;
  if (NewC.ICmpType != SystemZICMP::SignedOnly &&
      NewC.Op0.getOpcode() == ISD::SHL &&
      isSimpleShift(NewC.Op0, ShiftVal) &&
      (MaskVal >> ShiftVal != 0) &&
      ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
      (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
                                        MaskVal >> ShiftVal,
                                        CmpVal >> ShiftVal,
                                        SystemZICMP::Any))) {
    NewC.Op0 = NewC.Op0.getOperand(0);
    MaskVal >>= ShiftVal;
  } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
             NewC.Op0.getOpcode() == ISD::SRL &&
             isSimpleShift(NewC.Op0, ShiftVal) &&
             (MaskVal << ShiftVal != 0) &&
             ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
             (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
                                               MaskVal << ShiftVal,
                                               CmpVal << ShiftVal,
                                               SystemZICMP::UnsignedOnly))) {
    NewC.Op0 = NewC.Op0.getOperand(0);
    MaskVal <<= ShiftVal;
  } else {
    NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
                                     NewC.ICmpType);
    if (!NewCCMask)
      return;
  }

  // Go ahead and make the change.
  C.Opcode = SystemZISD::TM;
  C.Op0 = NewC.Op0;
  if (Mask && Mask->getZExtValue() == MaskVal)
    C.Op1 = SDValue(Mask, 0);
  else
    C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
  C.CCValid = SystemZ::CCMASK_TM;
  C.CCMask = NewCCMask;
}

// See whether the comparison argument contains a redundant AND
// and remove it if so.  This sometimes happens due to the generic
// BRCOND expansion.
static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
                                  Comparison &C) {
  if (C.Op0.getOpcode() != ISD::AND)
    return;
  auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
  if (!Mask)
    return;
  KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
  if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
    return;

  C.Op0 = C.Op0.getOperand(0);
}

// Return a Comparison that tests the condition-code result of intrinsic
// node Call against constant integer CC using comparison code Cond.
// Opcode is the opcode of the SystemZISD operation for the intrinsic
// and CCValid is the set of possible condition-code results.
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
                                  SDValue Call, unsigned CCValid, uint64_t CC,
                                  ISD::CondCode Cond) {
  Comparison C(Call, SDValue());
  C.Opcode = Opcode;
  C.CCValid = CCValid;
  if (Cond == ISD::SETEQ)
    // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
    C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
  else if (Cond == ISD::SETNE)
    // ...and the inverse of that.
    C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
  else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
    // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
    // always true for CC>3.
    C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
  else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
    // ...and the inverse of that.
    C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
  else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
    // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
    // always true for CC>3.
    C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
  else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
    // ...and the inverse of that.
    C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
  else
    llvm_unreachable("Unexpected integer comparison type");
  C.CCMask &= CCValid;
  return C;
}
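
// For example, a SETEQ test of "CC == 1" sets CCMask to 1 << (3 - 1), the bit
// for CC value 1, while a SETULT test against 2 sets it to ~0U << (4 - 2),
// whose low four bits cover CC values 0 and 1; the final "&= CCValid" then
// drops any bits the intrinsic can never produce.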

// Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
                         ISD::CondCode Cond, const SDLoc &DL) {
  if (CmpOp1.getOpcode() == ISD::Constant) {
    uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
    unsigned Opcode, CCValid;
    if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
        isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
      return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
    if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
        isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
      return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
  }
  Comparison C(CmpOp0, CmpOp1);
  C.CCMask = CCMaskForCondCode(Cond);
  if (C.Op0.getValueType().isFloatingPoint()) {
    C.CCValid = SystemZ::CCMASK_FCMP;
    C.Opcode = SystemZISD::FCMP;
    adjustForFNeg(C);
  } else {
    C.CCValid = SystemZ::CCMASK_ICMP;
    C.Opcode = SystemZISD::ICMP;
    // Choose the type of comparison.  Equality and inequality tests can
    // use either signed or unsigned comparisons.  The choice also doesn't
    // matter if both sign bits are known to be clear.  In those cases we
    // want to give the main isel code the freedom to choose whichever
    // form fits best.
    if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
        C.CCMask == SystemZ::CCMASK_CMP_NE ||
        (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
      C.ICmpType = SystemZICMP::Any;
    else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
      C.ICmpType = SystemZICMP::UnsignedOnly;
    else
      C.ICmpType = SystemZICMP::SignedOnly;
    C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
    adjustForRedundantAnd(DAG, DL, C);
    adjustZeroCmp(DAG, DL, C);
    adjustSubwordCmp(DAG, DL, C);
    adjustForSubtraction(DAG, DL, C);
    adjustForLTGFR(C);
    adjustICmpTruncate(DAG, DL, C);
  }

  if (shouldSwapCmpOperands(C)) {
    std::swap(C.Op0, C.Op1);
    C.CCMask = reverseCCMask(C.CCMask);
  }

  adjustForTestUnderMask(DAG, DL, C);
  return C;
}

// Emit the comparison instruction described by C.
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
  if (!C.Op1.getNode()) {
    SDNode *Node;
    switch (C.Op0.getOpcode()) {
    case ISD::INTRINSIC_W_CHAIN:
      Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
      return SDValue(Node, 0);
    case ISD::INTRINSIC_WO_CHAIN:
      Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
      return SDValue(Node, Node->getNumValues() - 1);
    default:
      llvm_unreachable("Invalid comparison operands");
    }
  }

  if (C.Opcode == SystemZISD::ICMP)
    return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
                       DAG.getConstant(C.ICmpType, DL, MVT::i32));
  if (C.Opcode == SystemZISD::TM) {
    bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
                         bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
    return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
                       DAG.getConstant(RegisterOnly, DL, MVT::i32));
  }
  return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
}

// Implement a 32-bit *MUL_LOHI operation by extending both operands to
// 64 bits.  Extend is the extension type to use.  Store the high part
// in Hi and the low part in Lo.
static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
                            SDValue Op0, SDValue Op1, SDValue &Hi,
                            SDValue &Lo) {
  Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
  Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
  SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
  Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
                   DAG.getConstant(32, DL, MVT::i64));
  Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
  Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
}

// Lower a binary operation that produces two VT results, one in each
// half of a GR128 pair.  Op0 and Op1 are the VT operands to the operation,
// and Opcode performs the GR128 operation.  Store the even register result
// in Even and the odd register result in Odd.
static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                             unsigned Opcode, SDValue Op0, SDValue Op1,
                             SDValue &Even, SDValue &Odd) {
  SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
  bool Is32Bit = is32Bit(VT);
  Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
  Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
}

// Return an i32 value that is 1 if the CC value produced by CCReg is
// in the mask CCMask and 0 otherwise.  CC is known to have a value
// in CCValid, so other values can be ignored.
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
                         unsigned CCValid, unsigned CCMask) {
  SDValue Ops[] = { DAG.getConstant(1, DL, MVT::i32),
                    DAG.getConstant(0, DL, MVT::i32),
                    DAG.getConstant(CCValid, DL, MVT::i32),
                    DAG.getConstant(CCMask, DL, MVT::i32), CCReg };
  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
}

// Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
// be done directly.  IsFP is true if CC is for a floating-point rather than
// integer comparison.
static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) {
  switch (CC) {
  case ISD::SETOEQ:
  case ISD::SETEQ:
    return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE;

  case ISD::SETOGE:
  case ISD::SETGE:
    return IsFP ? SystemZISD::VFCMPHE : static_cast<SystemZISD::NodeType>(0);

  case ISD::SETOGT:
  case ISD::SETGT:
    return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH;

  case ISD::SETUGT:
    return IsFP ? static_cast<SystemZISD::NodeType>(0) : SystemZISD::VICMPHL;

  default:
    return 0;
  }
}

// Return the SystemZISD vector comparison operation for CC or its inverse,
// or 0 if neither can be done directly.  Indicate in Invert whether the
// result is for the inverse of CC.  IsFP is true if CC is for a
// floating-point rather than integer comparison.
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
                                            bool &Invert) {
  if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
    Invert = false;
    return Opcode;
  }

  CC = ISD::getSetCCInverse(CC, !IsFP);
  if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
    Invert = true;
    return Opcode;
  }

  return 0;
}

// Return a v2f64 that contains the extended form of elements Start and Start+1
// of v4f32 value Op.
static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
                                  SDValue Op) {
  int Mask[] = { Start, -1, Start + 1, -1 };
  Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
  return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
}
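
// With Start == 0 the shuffle mask is { 0, -1, 1, -1 }, so elements 0 and 1 of
// Op land in the even lanes of the shuffled v4f32 (the odd lanes are undef),
// and VEXTEND then widens those two even elements into the v2f64 result.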

// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
// producing a result of type VT.
SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
                                            const SDLoc &DL, EVT VT,
                                            SDValue CmpOp0,
                                            SDValue CmpOp1) const {
  // There is no hardware support for v4f32 (unless we have the vector
  // enhancements facility 1), so extend the vector into two v2f64s
  // and compare those.
  if (CmpOp0.getValueType() == MVT::v4f32 &&
      !Subtarget.hasVectorEnhancements1()) {
    SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0);
    SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0);
    SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1);
    SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1);
    SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
    SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
    return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
  }
  return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
}

// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
// an integer mask of type VT.
SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
                                                const SDLoc &DL, EVT VT,
                                                ISD::CondCode CC,
                                                SDValue CmpOp0,
                                                SDValue CmpOp1) const {
  bool IsFP = CmpOp0.getValueType().isFloatingPoint();
  bool Invert = false;
  SDValue Cmp;
  switch (CC) {
    // Handle tests for order using (or (ogt y x) (oge x y)).
  case ISD::SETUO:
    Invert = true;
    LLVM_FALLTHROUGH;
  case ISD::SETO: {
    assert(IsFP && "Unexpected integer comparison");
    SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
    SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1);
    Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
    break;
  }

    // Handle <> tests using (or (ogt y x) (ogt x y)).
  case ISD::SETUEQ:
    Invert = true;
    LLVM_FALLTHROUGH;
  case ISD::SETONE: {
    assert(IsFP && "Unexpected integer comparison");
    SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
    SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1);
    Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
    break;
  }

    // Otherwise a single comparison is enough.  It doesn't really
    // matter whether we try the inversion or the swap first, since
    // there are no cases where both work.
  default:
    if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
      Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1);
    else {
      CC = ISD::getSetCCSwappedOperands(CC);
      if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
        Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0);
      else
        llvm_unreachable("Unhandled comparison");
    }
    break;
  }
  if (Invert) {
    SDValue Mask =
        DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
    Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
  }
  return Cmp;
}
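
// The order test above works because for any two ordered values either y > x
// or x >= y holds, while both "ogt" and "oge" are false as soon as one operand
// is a NaN, so the OR of the two lane masks is all ones exactly for the
// ordered lanes (and the SETUO form simply inverts that result).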

SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDValue CmpOp0   = Op.getOperand(0);
  SDValue CmpOp1   = Op.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  if (VT.isVector())
    return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);

  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
  SDValue CCReg = emitCmp(DAG, DL, C);
  return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
}

SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue CmpOp0   = Op.getOperand(2);
  SDValue CmpOp1   = Op.getOperand(3);
  SDValue Dest     = Op.getOperand(4);
  SDLoc DL(Op);

  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
  SDValue CCReg = emitCmp(DAG, DL, C);
  return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
                     Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32),
                     DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
}

// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
// allowing Pos and Neg to be wider than CmpOp.
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
  return (Neg.getOpcode() == ISD::SUB &&
          Neg.getOperand(0).getOpcode() == ISD::Constant &&
          cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
          Neg.getOperand(1) == Pos &&
          (Pos == CmpOp ||
           (Pos.getOpcode() == ISD::SIGN_EXTEND &&
            Pos.getOperand(0) == CmpOp)));
}

// Return the absolute or negative absolute of Op; IsNegative decides which.
static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
                           bool IsNegative) {
  Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op);
  if (IsNegative)
    Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
                     DAG.getConstant(0, DL, Op.getValueType()), Op);
  return Op;
}

SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDValue CmpOp0   = Op.getOperand(0);
  SDValue CmpOp1   = Op.getOperand(1);
  SDValue TrueOp   = Op.getOperand(2);
  SDValue FalseOp  = Op.getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  SDLoc DL(Op);

  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));

  // Check for absolute and negative-absolute selections, including those
  // where the comparison value is sign-extended (for LPGFR and LNGFR).
  // This check supplements the one in DAGCombiner.
  if (C.Opcode == SystemZISD::ICMP &&
      C.CCMask != SystemZ::CCMASK_CMP_EQ &&
      C.CCMask != SystemZ::CCMASK_CMP_NE &&
      C.Op1.getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
    if (isAbsolute(C.Op0, TrueOp, FalseOp))
      return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
    if (isAbsolute(C.Op0, FalseOp, TrueOp))
      return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
  }

  SDValue CCReg = emitCmp(DAG, DL, C);
  SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32),
                   DAG.getConstant(C.CCMask, DL, MVT::i32), CCReg};

  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
}

SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Node);
  const GlobalValue *GV = Node->getGlobal();
  int64_t Offset = Node->getOffset();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  CodeModel::Model CM = DAG.getTarget().getCodeModel();

  SDValue Result;
  if (Subtarget.isPC32DBLSymbol(GV, CM)) {
    // Assign anchors at 1<<12 byte boundaries.
    uint64_t Anchor = Offset & ~uint64_t(0xfff);
    Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
    Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);

    // The offset can be folded into the address if it is aligned to a
    // halfword.
    Offset -= Anchor;
    if (Offset != 0 && (Offset & 1) == 0) {
      SDValue Full = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
      Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
      Offset = 0;
    }
  } else {
    Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
    Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
    Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
                         MachinePointerInfo::getGOT(DAG.getMachineFunction()));
  }

  // If there was a non-zero offset that we didn't fold, create an explicit
  // addition for it.
  if (Offset != 0)
    Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
                         DAG.getConstant(Offset, DL, PtrVT));

  return Result;
}

SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
                                                 SelectionDAG &DAG,
                                                 unsigned Opcode,
                                                 SDValue GOTOffset) const {
  SDLoc DL(Node);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue Chain = DAG.getEntryNode();
  SDValue Glue;

  // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
  Glue = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
  Glue = Chain.getValue(1);

  // The first call operand is the chain and the second is the TLS symbol.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
                                           Node->getValueType(0),
                                           0, 0));

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
  Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Glue the call to the argument copies.
  Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Copy the return value from %r2.
  return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
}

SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
                                                  SelectionDAG &DAG) const {
  SDValue Chain = DAG.getEntryNode();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // The high part of the thread pointer is in access register 0.
  SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
  TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);

  // The low part of the thread pointer is in access register 1.
  SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
  TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);

  // Merge them into a single 64-bit address.
  SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
                                    DAG.getConstant(32, DL, PtrVT));
  return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
}

SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
                                                     SelectionDAG &DAG) const {
  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(Node, DAG);
  SDLoc DL(Node);
  const GlobalValue *GV = Node->getGlobal();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  TLSModel::Model model = DAG.getTarget().getTLSModel(GV);

  SDValue TP = lowerThreadPointer(DL, DAG);

  // Get the offset of GA from the thread pointer, based on the TLS model.
  SDValue Offset;
  switch (model) {
    case TLSModel::GeneralDynamic: {
      // Load the GOT offset of the tls_index (module ID / per-symbol offset).
      SystemZConstantPoolValue *CPV =
        SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);

      Offset = DAG.getConstantPool(CPV, PtrVT, 8);
      Offset = DAG.getLoad(
          PtrVT, DL, DAG.getEntryNode(), Offset,
          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

      // Call __tls_get_offset to retrieve the offset.
      Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
      break;
    }

    case TLSModel::LocalDynamic: {
      // Load the GOT offset of the module ID.
      SystemZConstantPoolValue *CPV =
        SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);

      Offset = DAG.getConstantPool(CPV, PtrVT, 8);
      Offset = DAG.getLoad(
          PtrVT, DL, DAG.getEntryNode(), Offset,
          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

      // Call __tls_get_offset to retrieve the module base offset.
      Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);

      // Note: The SystemZLDCleanupPass will remove redundant computations
      // of the module base offset.  Count total number of local-dynamic
      // accesses to trigger execution of that pass.
      SystemZMachineFunctionInfo* MFI =
        DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
      MFI->incNumLocalDynamicTLSAccesses();

      // Add the per-symbol offset.
      CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);

      SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
      DTPOffset = DAG.getLoad(
          PtrVT, DL, DAG.getEntryNode(), DTPOffset,
          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

      Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
      break;
    }

    case TLSModel::InitialExec: {
      // Load the offset from the GOT.
      Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
                                          SystemZII::MO_INDNTPOFF);
      Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
      Offset =
          DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
                      MachinePointerInfo::getGOT(DAG.getMachineFunction()));
      break;
    }

    case TLSModel::LocalExec: {
      // Force the offset into the constant pool and load it from there.
      SystemZConstantPoolValue *CPV =
        SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);

      Offset = DAG.getConstantPool(CPV, PtrVT, 8);
      Offset = DAG.getLoad(
          PtrVT, DL, DAG.getEntryNode(), Offset,
          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
      break;
    }
  }

  // Add the base and offset together.
  return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
}

SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Node);
  const BlockAddress *BA = Node->getBlockAddress();
  int64_t Offset = Node->getOffset();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
  return Result;
}

SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
                                              SelectionDAG &DAG) const {
  SDLoc DL(JT);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);

  // Use LARL to load the address of the table.
  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
}

SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(CP);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  SDValue Result;
  if (CP->isMachineConstantPoolEntry())
    Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
                                       CP->getAlignment());
  else
    Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
                                       CP->getAlignment(), CP->getOffset());

  // Use LARL to load the address of the constant pool entry.
  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
}

SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);

  SDLoc DL(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // If the back chain frame index has not been allocated yet, do so.
  SystemZMachineFunctionInfo *FI = MF.getInfo<SystemZMachineFunctionInfo>();
  int BackChainIdx = FI->getFramePointerSaveIndex();
  if (!BackChainIdx) {
    // By definition, the frame address is the address of the back chain.
    BackChainIdx = MFI.CreateFixedObject(8, -SystemZMC::CallFrameSize, false);
    FI->setFramePointerSaveIndex(BackChainIdx);
  }
  SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);

  // FIXME The frontend should detect this case.
  if (Depth > 0)
    report_fatal_error("Unsupported stack frame traversal count");

  return BackChain;
}

SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
                                               SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDLoc DL(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // FIXME The frontend should detect this case.
  if (Depth > 0)
    report_fatal_error("Unsupported stack frame traversal count");

  // Return R14D, which has the return address. Mark it an implicit live-in.
  unsigned LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
}

SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue In = Op.getOperand(0);
  EVT InVT = In.getValueType();
  EVT ResVT = Op.getValueType();

  // Convert loads directly.  This is normally done by DAGCombiner,
  // but we need this case for bitcasts that are created during lowering
  // and which are then lowered themselves.
  if (auto *LoadN = dyn_cast<LoadSDNode>(In))
    if (ISD::isNormalLoad(LoadN)) {
      SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
                                    LoadN->getBasePtr(), LoadN->getMemOperand());
      // Update the chain uses.
      DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
      return NewLoad;
    }

  if (InVT == MVT::i32 && ResVT == MVT::f32) {
    SDValue In64;
    if (Subtarget.hasHighWord()) {
      SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
                                       MVT::i64);
      In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
                                       MVT::i64, SDValue(U64, 0), In);
    } else {
      In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
      In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
                         DAG.getConstant(32, DL, MVT::i64));
    }
    SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
    return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
                                      DL, MVT::f32, Out64);
  }
  if (InVT == MVT::f32 && ResVT == MVT::i32) {
    SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
    SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
                                             MVT::f64, SDValue(U64, 0), In);
    SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
    if (Subtarget.hasHighWord())
      return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
                                        MVT::i32, Out64);
    SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
                                DAG.getConstant(32, DL, MVT::i64));
    return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
  }
  llvm_unreachable("Unexpected bitcast combination");
}

SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
                                            SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  SystemZMachineFunctionInfo *FuncInfo =
    MF.getInfo<SystemZMachineFunctionInfo>();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  SDValue Chain   = Op.getOperand(0);
  SDValue Addr    = Op.getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  SDLoc DL(Op);

  // The initial values of each field.
  const unsigned NumFields = 4;
  SDValue Fields[NumFields] = {
    DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
    DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
    DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
    DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
  };

  // Store each field into its respective slot.
  SDValue MemOps[NumFields];
  unsigned Offset = 0;
  for (unsigned I = 0; I < NumFields; ++I) {
    SDValue FieldAddr = Addr;
    if (Offset != 0)
      FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
                             MachinePointerInfo(SV, Offset));
    Offset += 8;
  }

  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}

SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDValue Chain      = Op.getOperand(0);
  SDValue DstPtr     = Op.getOperand(1);
  SDValue SrcPtr     = Op.getOperand(2);
  const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
  SDLoc DL(Op);

  return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
                       /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
                       /*isTailCall*/false,
                       MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}

SDValue SystemZTargetLowering::
lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
  MachineFunction &MF = DAG.getMachineFunction();
  bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
  bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");

  SDValue Chain = Op.getOperand(0);
  SDValue Size  = Op.getOperand(1);
  SDValue Align = Op.getOperand(2);
  SDLoc DL(Op);

  // If user has set the no alignment function attribute, ignore
  // alloca alignments.
  uint64_t AlignVal = (RealignOpt ?
                       dyn_cast<ConstantSDNode>(Align)->getZExtValue() : 0);

  uint64_t StackAlign = TFI->getStackAlignment();
  uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
  uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;

  unsigned SPReg = getStackPointerRegisterToSaveRestore();
  SDValue NeededSpace = Size;

  // Get a reference to the stack pointer.
  SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);

  // If we need a backchain, save it now.
  SDValue Backchain;
  if (StoreBackchain)
    Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());

  // Add extra space for alignment if needed.
  if (ExtraAlignSpace)
    NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
                              DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));

  // Get the new stack pointer value.
  SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);

  // Copy the new stack pointer back.
  Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);

  // The allocated data lives above the 160 bytes allocated for the standard
  // frame, plus any outgoing stack arguments.  We don't know how much that
  // amounts to yet, so emit a special ADJDYNALLOC placeholder.
  SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
  SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);

  // Dynamically realign if needed.
  if (RequiredAlign > StackAlign) {
    Result =
      DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
                  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
    Result =
      DAG.getNode(ISD::AND, DL, MVT::i64, Result,
                  DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
  }

  if (StoreBackchain)
    Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());

  SDValue Ops[2] = { Result, Chain };
  return DAG.getMergeValues(Ops, DL);
}

SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
    SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);

  return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
}
SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue Ops[2];
  if (is32Bit(VT))
    // Just do a normal 64-bit multiplication and extract the results.
    // We define this so that it can be used for constant division.
    lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
                    Op.getOperand(1), Ops[1], Ops[0]);
  else if (Subtarget.hasMiscellaneousExtensions2())
    // SystemZISD::SMUL_LOHI returns the low result in the odd register and
    // the high result in the even register.  ISD::SMUL_LOHI is defined to
    // return the low half first, so the results are in reverse order.
    lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
                     Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
  else {
    // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
    //
    //   (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
    //
    // but using the fact that the upper halves are either all zeros
    // or all ones:
    //
    //   (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
    //
    // and grouping the right terms together since they are quicker than the
    // multiplication:
    //
    //   (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
    SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
    SDValue LL = Op.getOperand(0);
    SDValue RL = Op.getOperand(1);
    SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
    SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
    // SystemZISD::UMUL_LOHI returns the low result in the odd register and
    // the high result in the even register.  ISD::SMUL_LOHI is defined to
    // return the low half first, so the results are in reverse order.
    lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
                     LL, RL, Ops[1], Ops[0]);
    SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
    SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
    SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
    Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
  }
  return DAG.getMergeValues(Ops, DL);
}

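// Note on the decomposition used above: for a 64-bit signed value x, the high
// half of its 128-bit sign extension is x >> 63, i.e. 0 for non-negative x and
// -1 (all ones) for negative x.  Since lh and rh are each 0 or -1, a product
// such as lh * rl is either 0 or -rl, which equals -(lh & rl), giving the
// subtract-of-ANDs form that the code emits.
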
SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue Ops[2];
  if (is32Bit(VT))
    // Just do a normal 64-bit multiplication and extract the results.
    // We define this so that it can be used for constant division.
    lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
                    Op.getOperand(1), Ops[1], Ops[0]);
  else
    // SystemZISD::UMUL_LOHI returns the low result in the odd register and
    // the high result in the even register.  ISD::UMUL_LOHI is defined to
    // return the low half first, so the results are in reverse order.
    lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
                     Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, DL);
}

SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  EVT VT = Op.getValueType();
  SDLoc DL(Op);

  // We use DSGF for 32-bit division.  This means the first operand must
  // always be 64-bit, and the second operand should be 32-bit whenever
  // that is possible, to improve performance.
  if (is32Bit(VT))
    Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
  else if (DAG.ComputeNumSignBits(Op1) > 32)
    Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);

  // DSG(F) returns the remainder in the even register and the
  // quotient in the odd register.
  SDValue Ops[2];
  lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, DL);
}

SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
                                            SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);

  // DL(G) returns the remainder in the even register and the
  // quotient in the odd register.
  SDValue Ops[2];
  lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
                   Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, DL);
}

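// Note: as the comments above say, DSG(F) and DL(G) leave the remainder in the
// even register of the pair and the quotient in the odd register, while
// ISD::SDIVREM and ISD::UDIVREM return the quotient as result 0 and the
// remainder as result 1.  That is why lowerGR128Binary is given Ops[1] then
// Ops[0]: the even (remainder) half lands in Ops[1] and the odd (quotient)
// half in Ops[0].
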
SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");

  // Get the known-zero masks for each operand.
  SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
  KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
                        DAG.computeKnownBits(Ops[1])};

  // See if the upper 32 bits of one operand and the lower 32 bits of the
  // other are known zero.  They are the low and high operands respectively.
  uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
                       Known[1].Zero.getZExtValue() };
  unsigned High, Low;
  if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
    High = 1, Low = 0;
  else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
    High = 0, Low = 1;
  else
    return Op;

  SDValue LowOp = Ops[Low];
  SDValue HighOp = Ops[High];

  // If the high part is a constant, we're better off using IILH.
  if (HighOp.getOpcode() == ISD::Constant)
    return Op;

  // If the low part is a constant that is outside the range of LHI,
  // then we're better off using IILF.
  if (LowOp.getOpcode() == ISD::Constant) {
    int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
    if (!isInt<16>(Value))
      return Op;
  }

  // Check whether the high part is an AND that doesn't change the
  // high 32 bits and just masks out low bits.  We can skip it if so.
  if (HighOp.getOpcode() == ISD::AND &&
      HighOp.getOperand(1).getOpcode() == ISD::Constant) {
    SDValue HighOp0 = HighOp.getOperand(0);
    uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
    if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
      HighOp = HighOp0;
  }

  // Take advantage of the fact that all GR32 operations only change the
  // low 32 bits by truncating Low to an i32 and inserting it directly
  // using a subreg.  The interesting cases are those where the truncation
  // can be folded.
  SDLoc DL(Op);
  SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
  return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
                                   MVT::i64, HighOp, Low32);
}

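// Illustrative example: for (x & 0xffffffff00000000) | (y & 0xffffffff), the
// known-zero masks identify x as the high operand and y as the low operand,
// the AND on the high side is skipped because it only masks out low bits that
// the insertion overwrites anyway, and the OR is emitted as an insertion of
// trunc(y) into the low 32-bit subregister of x.
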
// Lower SADDO/SSUBO/UADDO/USUBO nodes.
SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDNode *N = Op.getNode();
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDLoc DL(N);
  unsigned BaseOp = 0;
  unsigned CCValid = 0;
  unsigned CCMask = 0;

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown instruction!");
  case ISD::SADDO:
    BaseOp = SystemZISD::SADDO;
    CCValid = SystemZ::CCMASK_ARITH;
    CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
    break;
  case ISD::SSUBO:
    BaseOp = SystemZISD::SSUBO;
    CCValid = SystemZ::CCMASK_ARITH;
    CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
    break;
  case ISD::UADDO:
    BaseOp = SystemZISD::UADDO;
    CCValid = SystemZ::CCMASK_LOGICAL;
    CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
    break;
  case ISD::USUBO:
    BaseOp = SystemZISD::USUBO;
    CCValid = SystemZ::CCMASK_LOGICAL;
    CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
    break;
  }

  SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);

  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
  if (N->getValueType(1) == MVT::i1)
    SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);

  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
}

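// Note: the overflow/carry result is not produced directly.  Result.getValue(1)
// is the condition code, and emitSETCC materializes the boolean by testing it
// against the (CCValid, CCMask) pair chosen above: CCMASK_ARITH_OVERFLOW for
// the signed cases and the logical carry/borrow masks for the unsigned ones.
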
// Lower ADDCARRY/SUBCARRY nodes.
SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
                                                SelectionDAG &DAG) const {

  SDNode *N = Op.getNode();
  MVT VT = N->getSimpleValueType(0);

  // Let legalize expand this if it isn't a legal type yet.
  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Carry = Op.getOperand(2);
  SDLoc DL(N);
  unsigned BaseOp = 0;
  unsigned CCValid = 0;
  unsigned CCMask = 0;

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown instruction!");
  case ISD::ADDCARRY:
    BaseOp = SystemZISD::ADDCARRY;
    CCValid = SystemZ::CCMASK_LOGICAL;
    CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
    break;
  case ISD::SUBCARRY:
    BaseOp = SystemZISD::SUBCARRY;
    CCValid = SystemZ::CCMASK_LOGICAL;
    CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
    break;
  }

  // Set the condition code from the carry flag.
  Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
                      DAG.getConstant(CCValid, DL, MVT::i32),
                      DAG.getConstant(CCMask, DL, MVT::i32));

  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);

  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
  if (N->getValueType(1) == MVT::i1)
    SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);

  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
}

SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
                                          SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  Op = Op.getOperand(0);

  // Handle vector types via VPOPCT.
  if (VT.isVector()) {
    Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
    Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
    switch (VT.getScalarSizeInBits()) {
    case 8:
      break;
    case 16: {
      Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
      SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
      SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
      Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
      Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
      break;
    }
    case 32: {
      SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
                                            DAG.getConstant(0, DL, MVT::i32));
      Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
      break;
    }
    case 64: {
      SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
                                            DAG.getConstant(0, DL, MVT::i32));
      Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
      Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
      break;
    }
    default:
      llvm_unreachable("Unexpected type");
    }
    return Op;
  }

  // Get the known-zero mask for the operand.
  KnownBits Known = DAG.computeKnownBits(Op);
  unsigned NumSignificantBits = (~Known.Zero).getActiveBits();
  if (NumSignificantBits == 0)
    return DAG.getConstant(0, DL, VT);

  // Skip known-zero high parts of the operand.
  int64_t OrigBitSize = VT.getSizeInBits();
  int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
  BitSize = std::min(BitSize, OrigBitSize);

  // The POPCNT instruction counts the number of bits in each byte.
  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
  Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);

  // Add up per-byte counts in a binary tree.  All bits of Op at
  // position larger than BitSize remain zero throughout.
  for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
    SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
    if (BitSize != OrigBitSize)
      Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
                        DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
    Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
  }

  // Extract overall result from high byte.
  if (BitSize > 8)
    Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                     DAG.getConstant(BitSize - 8, DL, VT));

  return Op;
}

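// Worked example (illustrative): for a full 64-bit operand, POPCNT leaves one
// per-byte population count in each of the eight bytes.  The loop above then
// adds Op to itself shifted left by 32, 16 and 8 bits, so the most significant
// byte accumulates the sum of all eight byte counts, and the final SRL by
// BitSize - 8 = 56 extracts that byte as the result.
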
SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
    cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
  SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
    cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());

  // The only fence that needs an instruction is a sequentially-consistent
  // cross-thread fence.
  if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
      FenceSSID == SyncScope::System) {
    return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
                                      Op.getOperand(0)),
                   0);
  }

  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
  return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
}

// Op is an atomic load.  Lower it into a normal volatile load.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
                                                SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
                        Node->getChain(), Node->getBasePtr(),
                        Node->getMemoryVT(), Node->getMemOperand());
}

// Op is an atomic store.  Lower it into a normal volatile store.
SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
                                    Node->getBasePtr(), Node->getMemoryVT(),
                                    Node->getMemOperand());
  // We have to enforce sequential consistency by performing a
  // serialization operation after the store.
  if (Node->getOrdering() == AtomicOrdering::SequentiallyConsistent)
    Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op),
                                       MVT::Other, Chain), 0);
  return Chain;
}

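// Note (assumption about the target memory model, not stated in this file):
// z/Architecture already orders ordinary loads and stores strongly enough for
// acquire/release semantics, which is why atomic loads become plain loads
// above, and only sequentially-consistent stores need the extra Serialize
// barrier to keep later loads from being satisfied ahead of the store.
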
// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation.  Lower the first
// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
                                                   SelectionDAG &DAG,
                                                   unsigned Opcode) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());

  // 32-bit operations need no code outside the main loop.
  EVT NarrowVT = Node->getMemoryVT();
  EVT WideVT = MVT::i32;
  if (NarrowVT == WideVT)
    return Op;

  int64_t BitSize = NarrowVT.getSizeInBits();
  SDValue ChainIn = Node->getChain();
  SDValue Addr = Node->getBasePtr();
  SDValue Src2 = Node->getVal();
  MachineMemOperand *MMO = Node->getMemOperand();
  SDLoc DL(Node);
  EVT PtrVT = Addr.getValueType();

  // Convert atomic subtracts of constants into additions.
  if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
    if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
      Opcode = SystemZISD::ATOMIC_LOADW_ADD;
      Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
    }

  // Get the address of the containing word.
  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
                                    DAG.getConstant(-4, DL, PtrVT));

  // Get the number of bits that the word must be rotated left in order
  // to bring the field to the top bits of a GR32.
  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
                                 DAG.getConstant(3, DL, PtrVT));
  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);

  // Get the complementing shift amount, for rotating a field in the top
  // bits back to its proper position.
  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
                                    DAG.getConstant(0, DL, WideVT), BitShift);

  // Extend the source operand to 32 bits and prepare it for the inner loop.
  // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
  // operations require the source to be shifted in advance.  (This shift
  // can be folded if the source is constant.)  For AND and NAND, the lower
  // bits must be set, while for other opcodes they should be left clear.
  if (Opcode != SystemZISD::ATOMIC_SWAPW)
    Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
                       DAG.getConstant(32 - BitSize, DL, WideVT));
  if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
      Opcode == SystemZISD::ATOMIC_LOADW_NAND)
    Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
                       DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));

  // Construct the ATOMIC_LOADW_* node.
  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
  SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
                    DAG.getConstant(BitSize, DL, WideVT) };
  SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
                                             NarrowVT, MMO);

  // Rotate the result of the final CS so that the field is in the lower
  // bits of a GR32, then truncate it.
  SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
                                    DAG.getConstant(BitSize, DL, WideVT));
  SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);

  SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
  return DAG.getMergeValues(RetOps, DL);
}

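// Worked example (illustrative): for an 8-bit operation on the byte at offset
// 2 within its aligned word, BitShift is 2 * 8 = 16, so the expansion works on
// the field rotated up into bits 0-7 of the GR32, and the final ROTL by
// BitShift + BitSize = 24 moves the updated byte back down into bits 24-31
// (big-endian numbering) before it is truncated to the narrow type.
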
// Op is an ATOMIC_LOAD_SUB operation.  Lower 8- and 16-bit operations
// into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit
// operations into additions.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
                                                    SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  EVT MemVT = Node->getMemoryVT();
  if (MemVT == MVT::i32 || MemVT == MVT::i64) {
    // A full-width operation.
    assert(Op.getValueType() == MemVT && "Mismatched VTs");
    SDValue Src2 = Node->getVal();
    SDValue NegSrc2;
    SDLoc DL(Src2);

    if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) {
      // Use an addition if the operand is constant and either LAA(G) is
      // available or the negative value is in the range of A(G)FHI.
      int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
      if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
        NegSrc2 = DAG.getConstant(Value, DL, MemVT);
    } else if (Subtarget.hasInterlockedAccess1())
      // Use LAA(G) if available.
      NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT),
                            Src2);

    if (NegSrc2.getNode())
      return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
                           Node->getChain(), Node->getBasePtr(), NegSrc2,
                           Node->getMemOperand());

    // Use the node as-is.
    return Op;
  }

  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
}

// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
                                                    SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  SDValue ChainIn = Node->getOperand(0);
  SDValue Addr = Node->getOperand(1);
  SDValue CmpVal = Node->getOperand(2);
  SDValue SwapVal = Node->getOperand(3);
  MachineMemOperand *MMO = Node->getMemOperand();
  SDLoc DL(Node);

  // We have native support for 32-bit and 64-bit compare and swap, but we
  // still need to expand extracting the "success" result from the CC.
  EVT NarrowVT = Node->getMemoryVT();
  EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
  if (NarrowVT == WideVT) {
    SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
    SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
    SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
                                               DL, Tys, Ops, NarrowVT, MMO);
    SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
                                SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);

    DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
    DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
    return SDValue();
  }

  // Convert 8-bit and 16-bit compare and swap to a loop, implemented
  // via a fullword ATOMIC_CMP_SWAPW operation.
  int64_t BitSize = NarrowVT.getSizeInBits();
  EVT PtrVT = Addr.getValueType();

  // Get the address of the containing word.
  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
                                    DAG.getConstant(-4, DL, PtrVT));

  // Get the number of bits that the word must be rotated left in order
  // to bring the field to the top bits of a GR32.
  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
                                 DAG.getConstant(3, DL, PtrVT));
  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);

  // Get the complementing shift amount, for rotating a field in the top
  // bits back to its proper position.
  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
                                    DAG.getConstant(0, DL, WideVT), BitShift);

  // Construct the ATOMIC_CMP_SWAPW node.
  SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
  SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
                    NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
  SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
                                             VTList, Ops, NarrowVT, MMO);
  SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
                              SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);

  DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
  return SDValue();
}

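// Note: both paths above rewrite every result of the original
// ATOMIC_CMP_SWAP_WITH_SUCCESS node in place (the loaded value, the success
// flag derived from CC, and the chain) and then return an empty SDValue,
// which tells the caller that no further replacement node is needed.
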
SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
  return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
                            SystemZ::R15D, Op.getValueType());
}

SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
  bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");

  SDValue Chain = Op.getOperand(0);
  SDValue NewSP = Op.getOperand(1);
  SDValue Backchain;
  SDLoc DL(Op);

  if (StoreBackchain) {
    SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, MVT::i64);
    Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
  }

  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R15D, NewSP);

  if (StoreBackchain)
    Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());

  return Chain;
}

SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
                                             SelectionDAG &DAG) const {
  bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
  if (!IsData)
    // Just preserve the chain.
    return Op.getOperand(0);

  SDLoc DL(Op);
  bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
  unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
  auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
  SDValue Ops[] = {
    Op.getOperand(0),
    DAG.getConstant(Code, DL, MVT::i32),
    Op.getOperand(1)
  };
  return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
                                 Node->getVTList(), Ops,
                                 Node->getMemoryVT(), Node->getMemOperand());
}

// Convert condition code in CCReg to an i32 value.
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
  SDLoc DL(CCReg);
  SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
  return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
                     DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
}

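// Note: IPM places the condition code in bits 28-29 of the 32-bit result
// (counting from the least significant bit), so the SRL by SystemZ::IPM_CC
// leaves the raw CC value 0-3 in the low two bits of the returned i32.
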
3696 SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op
,
3697 SelectionDAG
&DAG
) const {
3698 unsigned Opcode
, CCValid
;
3699 if (isIntrinsicWithCCAndChain(Op
, Opcode
, CCValid
)) {
3700 assert(Op
->getNumValues() == 2 && "Expected only CC result and chain");
3701 SDNode
*Node
= emitIntrinsicWithCCAndChain(DAG
, Op
, Opcode
);
3702 SDValue CC
= getCCResult(DAG
, SDValue(Node
, 0));
3703 DAG
.ReplaceAllUsesOfValueWith(SDValue(Op
.getNode(), 0), CC
);
3711 SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op
,
3712 SelectionDAG
&DAG
) const {
3713 unsigned Opcode
, CCValid
;
3714 if (isIntrinsicWithCC(Op
, Opcode
, CCValid
)) {
3715 SDNode
*Node
= emitIntrinsicWithCC(DAG
, Op
, Opcode
);
3716 if (Op
->getNumValues() == 1)
3717 return getCCResult(DAG
, SDValue(Node
, 0));
3718 assert(Op
->getNumValues() == 2 && "Expected a CC and non-CC result");
3719 return DAG
.getNode(ISD::MERGE_VALUES
, SDLoc(Op
), Op
->getVTList(),
3720 SDValue(Node
, 0), getCCResult(DAG
, SDValue(Node
, 1)));
3723 unsigned Id
= cast
<ConstantSDNode
>(Op
.getOperand(0))->getZExtValue();
3725 case Intrinsic::thread_pointer
:
3726 return lowerThreadPointer(SDLoc(Op
), DAG
);
3728 case Intrinsic::s390_vpdi
:
3729 return DAG
.getNode(SystemZISD::PERMUTE_DWORDS
, SDLoc(Op
), Op
.getValueType(),
3730 Op
.getOperand(1), Op
.getOperand(2), Op
.getOperand(3));
3732 case Intrinsic::s390_vperm
:
3733 return DAG
.getNode(SystemZISD::PERMUTE
, SDLoc(Op
), Op
.getValueType(),
3734 Op
.getOperand(1), Op
.getOperand(2), Op
.getOperand(3));
3736 case Intrinsic::s390_vuphb
:
3737 case Intrinsic::s390_vuphh
:
3738 case Intrinsic::s390_vuphf
:
3739 return DAG
.getNode(SystemZISD::UNPACK_HIGH
, SDLoc(Op
), Op
.getValueType(),
3742 case Intrinsic::s390_vuplhb
:
3743 case Intrinsic::s390_vuplhh
:
3744 case Intrinsic::s390_vuplhf
:
3745 return DAG
.getNode(SystemZISD::UNPACKL_HIGH
, SDLoc(Op
), Op
.getValueType(),
3748 case Intrinsic::s390_vuplb
:
3749 case Intrinsic::s390_vuplhw
:
3750 case Intrinsic::s390_vuplf
:
3751 return DAG
.getNode(SystemZISD::UNPACK_LOW
, SDLoc(Op
), Op
.getValueType(),
3754 case Intrinsic::s390_vupllb
:
3755 case Intrinsic::s390_vupllh
:
3756 case Intrinsic::s390_vupllf
:
3757 return DAG
.getNode(SystemZISD::UNPACKL_LOW
, SDLoc(Op
), Op
.getValueType(),
3760 case Intrinsic::s390_vsumb
:
3761 case Intrinsic::s390_vsumh
:
3762 case Intrinsic::s390_vsumgh
:
3763 case Intrinsic::s390_vsumgf
:
3764 case Intrinsic::s390_vsumqf
:
3765 case Intrinsic::s390_vsumqg
:
3766 return DAG
.getNode(SystemZISD::VSUM
, SDLoc(Op
), Op
.getValueType(),
3767 Op
.getOperand(1), Op
.getOperand(2));
3774 // Says that SystemZISD operation Opcode can be used to perform the equivalent
3775 // of a VPERM with permute vector Bytes. If Opcode takes three operands,
3776 // Operand is the constant third operand, otherwise it is the number of
3777 // bytes in each element of the result.
3781 unsigned char Bytes
[SystemZ::VectorBytes
];
3785 static const Permute PermuteForms
[] = {
3787 { SystemZISD::MERGE_HIGH
, 8,
3788 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
3790 { SystemZISD::MERGE_HIGH
, 4,
3791 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
3793 { SystemZISD::MERGE_HIGH
, 2,
3794 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
3796 { SystemZISD::MERGE_HIGH
, 1,
3797 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
3799 { SystemZISD::MERGE_LOW
, 8,
3800 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
3802 { SystemZISD::MERGE_LOW
, 4,
3803 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
3805 { SystemZISD::MERGE_LOW
, 2,
3806 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
3808 { SystemZISD::MERGE_LOW
, 1,
3809 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
3811 { SystemZISD::PACK
, 4,
3812 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
3814 { SystemZISD::PACK
, 2,
3815 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
3817 { SystemZISD::PACK
, 1,
3818 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
3819 // VPDI V1, V2, 4 (low half of V1, high half of V2)
3820 { SystemZISD::PERMUTE_DWORDS
, 4,
3821 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
3822 // VPDI V1, V2, 1 (high half of V1, low half of V2)
3823 { SystemZISD::PERMUTE_DWORDS
, 1,
3824 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
3827 // Called after matching a vector shuffle against a particular pattern.
3828 // Both the original shuffle and the pattern have two vector operands.
3829 // OpNos[0] is the operand of the original shuffle that should be used for
3830 // operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
3831 // OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
3832 // set OpNo0 and OpNo1 to the shuffle operands that should actually be used
3833 // for operands 0 and 1 of the pattern.
3834 static bool chooseShuffleOpNos(int *OpNos
, unsigned &OpNo0
, unsigned &OpNo1
) {
3838 OpNo0
= OpNo1
= OpNos
[1];
3839 } else if (OpNos
[1] < 0) {
3840 OpNo0
= OpNo1
= OpNos
[0];
3848 // Bytes is a VPERM-like permute vector, except that -1 is used for
3849 // undefined bytes. Return true if the VPERM can be implemented using P.
3850 // When returning true set OpNo0 to the VPERM operand that should be
3851 // used for operand 0 of P and likewise OpNo1 for operand 1 of P.
3853 // For example, if swapping the VPERM operands allows P to match, OpNo0
3854 // will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
3855 // operand, but rewriting it to use two duplicated operands allows it to
3856 // match P, then OpNo0 and OpNo1 will be the same.
3857 static bool matchPermute(const SmallVectorImpl
<int> &Bytes
, const Permute
&P
,
3858 unsigned &OpNo0
, unsigned &OpNo1
) {
3859 int OpNos
[] = { -1, -1 };
3860 for (unsigned I
= 0; I
< SystemZ::VectorBytes
; ++I
) {
3863 // Make sure that the two permute vectors use the same suboperand
3864 // byte number. Only the operand numbers (the high bits) are
3865 // allowed to differ.
3866 if ((Elt
^ P
.Bytes
[I
]) & (SystemZ::VectorBytes
- 1))
3868 int ModelOpNo
= P
.Bytes
[I
] / SystemZ::VectorBytes
;
3869 int RealOpNo
= unsigned(Elt
) / SystemZ::VectorBytes
;
3870 // Make sure that the operand mappings are consistent with previous
3872 if (OpNos
[ModelOpNo
] == 1 - RealOpNo
)
3874 OpNos
[ModelOpNo
] = RealOpNo
;
3877 return chooseShuffleOpNos(OpNos
, OpNo0
, OpNo1
);
3880 // As above, but search for a matching permute.
3881 static const Permute
*matchPermute(const SmallVectorImpl
<int> &Bytes
,
3882 unsigned &OpNo0
, unsigned &OpNo1
) {
3883 for (auto &P
: PermuteForms
)
3884 if (matchPermute(Bytes
, P
, OpNo0
, OpNo1
))
3889 // Bytes is a VPERM-like permute vector, except that -1 is used for
3890 // undefined bytes. This permute is an operand of an outer permute.
3891 // See whether redistributing the -1 bytes gives a shuffle that can be
3892 // implemented using P. If so, set Transform to a VPERM-like permute vector
3893 // that, when applied to the result of P, gives the original permute in Bytes.
3894 static bool matchDoublePermute(const SmallVectorImpl
<int> &Bytes
,
3896 SmallVectorImpl
<int> &Transform
) {
3898 for (unsigned From
= 0; From
< SystemZ::VectorBytes
; ++From
) {
3899 int Elt
= Bytes
[From
];
3901 // Byte number From of the result is undefined.
3902 Transform
[From
] = -1;
3904 while (P
.Bytes
[To
] != Elt
) {
3906 if (To
== SystemZ::VectorBytes
)
3909 Transform
[From
] = To
;
3915 // As above, but search for a matching permute.
3916 static const Permute
*matchDoublePermute(const SmallVectorImpl
<int> &Bytes
,
3917 SmallVectorImpl
<int> &Transform
) {
3918 for (auto &P
: PermuteForms
)
3919 if (matchDoublePermute(Bytes
, P
, Transform
))
3924 // Convert the mask of the given shuffle op into a byte-level mask,
3925 // as if it had type vNi8.
3926 static bool getVPermMask(SDValue ShuffleOp
,
3927 SmallVectorImpl
<int> &Bytes
) {
3928 EVT VT
= ShuffleOp
.getValueType();
3929 unsigned NumElements
= VT
.getVectorNumElements();
3930 unsigned BytesPerElement
= VT
.getVectorElementType().getStoreSize();
3932 if (auto *VSN
= dyn_cast
<ShuffleVectorSDNode
>(ShuffleOp
)) {
3933 Bytes
.resize(NumElements
* BytesPerElement
, -1);
3934 for (unsigned I
= 0; I
< NumElements
; ++I
) {
3935 int Index
= VSN
->getMaskElt(I
);
3937 for (unsigned J
= 0; J
< BytesPerElement
; ++J
)
3938 Bytes
[I
* BytesPerElement
+ J
] = Index
* BytesPerElement
+ J
;
3942 if (SystemZISD::SPLAT
== ShuffleOp
.getOpcode() &&
3943 isa
<ConstantSDNode
>(ShuffleOp
.getOperand(1))) {
3944 unsigned Index
= ShuffleOp
.getConstantOperandVal(1);
3945 Bytes
.resize(NumElements
* BytesPerElement
, -1);
3946 for (unsigned I
= 0; I
< NumElements
; ++I
)
3947 for (unsigned J
= 0; J
< BytesPerElement
; ++J
)
3948 Bytes
[I
* BytesPerElement
+ J
] = Index
* BytesPerElement
+ J
;
3954 // Bytes is a VPERM-like permute vector, except that -1 is used for
3955 // undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
3956 // the result come from a contiguous sequence of bytes from one input.
3957 // Set Base to the selector for the first byte if so.
3958 static bool getShuffleInput(const SmallVectorImpl
<int> &Bytes
, unsigned Start
,
3959 unsigned BytesPerElement
, int &Base
) {
3961 for (unsigned I
= 0; I
< BytesPerElement
; ++I
) {
3962 if (Bytes
[Start
+ I
] >= 0) {
3963 unsigned Elem
= Bytes
[Start
+ I
];
3966 // Make sure the bytes would come from one input operand.
3967 if (unsigned(Base
) % Bytes
.size() + BytesPerElement
> Bytes
.size())
3969 } else if (unsigned(Base
) != Elem
- I
)
3976 // Bytes is a VPERM-like permute vector, except that -1 is used for
3977 // undefined bytes. Return true if it can be performed using VSLDI.
3978 // When returning true, set StartIndex to the shift amount and OpNo0
3979 // and OpNo1 to the VPERM operands that should be used as the first
3980 // and second shift operand respectively.
3981 static bool isShlDoublePermute(const SmallVectorImpl
<int> &Bytes
,
3982 unsigned &StartIndex
, unsigned &OpNo0
,
3984 int OpNos
[] = { -1, -1 };
3986 for (unsigned I
= 0; I
< 16; ++I
) {
3987 int Index
= Bytes
[I
];
3989 int ExpectedShift
= (Index
- I
) % SystemZ::VectorBytes
;
3990 int ModelOpNo
= unsigned(ExpectedShift
+ I
) / SystemZ::VectorBytes
;
3991 int RealOpNo
= unsigned(Index
) / SystemZ::VectorBytes
;
3993 Shift
= ExpectedShift
;
3994 else if (Shift
!= ExpectedShift
)
3996 // Make sure that the operand mappings are consistent with previous
3998 if (OpNos
[ModelOpNo
] == 1 - RealOpNo
)
4000 OpNos
[ModelOpNo
] = RealOpNo
;
4004 return chooseShuffleOpNos(OpNos
, OpNo0
, OpNo1
);
4007 // Create a node that performs P on operands Op0 and Op1, casting the
4008 // operands to the appropriate type. The type of the result is determined by P.
4009 static SDValue
getPermuteNode(SelectionDAG
&DAG
, const SDLoc
&DL
,
4010 const Permute
&P
, SDValue Op0
, SDValue Op1
) {
4011 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
4012 // elements of a PACK are twice as wide as the outputs.
4013 unsigned InBytes
= (P
.Opcode
== SystemZISD::PERMUTE_DWORDS
? 8 :
4014 P
.Opcode
== SystemZISD::PACK
? P
.Operand
* 2 :
4016 // Cast both operands to the appropriate type.
4017 MVT InVT
= MVT::getVectorVT(MVT::getIntegerVT(InBytes
* 8),
4018 SystemZ::VectorBytes
/ InBytes
);
4019 Op0
= DAG
.getNode(ISD::BITCAST
, DL
, InVT
, Op0
);
4020 Op1
= DAG
.getNode(ISD::BITCAST
, DL
, InVT
, Op1
);
4022 if (P
.Opcode
== SystemZISD::PERMUTE_DWORDS
) {
4023 SDValue Op2
= DAG
.getConstant(P
.Operand
, DL
, MVT::i32
);
4024 Op
= DAG
.getNode(SystemZISD::PERMUTE_DWORDS
, DL
, InVT
, Op0
, Op1
, Op2
);
4025 } else if (P
.Opcode
== SystemZISD::PACK
) {
4026 MVT OutVT
= MVT::getVectorVT(MVT::getIntegerVT(P
.Operand
* 8),
4027 SystemZ::VectorBytes
/ P
.Operand
);
4028 Op
= DAG
.getNode(SystemZISD::PACK
, DL
, OutVT
, Op0
, Op1
);
4030 Op
= DAG
.getNode(P
.Opcode
, DL
, InVT
, Op0
, Op1
);
4035 // Bytes is a VPERM-like permute vector, except that -1 is used for
4036 // undefined bytes. Implement it on operands Ops[0] and Ops[1] using
4038 static SDValue
getGeneralPermuteNode(SelectionDAG
&DAG
, const SDLoc
&DL
,
4040 const SmallVectorImpl
<int> &Bytes
) {
4041 for (unsigned I
= 0; I
< 2; ++I
)
4042 Ops
[I
] = DAG
.getNode(ISD::BITCAST
, DL
, MVT::v16i8
, Ops
[I
]);
4044 // First see whether VSLDI can be used.
4045 unsigned StartIndex
, OpNo0
, OpNo1
;
4046 if (isShlDoublePermute(Bytes
, StartIndex
, OpNo0
, OpNo1
))
4047 return DAG
.getNode(SystemZISD::SHL_DOUBLE
, DL
, MVT::v16i8
, Ops
[OpNo0
],
4048 Ops
[OpNo1
], DAG
.getConstant(StartIndex
, DL
, MVT::i32
));
4050 // Fall back on VPERM. Construct an SDNode for the permute vector.
4051 SDValue IndexNodes
[SystemZ::VectorBytes
];
4052 for (unsigned I
= 0; I
< SystemZ::VectorBytes
; ++I
)
4054 IndexNodes
[I
] = DAG
.getConstant(Bytes
[I
], DL
, MVT::i32
);
4056 IndexNodes
[I
] = DAG
.getUNDEF(MVT::i32
);
4057 SDValue Op2
= DAG
.getBuildVector(MVT::v16i8
, DL
, IndexNodes
);
4058 return DAG
.getNode(SystemZISD::PERMUTE
, DL
, MVT::v16i8
, Ops
[0], Ops
[1], Op2
);
4062 // Describes a general N-operand vector shuffle.
4063 struct GeneralShuffle
{
4064 GeneralShuffle(EVT vt
) : VT(vt
) {}
4066 bool add(SDValue
, unsigned);
4067 SDValue
getNode(SelectionDAG
&, const SDLoc
&);
4069 // The operands of the shuffle.
4070 SmallVector
<SDValue
, SystemZ::VectorBytes
> Ops
;
4072 // Index I is -1 if byte I of the result is undefined. Otherwise the
4073 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
4074 // Bytes[I] / SystemZ::VectorBytes.
4075 SmallVector
<int, SystemZ::VectorBytes
> Bytes
;
4077 // The type of the shuffle result.
4082 // Add an extra undefined element to the shuffle.
4083 void GeneralShuffle::addUndef() {
4084 unsigned BytesPerElement
= VT
.getVectorElementType().getStoreSize();
4085 for (unsigned I
= 0; I
< BytesPerElement
; ++I
)
4086 Bytes
.push_back(-1);
4089 // Add an extra element to the shuffle, taking it from element Elem of Op.
4090 // A null Op indicates a vector input whose value will be calculated later;
4091 // there is at most one such input per shuffle and it always has the same
4092 // type as the result. Aborts and returns false if the source vector elements
4093 // of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
4094 // LLVM they become implicitly extended, but this is rare and not optimized.
4095 bool GeneralShuffle::add(SDValue Op
, unsigned Elem
) {
4096 unsigned BytesPerElement
= VT
.getVectorElementType().getStoreSize();
4098 // The source vector can have wider elements than the result,
4099 // either through an explicit TRUNCATE or because of type legalization.
4100 // We want the least significant part.
4101 EVT FromVT
= Op
.getNode() ? Op
.getValueType() : VT
;
4102 unsigned FromBytesPerElement
= FromVT
.getVectorElementType().getStoreSize();
4104 // Return false if the source elements are smaller than their destination
4106 if (FromBytesPerElement
< BytesPerElement
)
4109 unsigned Byte
= ((Elem
* FromBytesPerElement
) % SystemZ::VectorBytes
+
4110 (FromBytesPerElement
- BytesPerElement
));
4112 // Look through things like shuffles and bitcasts.
4113 while (Op
.getNode()) {
4114 if (Op
.getOpcode() == ISD::BITCAST
)
4115 Op
= Op
.getOperand(0);
4116 else if (Op
.getOpcode() == ISD::VECTOR_SHUFFLE
&& Op
.hasOneUse()) {
4117 // See whether the bytes we need come from a contiguous part of one
4119 SmallVector
<int, SystemZ::VectorBytes
> OpBytes
;
4120 if (!getVPermMask(Op
, OpBytes
))
4123 if (!getShuffleInput(OpBytes
, Byte
, BytesPerElement
, NewByte
))
4129 Op
= Op
.getOperand(unsigned(NewByte
) / SystemZ::VectorBytes
);
4130 Byte
= unsigned(NewByte
) % SystemZ::VectorBytes
;
4131 } else if (Op
.isUndef()) {
4138 // Make sure that the source of the extraction is in Ops.
4140 for (; OpNo
< Ops
.size(); ++OpNo
)
4141 if (Ops
[OpNo
] == Op
)
4143 if (OpNo
== Ops
.size())
4146 // Add the element to Bytes.
4147 unsigned Base
= OpNo
* SystemZ::VectorBytes
+ Byte
;
4148 for (unsigned I
= 0; I
< BytesPerElement
; ++I
)
4149 Bytes
.push_back(Base
+ I
);
4154 // Return SDNodes for the completed shuffle.
4155 SDValue
GeneralShuffle::getNode(SelectionDAG
&DAG
, const SDLoc
&DL
) {
4156 assert(Bytes
.size() == SystemZ::VectorBytes
&& "Incomplete vector");
4158 if (Ops
.size() == 0)
4159 return DAG
.getUNDEF(VT
);
4161 // Make sure that there are at least two shuffle operands.
4162 if (Ops
.size() == 1)
4163 Ops
.push_back(DAG
.getUNDEF(MVT::v16i8
));
4165 // Create a tree of shuffles, deferring root node until after the loop.
4166 // Try to redistribute the undefined elements of non-root nodes so that
4167 // the non-root shuffles match something like a pack or merge, then adjust
4168 // the parent node's permute vector to compensate for the new order.
4169 // Among other things, this copes with vectors like <2 x i16> that were
4170 // padded with undefined elements during type legalization.
4172 // In the best case this redistribution will lead to the whole tree
4173 // using packs and merges. It should rarely be a loss in other cases.
4174 unsigned Stride
= 1;
4175 for (; Stride
* 2 < Ops
.size(); Stride
*= 2) {
4176 for (unsigned I
= 0; I
< Ops
.size() - Stride
; I
+= Stride
* 2) {
4177 SDValue SubOps
[] = { Ops
[I
], Ops
[I
+ Stride
] };
4179 // Create a mask for just these two operands.
4180 SmallVector
<int, SystemZ::VectorBytes
> NewBytes(SystemZ::VectorBytes
);
4181 for (unsigned J
= 0; J
< SystemZ::VectorBytes
; ++J
) {
4182 unsigned OpNo
= unsigned(Bytes
[J
]) / SystemZ::VectorBytes
;
4183 unsigned Byte
= unsigned(Bytes
[J
]) % SystemZ::VectorBytes
;
4186 else if (OpNo
== I
+ Stride
)
4187 NewBytes
[J
] = SystemZ::VectorBytes
+ Byte
;
4191 // See if it would be better to reorganize NewMask to avoid using VPERM.
4192 SmallVector
<int, SystemZ::VectorBytes
> NewBytesMap(SystemZ::VectorBytes
);
4193 if (const Permute
*P
= matchDoublePermute(NewBytes
, NewBytesMap
)) {
4194 Ops
[I
] = getPermuteNode(DAG
, DL
, *P
, SubOps
[0], SubOps
[1]);
4195 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
4196 for (unsigned J
= 0; J
< SystemZ::VectorBytes
; ++J
) {
4197 if (NewBytes
[J
] >= 0) {
4198 assert(unsigned(NewBytesMap
[J
]) < SystemZ::VectorBytes
&&
4199 "Invalid double permute");
4200 Bytes
[J
] = I
* SystemZ::VectorBytes
+ NewBytesMap
[J
];
4202 assert(NewBytesMap
[J
] < 0 && "Invalid double permute");
4205 // Just use NewBytes on the operands.
4206 Ops
[I
] = getGeneralPermuteNode(DAG
, DL
, SubOps
, NewBytes
);
4207 for (unsigned J
= 0; J
< SystemZ::VectorBytes
; ++J
)
4208 if (NewBytes
[J
] >= 0)
4209 Bytes
[J
] = I
* SystemZ::VectorBytes
+ J
;
4214 // Now we just have 2 inputs. Put the second operand in Ops[1].
4216 Ops
[1] = Ops
[Stride
];
4217 for (unsigned I
= 0; I
< SystemZ::VectorBytes
; ++I
)
4218 if (Bytes
[I
] >= int(SystemZ::VectorBytes
))
4219 Bytes
[I
] -= (Stride
- 1) * SystemZ::VectorBytes
;
4222 // Look for an instruction that can do the permute without resorting
4224 unsigned OpNo0
, OpNo1
;
4226 if (const Permute
*P
= matchPermute(Bytes
, OpNo0
, OpNo1
))
4227 Op
= getPermuteNode(DAG
, DL
, *P
, Ops
[OpNo0
], Ops
[OpNo1
]);
4229 Op
= getGeneralPermuteNode(DAG
, DL
, &Ops
[0], Bytes
);
4230 return DAG
.getNode(ISD::BITCAST
, DL
, VT
, Op
);
4233 // Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
4234 static bool isScalarToVector(SDValue Op
) {
4235 for (unsigned I
= 1, E
= Op
.getNumOperands(); I
!= E
; ++I
)
4236 if (!Op
.getOperand(I
).isUndef())
4241 // Return a vector of type VT that contains Value in the first element.
4242 // The other elements don't matter.
4243 static SDValue
buildScalarToVector(SelectionDAG
&DAG
, const SDLoc
&DL
, EVT VT
,
4245 // If we have a constant, replicate it to all elements and let the
4246 // BUILD_VECTOR lowering take care of it.
4247 if (Value
.getOpcode() == ISD::Constant
||
4248 Value
.getOpcode() == ISD::ConstantFP
) {
4249 SmallVector
<SDValue
, 16> Ops(VT
.getVectorNumElements(), Value
);
4250 return DAG
.getBuildVector(VT
, DL
, Ops
);
4252 if (Value
.isUndef())
4253 return DAG
.getUNDEF(VT
);
4254 return DAG
.getNode(ISD::SCALAR_TO_VECTOR
, DL
, VT
, Value
);
4257 // Return a vector of type VT in which Op0 is in element 0 and Op1 is in
4258 // element 1. Used for cases in which replication is cheap.
4259 static SDValue
buildMergeScalars(SelectionDAG
&DAG
, const SDLoc
&DL
, EVT VT
,
4260 SDValue Op0
, SDValue Op1
) {
4261 if (Op0
.isUndef()) {
4263 return DAG
.getUNDEF(VT
);
4264 return DAG
.getNode(SystemZISD::REPLICATE
, DL
, VT
, Op1
);
4267 return DAG
.getNode(SystemZISD::REPLICATE
, DL
, VT
, Op0
);
4268 return DAG
.getNode(SystemZISD::MERGE_HIGH
, DL
, VT
,
4269 buildScalarToVector(DAG
, DL
, VT
, Op0
),
4270 buildScalarToVector(DAG
, DL
, VT
, Op1
));
4273 // Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
4275 static SDValue
joinDwords(SelectionDAG
&DAG
, const SDLoc
&DL
, SDValue Op0
,
4277 if (Op0
.isUndef() && Op1
.isUndef())
4278 return DAG
.getUNDEF(MVT::v2i64
);
4279 // If one of the two inputs is undefined then replicate the other one,
4280 // in order to avoid using another register unnecessarily.
4282 Op0
= Op1
= DAG
.getNode(ISD::ANY_EXTEND
, DL
, MVT::i64
, Op1
);
4283 else if (Op1
.isUndef())
4284 Op0
= Op1
= DAG
.getNode(ISD::ANY_EXTEND
, DL
, MVT::i64
, Op0
);
4286 Op0
= DAG
.getNode(ISD::ANY_EXTEND
, DL
, MVT::i64
, Op0
);
4287 Op1
= DAG
.getNode(ISD::ANY_EXTEND
, DL
, MVT::i64
, Op1
);
4289 return DAG
.getNode(SystemZISD::JOIN_DWORDS
, DL
, MVT::v2i64
, Op0
, Op1
);
4292 // Try to represent constant BUILD_VECTOR node BVN using a BYTE MASK style
4293 // mask. Store the mask value in Mask on success.
4294 bool SystemZTargetLowering::
4295 tryBuildVectorByteMask(BuildVectorSDNode
*BVN
, uint64_t &Mask
) {
4296 EVT ElemVT
= BVN
->getValueType(0).getVectorElementType();
4297 unsigned BytesPerElement
= ElemVT
.getStoreSize();
4298 for (unsigned I
= 0, E
= BVN
->getNumOperands(); I
!= E
; ++I
) {
4299 SDValue Op
= BVN
->getOperand(I
);
4300 if (!Op
.isUndef()) {
4302 if (Op
.getOpcode() == ISD::Constant
)
4303 Value
= cast
<ConstantSDNode
>(Op
)->getZExtValue();
4304 else if (Op
.getOpcode() == ISD::ConstantFP
)
4305 Value
= (cast
<ConstantFPSDNode
>(Op
)->getValueAPF().bitcastToAPInt()
4309 for (unsigned J
= 0; J
< BytesPerElement
; ++J
) {
4310 uint64_t Byte
= (Value
>> (J
* 8)) & 0xff;
4312 Mask
|= 1ULL << ((E
- I
- 1) * BytesPerElement
+ J
);
4321 // Try to load a vector constant in which BitsPerElement-bit value Value
4322 // is replicated to fill the vector. VT is the type of the resulting
4323 // constant, which may have elements of a different size from BitsPerElement.
4324 // Return the SDValue of the constant on success, otherwise return
4326 static SDValue
tryBuildVectorReplicate(SelectionDAG
&DAG
,
4327 const SystemZInstrInfo
*TII
,
4328 const SDLoc
&DL
, EVT VT
, uint64_t Value
,
4329 unsigned BitsPerElement
) {
4330 // Signed 16-bit values can be replicated using VREPI.
4331 // Mark the constants as opaque or DAGCombiner will convert back to
4333 int64_t SignedValue
= SignExtend64(Value
, BitsPerElement
);
4334 if (isInt
<16>(SignedValue
)) {
4335 MVT VecVT
= MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement
),
4336 SystemZ::VectorBits
/ BitsPerElement
);
4337 SDValue Op
= DAG
.getNode(
4338 SystemZISD::REPLICATE
, DL
, VecVT
,
4339 DAG
.getConstant(SignedValue
, DL
, MVT::i32
, false, true /*isOpaque*/));
4340 return DAG
.getNode(ISD::BITCAST
, DL
, VT
, Op
);
4342 // See whether rotating the constant left some N places gives a value that
4343 // is one less than a power of 2 (i.e. all zeros followed by all ones).
4344 // If so we can use VGM.
4345 unsigned Start
, End
;
4346 if (TII
->isRxSBGMask(Value
, BitsPerElement
, Start
, End
)) {
4347 // isRxSBGMask returns the bit numbers for a full 64-bit value,
4348 // with 0 denoting 1 << 63 and 63 denoting 1. Convert them to
4349 // bit numbers for an BitsPerElement value, so that 0 denotes
4350 // 1 << (BitsPerElement-1).
4351 Start
-= 64 - BitsPerElement
;
4352 End
-= 64 - BitsPerElement
;
4353 MVT VecVT
= MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement
),
4354 SystemZ::VectorBits
/ BitsPerElement
);
4355 SDValue Op
= DAG
.getNode(
4356 SystemZISD::ROTATE_MASK
, DL
, VecVT
,
4357 DAG
.getConstant(Start
, DL
, MVT::i32
, false, true /*isOpaque*/),
4358 DAG
.getConstant(End
, DL
, MVT::i32
, false, true /*isOpaque*/));
4359 return DAG
.getNode(ISD::BITCAST
, DL
, VT
, Op
);
4364 // If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
4365 // better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
4366 // the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
4367 // would benefit from this representation and return it if so.
4368 static SDValue
tryBuildVectorShuffle(SelectionDAG
&DAG
,
4369 BuildVectorSDNode
*BVN
) {
4370 EVT VT
= BVN
->getValueType(0);
4371 unsigned NumElements
= VT
.getVectorNumElements();
4373 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
4374 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
4375 // need a BUILD_VECTOR, add an additional placeholder operand for that
4376 // BUILD_VECTOR and store its operands in ResidueOps.
4377 GeneralShuffle
GS(VT
);
4378 SmallVector
<SDValue
, SystemZ::VectorBytes
> ResidueOps
;
4379 bool FoundOne
= false;
4380 for (unsigned I
= 0; I
< NumElements
; ++I
) {
4381 SDValue Op
= BVN
->getOperand(I
);
4382 if (Op
.getOpcode() == ISD::TRUNCATE
)
4383 Op
= Op
.getOperand(0);
4384 if (Op
.getOpcode() == ISD::EXTRACT_VECTOR_ELT
&&
4385 Op
.getOperand(1).getOpcode() == ISD::Constant
) {
4386 unsigned Elem
= cast
<ConstantSDNode
>(Op
.getOperand(1))->getZExtValue();
4387 if (!GS
.add(Op
.getOperand(0), Elem
))
4390 } else if (Op
.isUndef()) {
4393 if (!GS
.add(SDValue(), ResidueOps
.size()))
4395 ResidueOps
.push_back(BVN
->getOperand(I
));
4399 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
4403 // Create the BUILD_VECTOR for the remaining elements, if any.
4404 if (!ResidueOps
.empty()) {
4405 while (ResidueOps
.size() < NumElements
)
4406 ResidueOps
.push_back(DAG
.getUNDEF(ResidueOps
[0].getValueType()));
4407 for (auto &Op
: GS
.Ops
) {
4408 if (!Op
.getNode()) {
4409 Op
= DAG
.getBuildVector(VT
, SDLoc(BVN
), ResidueOps
);
4414 return GS
.getNode(DAG
, SDLoc(BVN
));
4417 // Combine GPR scalar values Elems into a vector of type VT.
4418 static SDValue
buildVector(SelectionDAG
&DAG
, const SDLoc
&DL
, EVT VT
,
4419 SmallVectorImpl
<SDValue
> &Elems
) {
4420 // See whether there is a single replicated value.
4422 unsigned int NumElements
= Elems
.size();
4423 unsigned int Count
= 0;
4424 for (auto Elem
: Elems
) {
4425 if (!Elem
.isUndef()) {
4426 if (!Single
.getNode())
4428 else if (Elem
!= Single
) {
4435 // There are three cases here:
4437 // - if the only defined element is a loaded one, the best sequence
4438 // is a replicating load.
4440 // - otherwise, if the only defined element is an i64 value, we will
4441 // end up with the same VLVGP sequence regardless of whether we short-cut
4442 // for replication or fall through to the later code.
4444 // - otherwise, if the only defined element is an i32 or smaller value,
4445 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
4446 // This is only a win if the single defined element is used more than once.
4447 // In other cases we're better off using a single VLVGx.
4448 if (Single
.getNode() && (Count
> 1 || Single
.getOpcode() == ISD::LOAD
))
4449 return DAG
.getNode(SystemZISD::REPLICATE
, DL
, VT
, Single
);
4451 // If all elements are loads, use VLREP/VLEs (below).
4452 bool AllLoads
= true;
4453 for (auto Elem
: Elems
)
4454 if (Elem
.getOpcode() != ISD::LOAD
|| cast
<LoadSDNode
>(Elem
)->isIndexed()) {
4459 // The best way of building a v2i64 from two i64s is to use VLVGP.
4460 if (VT
== MVT::v2i64
&& !AllLoads
)
4461 return joinDwords(DAG
, DL
, Elems
[0], Elems
[1]);
4463 // Use a 64-bit merge high to combine two doubles.
4464 if (VT
== MVT::v2f64
&& !AllLoads
)
4465 return buildMergeScalars(DAG
, DL
, VT
, Elems
[0], Elems
[1]);
4467 // Build v4f32 values directly from the FPRs:
4469 // <Axxx> <Bxxx> <Cxxxx> <Dxxx>
4474 if (VT
== MVT::v4f32
&& !AllLoads
) {
4475 SDValue Op01
= buildMergeScalars(DAG
, DL
, VT
, Elems
[0], Elems
[1]);
4476 SDValue Op23
= buildMergeScalars(DAG
, DL
, VT
, Elems
[2], Elems
[3]);
4477 // Avoid unnecessary undefs by reusing the other operand.
4480 else if (Op23
.isUndef())
4482 // Merging identical replications is a no-op.
4483 if (Op01
.getOpcode() == SystemZISD::REPLICATE
&& Op01
== Op23
)
4485 Op01
= DAG
.getNode(ISD::BITCAST
, DL
, MVT::v2i64
, Op01
);
4486 Op23
= DAG
.getNode(ISD::BITCAST
, DL
, MVT::v2i64
, Op23
);
4487 SDValue Op
= DAG
.getNode(SystemZISD::MERGE_HIGH
,
4488 DL
, MVT::v2i64
, Op01
, Op23
);
4489 return DAG
.getNode(ISD::BITCAST
, DL
, VT
, Op
);
4492 // Collect the constant terms.
4493 SmallVector
<SDValue
, SystemZ::VectorBytes
> Constants(NumElements
, SDValue());
4494 SmallVector
<bool, SystemZ::VectorBytes
> Done(NumElements
, false);
4496 unsigned NumConstants
= 0;
4497 for (unsigned I
= 0; I
< NumElements
; ++I
) {
4498 SDValue Elem
= Elems
[I
];
4499 if (Elem
.getOpcode() == ISD::Constant
||
4500 Elem
.getOpcode() == ISD::ConstantFP
) {
4502 Constants
[I
] = Elem
;
4506 // If there was at least one constant, fill in the other elements of
4507 // Constants with undefs to get a full vector constant and use that
4508 // as the starting point.
4510 SDValue ReplicatedVal
;
4511 if (NumConstants
> 0) {
4512 for (unsigned I
= 0; I
< NumElements
; ++I
)
4513 if (!Constants
[I
].getNode())
4514 Constants
[I
] = DAG
.getUNDEF(Elems
[I
].getValueType());
4515 Result
= DAG
.getBuildVector(VT
, DL
, Constants
);
4517 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
4518 // avoid a false dependency on any previous contents of the vector
4521 // Use a VLREP if at least one element is a load. Make sure to replicate
4522 // the load with the most elements having its value.
4523 std::map
<const SDNode
*, unsigned> UseCounts
;
4524 SDNode
*LoadMaxUses
= nullptr;
4525 for (unsigned I
= 0; I
< NumElements
; ++I
)
4526 if (Elems
[I
].getOpcode() == ISD::LOAD
&&
4527 cast
<LoadSDNode
>(Elems
[I
])->isUnindexed()) {
4528 SDNode
*Ld
= Elems
[I
].getNode();
4530 if (LoadMaxUses
== nullptr || UseCounts
[LoadMaxUses
] < UseCounts
[Ld
])
4533 if (LoadMaxUses
!= nullptr) {
4534 ReplicatedVal
= SDValue(LoadMaxUses
, 0);
4535 Result
= DAG
.getNode(SystemZISD::REPLICATE
, DL
, VT
, ReplicatedVal
);
4537 // Try to use VLVGP.
4538 unsigned I1
= NumElements
/ 2 - 1;
4539 unsigned I2
= NumElements
- 1;
4540 bool Def1
= !Elems
[I1
].isUndef();
4541 bool Def2
= !Elems
[I2
].isUndef();
4543 SDValue Elem1
= Elems
[Def1
? I1
: I2
];
4544 SDValue Elem2
= Elems
[Def2
? I2
: I1
];
4545 Result
= DAG
.getNode(ISD::BITCAST
, DL
, VT
,
4546 joinDwords(DAG
, DL
, Elem1
, Elem2
));
4550 Result
= DAG
.getUNDEF(VT
);
4554 // Use VLVGx to insert the other elements.
4555 for (unsigned I
= 0; I
< NumElements
; ++I
)
4556 if (!Done
[I
] && !Elems
[I
].isUndef() && Elems
[I
] != ReplicatedVal
)
4557 Result
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, DL
, VT
, Result
, Elems
[I
],
4558 DAG
.getConstant(I
, DL
, MVT::i32
));
4562 SDValue
SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op
,
4563 SelectionDAG
&DAG
) const {
4564 const SystemZInstrInfo
*TII
=
4565 static_cast<const SystemZInstrInfo
*>(Subtarget
.getInstrInfo());
4566 auto *BVN
= cast
<BuildVectorSDNode
>(Op
.getNode());
4568 EVT VT
= Op
.getValueType();
4570 if (BVN
->isConstant()) {
4571 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
4572 // preferred way of creating all-zero and all-one vectors so give it
4573 // priority over other methods below.
4575 if (ISD::isBuildVectorAllZeros(Op
.getNode()) ||
4576 ISD::isBuildVectorAllOnes(Op
.getNode()) ||
4577 (VT
.isInteger() && tryBuildVectorByteMask(BVN
, Mask
)))
4580 // Try using some form of replication.
4581 APInt SplatBits
, SplatUndef
;
4582 unsigned SplatBitSize
;
4584 if (BVN
->isConstantSplat(SplatBits
, SplatUndef
, SplatBitSize
, HasAnyUndefs
,
4586 SplatBitSize
<= 64) {
4587 // First try assuming that any undefined bits above the highest set bit
4588 // and below the lowest set bit are 1s. This increases the likelihood of
4589 // being able to use a sign-extended element value in VECTOR REPLICATE
4590 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
4591 uint64_t SplatBitsZ
= SplatBits
.getZExtValue();
4592 uint64_t SplatUndefZ
= SplatUndef
.getZExtValue();
4593 uint64_t Lower
= (SplatUndefZ
4594 & ((uint64_t(1) << findFirstSet(SplatBitsZ
)) - 1));
4595 uint64_t Upper
= (SplatUndefZ
4596 & ~((uint64_t(1) << findLastSet(SplatBitsZ
)) - 1));
4597 uint64_t Value
= SplatBitsZ
| Upper
| Lower
;
4598 SDValue Op
= tryBuildVectorReplicate(DAG
, TII
, DL
, VT
, Value
,
4603 // Now try assuming that any undefined bits between the first and
4604 // last defined set bits are set. This increases the chances of
4605 // using a non-wraparound mask.
4606 uint64_t Middle
= SplatUndefZ
& ~Upper
& ~Lower
;
4607 Value
= SplatBitsZ
| Middle
;
4608 Op
= tryBuildVectorReplicate(DAG
, TII
, DL
, VT
, Value
, SplatBitSize
);
4613 // Fall back to loading it from memory.
4617 // See if we should use shuffles to construct the vector from other vectors.
4618 if (SDValue Res
= tryBuildVectorShuffle(DAG
, BVN
))
4621 // Detect SCALAR_TO_VECTOR conversions.
4622 if (isOperationLegal(ISD::SCALAR_TO_VECTOR
, VT
) && isScalarToVector(Op
))
4623 return buildScalarToVector(DAG
, DL
, VT
, Op
.getOperand(0));
4625 // Otherwise use buildVector to build the vector up from GPRs.
4626 unsigned NumElements
= Op
.getNumOperands();
4627 SmallVector
<SDValue
, SystemZ::VectorBytes
> Ops(NumElements
);
4628 for (unsigned I
= 0; I
< NumElements
; ++I
)
4629 Ops
[I
] = Op
.getOperand(I
);
4630 return buildVector(DAG
, DL
, VT
, Ops
);
SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                   SelectionDAG &DAG) const {
  auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  unsigned NumElements = VT.getVectorNumElements();

  if (VSN->isSplat()) {
    SDValue Op0 = Op.getOperand(0);
    unsigned Index = VSN->getSplatIndex();
    assert(Index < VT.getVectorNumElements() &&
           "Splat index should be defined and in first operand");
    // See whether the value we're splatting is directly available as a scalar.
    if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
        Op0.getOpcode() == ISD::BUILD_VECTOR)
      return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
    // Otherwise keep it as a vector-to-vector operation.
    return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
                       DAG.getConstant(Index, DL, MVT::i32));
  }

  GeneralShuffle GS(VT);
  for (unsigned I = 0; I < NumElements; ++I) {
    int Elt = VSN->getMaskElt(I);
    if (Elt < 0)
      GS.addUndef();
    else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
                     unsigned(Elt) % NumElements))
      return SDValue();
  }
  return GS.getNode(DAG, SDLoc(VSN));
}

SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // Just insert the scalar into element 0 of an undefined vector.
  return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
                     Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
                     Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
}

SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                      SelectionDAG &DAG) const {
  // Handle insertions of floating-point values.
  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDValue Op2 = Op.getOperand(2);
  EVT VT = Op.getValueType();

  // Insertions into constant indices of a v2f64 can be done using VPDI.
  // However, if the inserted value is a bitcast or a constant then it's
  // better to use GPRs, as below.
  if (VT == MVT::v2f64 &&
      Op1.getOpcode() != ISD::BITCAST &&
      Op1.getOpcode() != ISD::ConstantFP &&
      Op2.getOpcode() == ISD::Constant) {
    uint64_t Index = cast<ConstantSDNode>(Op2)->getZExtValue();
    unsigned Mask = VT.getVectorNumElements() - 1;
    if (Index <= Mask)
      return Op;
  }

  // Otherwise bitcast to the equivalent integer form and insert via a GPR.
  MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
  MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
  SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
                            DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
                            DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
  return DAG.getNode(ISD::BITCAST, DL, VT, Res);
}

SDValue
SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                               SelectionDAG &DAG) const {
  // Handle extractions of floating-point values.
  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  EVT VT = Op.getValueType();
  EVT VecVT = Op0.getValueType();

  // Extractions of constant indices can be done directly.
  if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
    uint64_t Index = CIndexN->getZExtValue();
    unsigned Mask = VecVT.getVectorNumElements() - 1;
    if (Index <= Mask)
      return Op;
  }

  // Otherwise bitcast to the equivalent integer form and extract via a GPR.
  MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
  MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
  SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
                            DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
  return DAG.getNode(ISD::BITCAST, DL, VT, Res);
}
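
// Lower SIGN/ZERO_EXTEND_VECTOR_INREG by repeatedly unpacking the input
// vector, doubling the element width on each step until it matches the
// result type (e.g. i8 -> i16 -> i32 for a v16i8 source and a v4i32 result).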
SDValue
SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
                                              unsigned UnpackHigh) const {
  SDValue PackedOp = Op.getOperand(0);
  EVT OutVT = Op.getValueType();
  EVT InVT = PackedOp.getValueType();
  unsigned ToBits = OutVT.getScalarSizeInBits();
  unsigned FromBits = InVT.getScalarSizeInBits();
  do {
    FromBits *= 2;
    EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
                                 SystemZ::VectorBits / FromBits);
    PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), OutVT, PackedOp);
  } while (FromBits != ToBits);
  return PackedOp;
}
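
// Lower a vector shift (SHL/SRL/SRA).  When every lane is shifted by the
// same amount, the *_BY_SCALAR form lets one scalar operand supply the shift
// count for all lanes; the code below recognizes constant and variable splats
// in both BUILD_VECTOR and SHUFFLE_VECTOR form.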
SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
                                          unsigned ByScalar) const {
  // Look for cases where a vector shift can use the *_BY_SCALAR form.
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  unsigned ElemBitSize = VT.getScalarSizeInBits();

  // See whether the shift vector is a splat represented as BUILD_VECTOR.
  if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
    APInt SplatBits, SplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    // Check for constant splats.  Use ElemBitSize as the minimum element
    // width and reject splats that need wider elements.
    if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
                             ElemBitSize, true) &&
        SplatBitSize == ElemBitSize) {
      SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
                                      DL, MVT::i32);
      return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
    }
    // Check for variable splats.
    BitVector UndefElements;
    SDValue Splat = BVN->getSplatValue(&UndefElements);
    if (Splat) {
      // Since i32 is the smallest legal type, we either need a no-op
      // or a truncation.
      SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
      return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
    }
  }

  // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
  // and the shift amount is directly available in a GPR.
  if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
    if (VSN->isSplat()) {
      SDValue VSNOp0 = VSN->getOperand(0);
      unsigned Index = VSN->getSplatIndex();
      assert(Index < VT.getVectorNumElements() &&
             "Splat index should be defined and in first operand");
      if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
          VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
        // Since i32 is the smallest legal type, we either need a no-op
        // or a truncation.
        SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
                                    VSNOp0.getOperand(Index));
        return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
      }
    }
  }

  // Otherwise just treat the current form as legal.
  return Op;
}
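
// Top-level dispatch for the operations marked as Custom in the constructor.
// Each case simply forwards to the matching lower* helper above.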
SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
                                              SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::BR_CC:
    return lowerBR_CC(Op, DAG);
  case ISD::SELECT_CC:
    return lowerSELECT_CC(Op, DAG);
  case ISD::SETCC:
    return lowerSETCC(Op, DAG);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
  case ISD::JumpTable:
    return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::VACOPY:
    return lowerVACOPY(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    return lowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::GET_DYNAMIC_AREA_OFFSET:
    return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
  case ISD::SMUL_LOHI:
    return lowerSMUL_LOHI(Op, DAG);
  case ISD::UMUL_LOHI:
    return lowerUMUL_LOHI(Op, DAG);
  case ISD::SDIVREM:
    return lowerSDIVREM(Op, DAG);
  case ISD::UDIVREM:
    return lowerUDIVREM(Op, DAG);
  case ISD::SADDO:
  case ISD::SSUBO:
  case ISD::UADDO:
  case ISD::USUBO:
    return lowerXALUO(Op, DAG);
  case ISD::ADDCARRY:
  case ISD::SUBCARRY:
    return lowerADDSUBCARRY(Op, DAG);
  case ISD::OR:
    return lowerOR(Op, DAG);
  case ISD::CTPOP:
    return lowerCTPOP(Op, DAG);
  case ISD::ATOMIC_FENCE:
    return lowerATOMIC_FENCE(Op, DAG);
  case ISD::ATOMIC_SWAP:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
  case ISD::ATOMIC_STORE:
    return lowerATOMIC_STORE(Op, DAG);
  case ISD::ATOMIC_LOAD:
    return lowerATOMIC_LOAD(Op, DAG);
  case ISD::ATOMIC_LOAD_ADD:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
  case ISD::ATOMIC_LOAD_SUB:
    return lowerATOMIC_LOAD_SUB(Op, DAG);
  case ISD::ATOMIC_LOAD_AND:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
  case ISD::ATOMIC_LOAD_OR:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
  case ISD::ATOMIC_LOAD_XOR:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
  case ISD::ATOMIC_LOAD_NAND:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
  case ISD::ATOMIC_LOAD_MIN:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
  case ISD::ATOMIC_LOAD_MAX:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
  case ISD::ATOMIC_LOAD_UMIN:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
  case ISD::ATOMIC_LOAD_UMAX:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
    return lowerATOMIC_CMP_SWAP(Op, DAG);
  case ISD::STACKSAVE:
    return lowerSTACKSAVE(Op, DAG);
  case ISD::STACKRESTORE:
    return lowerSTACKRESTORE(Op, DAG);
  case ISD::PREFETCH:
    return lowerPREFETCH(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return lowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH);
  case ISD::ZERO_EXTEND_VECTOR_INREG:
    return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH);
  case ISD::SHL:
    return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
  case ISD::SRL:
    return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
  case ISD::SRA:
    return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
  default:
    llvm_unreachable("Unexpected node to lower");
  }
}

// Lower operations with invalid operand or result types (currently used
// only for 128-bit integer types).

static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
  SDLoc DL(In);
  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
                           DAG.getIntPtrConstant(0, DL));
  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
                           DAG.getIntPtrConstant(1, DL));
  SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
                                    MVT::Untyped, Hi, Lo);
  return SDValue(Pair, 0);
}
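
// The inverse of lowerI128ToGR128: split the untyped GR128 value produced by
// a 128-bit atomic node back into two i64 halves and reassemble the i128.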
static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) {
  SDLoc DL(In);
  SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
                                          DL, MVT::i64, In);
  SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
                                          DL, MVT::i64, In);
  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
}

void
SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  case ISD::ATOMIC_LOAD: {
    SDLoc DL(N);
    SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
    MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
    SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
                                          DL, Tys, Ops, MVT::i128, MMO);
    Results.push_back(lowerGR128ToI128(DAG, Res));
    Results.push_back(Res.getValue(1));
    break;
  }
  case ISD::ATOMIC_STORE: {
    SDLoc DL(N);
    SDVTList Tys = DAG.getVTList(MVT::Other);
    SDValue Ops[] = { N->getOperand(0),
                      lowerI128ToGR128(DAG, N->getOperand(2)),
                      N->getOperand(1) };
    MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
    SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
                                          DL, Tys, Ops, MVT::i128, MMO);
    // We have to enforce sequential consistency by performing a
    // serialization operation after the store.
    if (cast<AtomicSDNode>(N)->getOrdering() ==
        AtomicOrdering::SequentiallyConsistent)
      Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
                                       MVT::Other, Res), 0);
    Results.push_back(Res);
    break;
  }
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
    SDLoc DL(N);
    SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                      lowerI128ToGR128(DAG, N->getOperand(2)),
                      lowerI128ToGR128(DAG, N->getOperand(3)) };
    MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
    SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
                                          DL, Tys, Ops, MVT::i128, MMO);
    SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
                                SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
    Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
    Results.push_back(lowerGR128ToI128(DAG, Res));
    Results.push_back(Success);
    Results.push_back(Res.getValue(2));
    break;
  }
  default:
    llvm_unreachable("Unexpected node to lower");
  }
}

void
SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
                                          SmallVectorImpl<SDValue> &Results,
                                          SelectionDAG &DAG) const {
  return LowerOperationWrapper(N, Results, DAG);
}
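
// Map SystemZISD opcodes to human-readable names for DAG dumps.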
const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
  switch ((SystemZISD::NodeType)Opcode) {
    case SystemZISD::FIRST_NUMBER: break;
    OPCODE(PCREL_WRAPPER);
    OPCODE(PCREL_OFFSET);
    OPCODE(SELECT_CCMASK);
    OPCODE(ADJDYNALLOC);
    OPCODE(SEARCH_STRING);
    OPCODE(TBEGIN_NOFLOAT);
    OPCODE(ROTATE_MASK);
    OPCODE(JOIN_DWORDS);
    OPCODE(PERMUTE_DWORDS);
    OPCODE(UNPACK_HIGH);
    OPCODE(UNPACKL_HIGH);
    OPCODE(UNPACKL_LOW);
    OPCODE(VSHL_BY_SCALAR);
    OPCODE(VSRL_BY_SCALAR);
    OPCODE(VSRA_BY_SCALAR);
    OPCODE(ATOMIC_SWAPW);
    OPCODE(ATOMIC_LOADW_ADD);
    OPCODE(ATOMIC_LOADW_SUB);
    OPCODE(ATOMIC_LOADW_AND);
    OPCODE(ATOMIC_LOADW_OR);
    OPCODE(ATOMIC_LOADW_XOR);
    OPCODE(ATOMIC_LOADW_NAND);
    OPCODE(ATOMIC_LOADW_MIN);
    OPCODE(ATOMIC_LOADW_MAX);
    OPCODE(ATOMIC_LOADW_UMIN);
    OPCODE(ATOMIC_LOADW_UMAX);
    OPCODE(ATOMIC_CMP_SWAPW);
    OPCODE(ATOMIC_CMP_SWAP);
    OPCODE(ATOMIC_LOAD_128);
    OPCODE(ATOMIC_STORE_128);
    OPCODE(ATOMIC_CMP_SWAP_128);
  }
  return nullptr;
#undef OPCODE
}

// Return true if VT is a vector whose elements are a whole number of bytes
// in width. Also check for presence of vector support.
bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
  if (!Subtarget.hasVector())
    return false;

  return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
}

// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
// producing a result of type ResVT.  Op is a possibly bitcast version
// of the input vector and Index is the index (based on type VecVT) that
// should be extracted.  Return the new extraction if a simplification
// was possible or if Force is true.
SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
                                              EVT VecVT, SDValue Op,
                                              unsigned Index,
                                              DAGCombinerInfo &DCI,
                                              bool Force) const {
  SelectionDAG &DAG = DCI.DAG;

  // The number of bytes being extracted.
  unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();

  for (;;) {
    unsigned Opcode = Op.getOpcode();
    if (Opcode == ISD::BITCAST)
      // Look through bitcasts.
      Op = Op.getOperand(0);
    else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
             canTreatAsByteVector(Op.getValueType())) {
      // Get a VPERM-like permute mask and see whether the bytes covered
      // by the extracted element are a contiguous sequence from one
      // source operand.
      SmallVector<int, SystemZ::VectorBytes> Bytes;
      if (!getVPermMask(Op, Bytes))
        break;
      int First;
      if (!getShuffleInput(Bytes, Index * BytesPerElement,
                           BytesPerElement, First))
        break;
      if (First < 0)
        return DAG.getUNDEF(ResVT);
      // Make sure the contiguous sequence starts at a multiple of the
      // original element size.
      unsigned Byte = unsigned(First) % Bytes.size();
      if (Byte % BytesPerElement != 0)
        break;
      // We can get the extracted value directly from an input.
      Index = Byte / BytesPerElement;
      Op = Op.getOperand(unsigned(First) / Bytes.size());
      Force = true;
    } else if (Opcode == ISD::BUILD_VECTOR &&
               canTreatAsByteVector(Op.getValueType())) {
      // We can only optimize this case if the BUILD_VECTOR elements are
      // at least as wide as the extracted value.
      EVT OpVT = Op.getValueType();
      unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
      if (OpBytesPerElement < BytesPerElement)
        break;
      // Make sure that the least-significant bit of the extracted value
      // is the least significant bit of an input.
      unsigned End = (Index + 1) * BytesPerElement;
      if (End % OpBytesPerElement != 0)
        break;
      // We're extracting the low part of one operand of the BUILD_VECTOR.
      Op = Op.getOperand(End / OpBytesPerElement - 1);
      if (!Op.getValueType().isInteger()) {
        EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
        Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
        DCI.AddToWorklist(Op.getNode());
      }
      EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
      Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
      if (VT != ResVT) {
        DCI.AddToWorklist(Op.getNode());
        Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
      }
      return Op;
    } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
                Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
                Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
               canTreatAsByteVector(Op.getValueType()) &&
               canTreatAsByteVector(Op.getOperand(0).getValueType())) {
      // Make sure that only the unextended bits are significant.
      EVT ExtVT = Op.getValueType();
      EVT OpVT = Op.getOperand(0).getValueType();
      unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
      unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
      unsigned Byte = Index * BytesPerElement;
      unsigned SubByte = Byte % ExtBytesPerElement;
      unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
      if (SubByte < MinSubByte ||
          SubByte + BytesPerElement > ExtBytesPerElement)
        break;
      // Get the byte offset of the unextended element
      Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
      // ...then add the byte offset relative to that element.
      Byte += SubByte - MinSubByte;
      if (Byte % BytesPerElement != 0)
        break;
      Op = Op.getOperand(0);
      Index = Byte / BytesPerElement;
      Force = true;
    } else
      break;
  }
  if (Force) {
    if (Op.getValueType() != VecVT) {
      Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
      DCI.AddToWorklist(Op.getNode());
    }
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
                       DAG.getConstant(Index, DL, MVT::i32));
  }
  return SDValue();
}
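
// Worked example for combineTruncateExtract below: truncating
// (extract_vector_elt v4i32 X, 1) to i16 gives BytesPerElement = 4 and
// TruncBytes = 2, so Scale = 2 and Y' = (1 + 1) * 2 - 1 = 3; element 3 of X
// viewed as v8i16 holds the low (least-significant) half of i32 element 1.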
// Optimize vector operations in scalar value Op on the basis that Op
// is truncated to TruncVT.
SDValue SystemZTargetLowering::combineTruncateExtract(
    const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
  // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
  // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
  // of type TruncVT.
  if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      TruncVT.getSizeInBits() % 8 == 0) {
    SDValue Vec = Op.getOperand(0);
    EVT VecVT = Vec.getValueType();
    if (canTreatAsByteVector(VecVT)) {
      if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
        unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
        unsigned TruncBytes = TruncVT.getStoreSize();
        if (BytesPerElement % TruncBytes == 0) {
          // Calculate the value of Y' in the above description.  We are
          // splitting the original elements into Scale equal-sized pieces
          // and for truncation purposes want the last (least-significant)
          // of these pieces for IndexN.  This is easiest to do by calculating
          // the start index of the following element and then subtracting 1.
          unsigned Scale = BytesPerElement / TruncBytes;
          unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;

          // Defer the creation of the bitcast from X to combineExtract,
          // which might be able to optimize the extraction.
          VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
                                   VecVT.getStoreSize() / TruncBytes);
          EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
          return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
        }
      }
    }
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineZERO_EXTEND(
    SDNode *N, DAGCombinerInfo &DCI) const {
  // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
  SelectionDAG &DAG = DCI.DAG;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
    auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
    auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (TrueOp && FalseOp) {
      SDLoc DL(N0);
      SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
                        DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
                        N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
      SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
      // If N0 has multiple uses, change other uses as well.
      if (!N0.hasOneUse()) {
        SDValue TruncSelect =
          DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
        DCI.CombineTo(N0.getNode(), TruncSelect);
      }
      return NewSelect;
    }
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
    SDNode *N, DAGCombinerInfo &DCI) const {
  // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
  // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
  // into (select_cc LHS, RHS, -1, 0, COND)
  SelectionDAG &DAG = DCI.DAG;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
    N0 = N0.getOperand(0);
  if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
    SDLoc DL(N0);
    SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
                      DAG.getConstant(-1, DL, VT), DAG.getConstant(0, DL, VT),
                      N0.getOperand(2) };
    return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineSIGN_EXTEND(
    SDNode *N, DAGCombinerInfo &DCI) const {
  // Convert (sext (ashr (shl X, C1), C2)) to
  // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
  // cheap as narrower ones.
  SelectionDAG &DAG = DCI.DAG;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
    auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    SDValue Inner = N0.getOperand(0);
    if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
      if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
        unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
        unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
        unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
        EVT ShiftVT = N0.getOperand(1).getValueType();
        SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
                                  Inner.getOperand(0));
        SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
                                  DAG.getConstant(NewShlAmt, SDLoc(Inner),
                                                  ShiftVT));
        return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
                           DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
      }
    }
  }
  return SDValue();
}
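
// DAG combine for SystemZISD::MERGE_HIGH / MERGE_LOW: a merge whose first
// operand is all zeros can be folded to zero or rewritten as a logical
// unpack of the other operand.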
SDValue SystemZTargetLowering::combineMERGE(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  unsigned Opcode = N->getOpcode();
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  if (Op0.getOpcode() == ISD::BITCAST)
    Op0 = Op0.getOperand(0);
  if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
    // (z_merge_* 0, 0) -> 0.  This is mostly useful for using VLLEZF
    // for v4f32.
    if (Op1 == N->getOperand(0))
      return Op1;
    // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
    EVT VT = Op1.getValueType();
    unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
    if (ElemBytes <= 4) {
      Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
                SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
      EVT InVT = VT.changeVectorElementTypeToInteger();
      EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
                                   SystemZ::VectorBytes / ElemBytes / 2);
      if (VT != InVT) {
        Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
        DCI.AddToWorklist(Op1.getNode());
      }
      SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
      DCI.AddToWorklist(Op.getNode());
      return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
    }
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineLOAD(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  EVT LdVT = N->getValueType(0);
  if (LdVT.isVector() || LdVT.isInteger())
    return SDValue();
  // Transform a scalar load that is REPLICATEd as well as having other
  // use(s) to the form where the other use(s) use the first element of the
  // REPLICATE instead of the load. Otherwise instruction selection will not
  // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
  // point loads.

  SDValue Replicate;
  SmallVector<SDNode*, 8> OtherUses;
  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
       UI != UE; ++UI) {
    if (UI->getOpcode() == SystemZISD::REPLICATE) {
      if (Replicate)
        return SDValue(); // Should never happen
      Replicate = SDValue(*UI, 0);
    }
    else if (UI.getUse().getResNo() == 0)
      OtherUses.push_back(*UI);
  }
  if (!Replicate || OtherUses.empty())
    return SDValue();

  SDLoc DL(N);
  SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
                                 Replicate, DAG.getConstant(0, DL, MVT::i32));
  // Update uses of the loaded Value while preserving old chains.
  for (SDNode *U : OtherUses) {
    SmallVector<SDValue, 8> Ops;
    for (SDValue Op : U->ops())
      Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
    DAG.UpdateNodeOperands(U, Ops);
  }
  return SDValue(N, 0);
}

SDValue SystemZTargetLowering::combineSTORE(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  auto *SN = cast<StoreSDNode>(N);
  auto &Op1 = N->getOperand(1);
  EVT MemVT = SN->getMemoryVT();
  // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
  // for the extraction to be done on a vMiN value, so that we can use VSTE.
  // If X has wider elements then convert it to:
  // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
  if (MemVT.isInteger() && SN->isTruncatingStore()) {
    if (SDValue Value =
            combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
      DCI.AddToWorklist(Value.getNode());

      // Rewrite the store with the new form of stored value.
      return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
                               SN->getBasePtr(), SN->getMemoryVT(),
                               SN->getMemOperand());
    }
  }
  // Combine STORE (BSWAP) into STRVH/STRV/STRVG
  if (!SN->isTruncatingStore() &&
      Op1.getOpcode() == ISD::BSWAP &&
      Op1.getNode()->hasOneUse() &&
      (Op1.getValueType() == MVT::i16 ||
       Op1.getValueType() == MVT::i32 ||
       Op1.getValueType() == MVT::i64)) {

    SDValue BSwapOp = Op1.getOperand(0);

    if (BSwapOp.getValueType() == MVT::i16)
      BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);

    SDValue Ops[] = {
      N->getOperand(0), BSwapOp, N->getOperand(2)
    };

    return
      DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
                              Ops, MemVT, SN->getMemOperand());
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
    SDNode *N, DAGCombinerInfo &DCI) const {

  if (!Subtarget.hasVector())
    return SDValue();

  // Try to simplify a vector extraction.
  if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
    SDValue Op0 = N->getOperand(0);
    EVT VecVT = Op0.getValueType();
    return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
                          IndexN->getZExtValue(), DCI, false);
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineJOIN_DWORDS(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  // (join_dwords X, X) == (replicate X)
  if (N->getOperand(0) == N->getOperand(1))
    return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
                       N->getOperand(0));
  return SDValue();
}

SDValue SystemZTargetLowering::combineFP_ROUND(
    SDNode *N, DAGCombinerInfo &DCI) const {

  if (!Subtarget.hasVector())
    return SDValue();

  // (fpround (extract_vector_elt X 0))
  // (fpround (extract_vector_elt X 1)) ->
  // (extract_vector_elt (VROUND X) 0)
  // (extract_vector_elt (VROUND X) 2)
  //
  // This is a special case since the target doesn't really support v2f32s.
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(0);
  if (N->getValueType(0) == MVT::f32 &&
      Op0.hasOneUse() &&
      Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      Op0.getOperand(0).getValueType() == MVT::v2f64 &&
      Op0.getOperand(1).getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
    SDValue Vec = Op0.getOperand(0);
    for (auto *U : Vec->uses()) {
      if (U != Op0.getNode() &&
          U->hasOneUse() &&
          U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          U->getOperand(0) == Vec &&
          U->getOperand(1).getOpcode() == ISD::Constant &&
          cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
        SDValue OtherRound = SDValue(*U->use_begin(), 0);
        if (OtherRound.getOpcode() == ISD::FP_ROUND &&
            OtherRound.getOperand(0) == SDValue(U, 0) &&
            OtherRound.getValueType() == MVT::f32) {
          SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
                                       MVT::v4f32, Vec);
          DCI.AddToWorklist(VRound.getNode());
          SDValue Extract1 =
            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
                        VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
          DCI.AddToWorklist(Extract1.getNode());
          DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
          SDValue Extract0 =
            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
                        VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
          DCI.AddToWorklist(Extract0.getNode());
          return Extract0;
        }
      }
    }
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineFP_EXTEND(
    SDNode *N, DAGCombinerInfo &DCI) const {

  if (!Subtarget.hasVector())
    return SDValue();

  // (fpextend (extract_vector_elt X 0))
  // (fpextend (extract_vector_elt X 2)) ->
  // (extract_vector_elt (VEXTEND X) 0)
  // (extract_vector_elt (VEXTEND X) 1)
  //
  // This is a special case since the target doesn't really support v2f32s.
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(0);
  if (N->getValueType(0) == MVT::f64 &&
      Op0.hasOneUse() &&
      Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      Op0.getOperand(0).getValueType() == MVT::v4f32 &&
      Op0.getOperand(1).getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
    SDValue Vec = Op0.getOperand(0);
    for (auto *U : Vec->uses()) {
      if (U != Op0.getNode() &&
          U->hasOneUse() &&
          U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          U->getOperand(0) == Vec &&
          U->getOperand(1).getOpcode() == ISD::Constant &&
          cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 2) {
        SDValue OtherExtend = SDValue(*U->use_begin(), 0);
        if (OtherExtend.getOpcode() == ISD::FP_EXTEND &&
            OtherExtend.getOperand(0) == SDValue(U, 0) &&
            OtherExtend.getValueType() == MVT::f64) {
          SDValue VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
                                        MVT::v2f64, Vec);
          DCI.AddToWorklist(VExtend.getNode());
          SDValue Extract1 =
            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
                        VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
          DCI.AddToWorklist(Extract1.getNode());
          DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
          SDValue Extract0 =
            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
                        VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
          DCI.AddToWorklist(Extract0.getNode());
          return Extract0;
        }
      }
    }
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineBSWAP(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  // Combine BSWAP (LOAD) into LRVH/LRV/LRVG
  if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
      N->getOperand(0).hasOneUse() &&
      (N->getValueType(0) == MVT::i16 || N->getValueType(0) == MVT::i32 ||
       N->getValueType(0) == MVT::i64)) {

    SDValue Load = N->getOperand(0);
    LoadSDNode *LD = cast<LoadSDNode>(Load);

    // Create the byte-swapping load.
    SDValue Ops[] = {
      LD->getChain(),    // Chain
      LD->getBasePtr()   // Ptr
    };
    EVT LoadVT = N->getValueType(0);
    if (LoadVT == MVT::i16)
      LoadVT = MVT::i32;
    SDValue BSLoad =
      DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
                              DAG.getVTList(LoadVT, MVT::Other),
                              Ops, LD->getMemoryVT(), LD->getMemOperand());

    // If this is an i16 load, insert the truncate.
    SDValue ResVal = BSLoad;
    if (N->getValueType(0) == MVT::i16)
      ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);

    // First, combine the bswap away.  This makes the value produced by the
    // load dead.
    DCI.CombineTo(N, ResVal);

    // Next, combine the load away, we give it a bogus result value but a real
    // chain result.  The result value is dead because the bswap is dead.
    DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));

    // Return N so it doesn't get rechecked!
    return SDValue(N, 0);
  }
  return SDValue();
}

static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
  // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
  // set by the CCReg instruction using the CCValid / CCMask masks.
  // If the CCReg instruction is itself an ICMP testing the condition
  // code set by some other instruction, see whether we can directly
  // use that condition code.

  // Verify that we have an ICMP against some constant.
  if (CCValid != SystemZ::CCMASK_ICMP)
    return false;
  auto *ICmp = CCReg.getNode();
  if (ICmp->getOpcode() != SystemZISD::ICMP)
    return false;
  auto *CompareLHS = ICmp->getOperand(0).getNode();
  auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
  if (!CompareRHS)
    return false;

  // Optimize the case where CompareLHS is a SELECT_CCMASK.
  if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
    // Verify that we have an appropriate mask for a EQ or NE comparison.
    bool Invert = false;
    if (CCMask == SystemZ::CCMASK_CMP_NE)
      Invert = !Invert;
    else if (CCMask != SystemZ::CCMASK_CMP_EQ)
      return false;

    // Verify that the ICMP compares against one of select values.
    auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
    if (!TrueVal)
      return false;
    auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
    if (!FalseVal)
      return false;
    if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
      Invert = !Invert;
    else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
      return false;

    // Compute the effective CC mask for the new branch or select.
    auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
    auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
    if (!NewCCValid || !NewCCMask)
      return false;
    CCValid = NewCCValid->getZExtValue();
    CCMask = NewCCMask->getZExtValue();
    if (Invert)
      CCMask ^= CCValid;

    // Return the updated CCReg link.
    CCReg = CompareLHS->getOperand(4);
    return true;
  }

  // Optimize the case where CompareRHS is (SRA (SHL (IPM))).
  if (CompareLHS->getOpcode() == ISD::SRA) {
    auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
    if (!SRACount || SRACount->getZExtValue() != 30)
      return false;
    auto *SHL = CompareLHS->getOperand(0).getNode();
    if (SHL->getOpcode() != ISD::SHL)
      return false;
    auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
    if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
      return false;
    auto *IPM = SHL->getOperand(0).getNode();
    if (IPM->getOpcode() != SystemZISD::IPM)
      return false;

    // Avoid introducing CC spills (because SRA would clobber CC).
    if (!CompareLHS->hasOneUse())
      return false;
    // Verify that the ICMP compares against zero.
    if (CompareRHS->getZExtValue() != 0)
      return false;

    // Compute the effective CC mask for the new branch or select.
    switch (CCMask) {
    case SystemZ::CCMASK_CMP_EQ: break;
    case SystemZ::CCMASK_CMP_NE: break;
    case SystemZ::CCMASK_CMP_LT: CCMask = SystemZ::CCMASK_CMP_GT; break;
    case SystemZ::CCMASK_CMP_GT: CCMask = SystemZ::CCMASK_CMP_LT; break;
    case SystemZ::CCMASK_CMP_LE: CCMask = SystemZ::CCMASK_CMP_GE; break;
    case SystemZ::CCMASK_CMP_GE: CCMask = SystemZ::CCMASK_CMP_LE; break;
    default: return false;
    }

    // Return the updated CCReg link.
    CCReg = IPM->getOperand(0);
    return true;
  }

  return false;
}

SDValue SystemZTargetLowering::combineBR_CCMASK(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
  auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
  auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
  if (!CCValid || !CCMask)
    return SDValue();

  int CCValidVal = CCValid->getZExtValue();
  int CCMaskVal = CCMask->getZExtValue();
  SDValue Chain = N->getOperand(0);
  SDValue CCReg = N->getOperand(4);

  if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
    return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
                       Chain,
                       DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32),
                       DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32),
                       N->getOperand(3), CCReg);
  return SDValue();
}

SDValue SystemZTargetLowering::combineSELECT_CCMASK(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
  auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
  auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
  if (!CCValid || !CCMask)
    return SDValue();

  int CCValidVal = CCValid->getZExtValue();
  int CCMaskVal = CCMask->getZExtValue();
  SDValue CCReg = N->getOperand(4);

  if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
    return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
                       N->getOperand(0),
                       N->getOperand(1),
                       DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32),
                       DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32),
                       CCReg);
  return SDValue();
}

SDValue SystemZTargetLowering::combineGET_CCMASK(
    SDNode *N, DAGCombinerInfo &DCI) const {

  // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
  auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
  auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
  if (!CCValid || !CCMask)
    return SDValue();
  int CCValidVal = CCValid->getZExtValue();
  int CCMaskVal = CCMask->getZExtValue();

  SDValue Select = N->getOperand(0);
  if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
    return SDValue();

  auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
  auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
  if (!SelectCCValid || !SelectCCMask)
    return SDValue();
  int SelectCCValidVal = SelectCCValid->getZExtValue();
  int SelectCCMaskVal = SelectCCMask->getZExtValue();

  auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
  auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
  if (!TrueVal || !FalseVal)
    return SDValue();
  if (TrueVal->getZExtValue() != 0 && FalseVal->getZExtValue() == 0)
    ;
  else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() != 0)
    SelectCCMaskVal ^= SelectCCValidVal;
  else
    return SDValue();

  if (SelectCCValidVal & ~CCValidVal)
    return SDValue();
  if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
    return SDValue();

  return Select->getOperand(4);
}

SDValue SystemZTargetLowering::combineIntDIVREM(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  EVT VT = N->getValueType(0);
  // In the case where the divisor is a vector of constants a cheaper
  // sequence of instructions can replace the divide. BuildSDIV is called to
  // do this during DAG combining, but it only succeeds when it can build a
  // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
  // since it is not Legal but Custom it can only happen before
  // legalization. Therefore we must scalarize this early before Combine
  // 1. For widened vectors, this is already the result of type legalization.
  if (VT.isVector() && isTypeLegal(VT) &&
      DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
    return DAG.UnrollVectorOp(N);
  return SDValue();
}
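
// Implement TargetLowering::unwrapAddress: look through PCREL_WRAPPER nodes
// so that callers see the underlying address value.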
SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
  if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
    return N->getOperand(0);
  return N;
}

SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
  switch(N->getOpcode()) {
  default: break;
  case ISD::ZERO_EXTEND:        return combineZERO_EXTEND(N, DCI);
  case ISD::SIGN_EXTEND:        return combineSIGN_EXTEND(N, DCI);
  case ISD::SIGN_EXTEND_INREG:  return combineSIGN_EXTEND_INREG(N, DCI);
  case SystemZISD::MERGE_HIGH:
  case SystemZISD::MERGE_LOW:   return combineMERGE(N, DCI);
  case ISD::LOAD:               return combineLOAD(N, DCI);
  case ISD::STORE:              return combineSTORE(N, DCI);
  case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
  case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
  case ISD::FP_ROUND:           return combineFP_ROUND(N, DCI);
  case ISD::FP_EXTEND:          return combineFP_EXTEND(N, DCI);
  case ISD::BSWAP:              return combineBSWAP(N, DCI);
  case SystemZISD::BR_CCMASK:   return combineBR_CCMASK(N, DCI);
  case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
  case SystemZISD::GET_CCMASK:  return combineGET_CCMASK(N, DCI);
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::SREM:
  case ISD::UREM:               return combineIntDIVREM(N, DCI);
  }

  return SDValue();
}

// Return the demanded elements for the OpNo source operand of Op. DemandedElts
// is the demanded elements mask for Op's result.
static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
                                    unsigned OpNo) {
  EVT VT = Op.getValueType();
  unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
  APInt SrcDemE;
  unsigned Opcode = Op.getOpcode();
  if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
    unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    switch (Id) {
    case Intrinsic::s390_vpksh:   // PACKS
    case Intrinsic::s390_vpksf:
    case Intrinsic::s390_vpksg:
    case Intrinsic::s390_vpkshs:  // PACKS_CC
    case Intrinsic::s390_vpksfs:
    case Intrinsic::s390_vpksgs:
    case Intrinsic::s390_vpklsh:  // PACKLS
    case Intrinsic::s390_vpklsf:
    case Intrinsic::s390_vpklsg:
    case Intrinsic::s390_vpklshs: // PACKLS_CC
    case Intrinsic::s390_vpklsfs:
    case Intrinsic::s390_vpklsgs:
      // VECTOR PACK truncates the elements of two source vectors into one.
      SrcDemE = DemandedElts;
      if (OpNo == 2)
        SrcDemE.lshrInPlace(NumElts / 2);
      SrcDemE = SrcDemE.trunc(NumElts / 2);
      break;
      // VECTOR UNPACK extends half the elements of the source vector.
    case Intrinsic::s390_vuphb:  // VECTOR UNPACK HIGH
    case Intrinsic::s390_vuphh:
    case Intrinsic::s390_vuphf:
    case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
    case Intrinsic::s390_vuplhh:
    case Intrinsic::s390_vuplhf:
      SrcDemE = APInt(NumElts * 2, 0);
      SrcDemE.insertBits(DemandedElts, 0);
      break;
    case Intrinsic::s390_vuplb:  // VECTOR UNPACK LOW
    case Intrinsic::s390_vuplhw:
    case Intrinsic::s390_vuplf:
    case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
    case Intrinsic::s390_vupllh:
    case Intrinsic::s390_vupllf:
      SrcDemE = APInt(NumElts * 2, 0);
      SrcDemE.insertBits(DemandedElts, NumElts);
      break;
    case Intrinsic::s390_vpdi: {
      // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
      SrcDemE = APInt(NumElts, 0);
      if (!DemandedElts[OpNo - 1])
        break;
      unsigned Mask = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
      // Demand input element 0 or 1, given by the mask bit value.
      SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
      break;
    }
    case Intrinsic::s390_vsldb: {
      // VECTOR SHIFT LEFT DOUBLE BY BYTE
      assert(VT == MVT::v16i8 && "Unexpected type.");
      unsigned FirstIdx = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
      unsigned NumSrc0Els = 16 - FirstIdx;
      SrcDemE = APInt(NumElts, 0);
      if (OpNo == 1) {
        APInt DemEls = DemandedElts.trunc(NumSrc0Els);
        SrcDemE.insertBits(DemEls, FirstIdx);
      } else {
        APInt DemEls = DemandedElts.lshr(NumSrc0Els);
        SrcDemE.insertBits(DemEls, 0);
      }
      break;
    }
    case Intrinsic::s390_vperm:
      SrcDemE = APInt(NumElts, 1);
      break;
    default:
      llvm_unreachable("Unhandled intrinsic.");
      break;
    }
  } else {
    switch (Opcode) {
    case SystemZISD::JOIN_DWORDS:
      // Scalar operand.
      SrcDemE = APInt(1, 1);
      break;
    case SystemZISD::SELECT_CCMASK:
      SrcDemE = DemandedElts;
      break;
    default:
      llvm_unreachable("Unhandled opcode.");
      break;
    }
  }
  return SrcDemE;
}
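
// Intersect the known bits of the two source operands selected by OpNo and
// OpNo + 1, restricted to the source elements that getDemandedSrcElements
// reports as relevant.  Used for SystemZ nodes and intrinsics that combine
// two vector inputs.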
static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
                                  const APInt &DemandedElts,
                                  const SelectionDAG &DAG, unsigned Depth,
                                  unsigned OpNo) {
  APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
  APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
  KnownBits LHSKnown =
      DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
  KnownBits RHSKnown =
      DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
  Known.Zero = LHSKnown.Zero & RHSKnown.Zero;
  Known.One = LHSKnown.One & RHSKnown.One;
}

void
SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                     KnownBits &Known,
                                                     const APInt &DemandedElts,
                                                     const SelectionDAG &DAG,
                                                     unsigned Depth) const {
  Known.resetAll();

  // Intrinsic CC result is returned in the two low bits.
  unsigned tmp0, tmp1; // not used
  if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) {
    Known.Zero.setBitsFrom(2);
    return;
  }
  EVT VT = Op.getValueType();
  if (Op.getResNo() != 0 || VT == MVT::Untyped)
    return;
  assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
          "KnownBits does not match VT in bitwidth");
  assert ((!VT.isVector() ||
           (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
          "DemandedElts does not match VT number of elements");
  unsigned BitWidth = Known.getBitWidth();
  unsigned Opcode = Op.getOpcode();
  if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
    bool IsLogical = false;
    unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    switch (Id) {
    case Intrinsic::s390_vpksh:   // PACKS
    case Intrinsic::s390_vpksf:
    case Intrinsic::s390_vpksg:
    case Intrinsic::s390_vpkshs:  // PACKS_CC
    case Intrinsic::s390_vpksfs:
    case Intrinsic::s390_vpksgs:
    case Intrinsic::s390_vpklsh:  // PACKLS
    case Intrinsic::s390_vpklsf:
    case Intrinsic::s390_vpklsg:
    case Intrinsic::s390_vpklshs: // PACKLS_CC
    case Intrinsic::s390_vpklsfs:
    case Intrinsic::s390_vpklsgs:
    case Intrinsic::s390_vpdi:
    case Intrinsic::s390_vsldb:
    case Intrinsic::s390_vperm:
      computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
      break;
    case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
    case Intrinsic::s390_vuplhh:
    case Intrinsic::s390_vuplhf:
    case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
    case Intrinsic::s390_vupllh:
    case Intrinsic::s390_vupllf:
      IsLogical = true;
      LLVM_FALLTHROUGH;
    case Intrinsic::s390_vuphb:  // VECTOR UNPACK HIGH
    case Intrinsic::s390_vuphh:
    case Intrinsic::s390_vuphf:
    case Intrinsic::s390_vuplb:  // VECTOR UNPACK LOW
    case Intrinsic::s390_vuplhw:
    case Intrinsic::s390_vuplf: {
      SDValue SrcOp = Op.getOperand(1);
      unsigned SrcBitWidth = SrcOp.getScalarValueSizeInBits();
      APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
      Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
      if (IsLogical) {
        Known = Known.zext(BitWidth);
        Known.Zero.setBitsFrom(SrcBitWidth);
      } else
        Known = Known.sext(BitWidth);
      break;
    }
    default:
      break;
    }
  } else {
    switch (Opcode) {
    case SystemZISD::JOIN_DWORDS:
    case SystemZISD::SELECT_CCMASK:
      computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
      break;
    case SystemZISD::REPLICATE: {
      SDValue SrcOp = Op.getOperand(0);
      Known = DAG.computeKnownBits(SrcOp, Depth + 1);
      if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
        Known = Known.sext(BitWidth); // VREPI sign extends the immedate.
      break;
    }
    default:
      break;
    }
  }

  // Known has the width of the source operand(s). Adjust if needed to match
  // the passed bitwidth.
  if (Known.getBitWidth() != BitWidth)
    Known = Known.zextOrTrunc(BitWidth);
}
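
// Conservatively compute the number of sign bits common to both source
// operands.  For PACK-style operations the result elements are narrower than
// the source elements, so the extra source bits are subtracted from the
// common count.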
static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
                                        const SelectionDAG &DAG, unsigned Depth,
                                        unsigned OpNo) {
  APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
  unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
  if (LHS == 1) return 1; // Early out.
  APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
  unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
  if (RHS == 1) return 1; // Early out.
  unsigned Common = std::min(LHS, RHS);
  unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
  EVT VT = Op.getValueType();
  unsigned VTBits = VT.getScalarSizeInBits();
  if (SrcBitWidth > VTBits) { // PACK
    unsigned SrcExtraBits = SrcBitWidth - VTBits;
    if (Common > SrcExtraBits)
      return (Common - SrcExtraBits);
    return 1;
  }
  assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
  return Common;
}

unsigned
SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
  if (Op.getResNo() != 0)
    return 1;
  unsigned Opcode = Op.getOpcode();
  if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
    unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    switch (Id) {
    case Intrinsic::s390_vpksh:   // PACKS
    case Intrinsic::s390_vpksf:
    case Intrinsic::s390_vpksg:
    case Intrinsic::s390_vpkshs:  // PACKS_CC
    case Intrinsic::s390_vpksfs:
    case Intrinsic::s390_vpksgs:
    case Intrinsic::s390_vpklsh:  // PACKLS
    case Intrinsic::s390_vpklsf:
    case Intrinsic::s390_vpklsg:
    case Intrinsic::s390_vpklshs: // PACKLS_CC
    case Intrinsic::s390_vpklsfs:
    case Intrinsic::s390_vpklsgs:
    case Intrinsic::s390_vpdi:
    case Intrinsic::s390_vsldb:
    case Intrinsic::s390_vperm:
      return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
    case Intrinsic::s390_vuphb:  // VECTOR UNPACK HIGH
    case Intrinsic::s390_vuphh:
    case Intrinsic::s390_vuphf:
    case Intrinsic::s390_vuplb:  // VECTOR UNPACK LOW
    case Intrinsic::s390_vuplhw:
    case Intrinsic::s390_vuplf: {
      SDValue PackedOp = Op.getOperand(1);
      APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
      unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
      EVT VT = Op.getValueType();
      unsigned VTBits = VT.getScalarSizeInBits();
      Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
      return Tmp;
    }
    default:
      break;
    }
  } else {
    switch (Opcode) {
    case SystemZISD::SELECT_CCMASK:
      return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
    default:
      break;
    }
  }

  return 1;
}

//===----------------------------------------------------------------------===//
// Custom insertion
//===----------------------------------------------------------------------===//

// Create a new basic block after MBB.
static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) {
  MachineFunction &MF = *MBB->getParent();
  MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
  MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
  return NewMBB;
}

// Split MBB after MI and return the new block (the one that contains
// instructions after MI).
static MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI,
                                          MachineBasicBlock *MBB) {
  MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
  NewMBB->splice(NewMBB->begin(), MBB,
                 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
  return NewMBB;
}

// Split MBB before MI and return the new block (the one that contains MI).
static MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
                                           MachineBasicBlock *MBB) {
  MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
  NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
  NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
  return NewMBB;
}

// Force base value Base into a register before MI.  Return the register.
static unsigned forceReg(MachineInstr &MI, MachineOperand &Base,
                         const SystemZInstrInfo *TII) {
  if (Base.isReg())
    return Base.getReg();

  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction &MF = *MBB->getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
  BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
      .add(Base)
      .addImm(0)
      .addReg(0);
  return Reg;
}

// The CC operand of MI might be missing a kill marker because there
// were multiple uses of CC, and ISel didn't know which to mark.
// Figure out whether MI should have had a kill marker.
static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
  // Scan forward through BB for a use/def of CC.
  MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
  for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
    const MachineInstr& mi = *miI;
    if (mi.readsRegister(SystemZ::CC))
      return false;
    if (mi.definesRegister(SystemZ::CC))
      break; // Should have kill-flag - update below.
  }

  // If we hit the end of the block, check whether CC is live into a
  // successor.
  if (miI == MBB->end()) {
    for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI)
      if ((*SI)->isLiveIn(SystemZ::CC))
        return false;
  }

  return true;
}

// Return true if it is OK for this Select pseudo-opcode to be cascaded
// together with other Select pseudo-opcodes into a single basic-block with
// a conditional jump around it.
static bool isSelectPseudo(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case SystemZ::Select32:
  case SystemZ::Select64:
  case SystemZ::SelectF32:
  case SystemZ::SelectF64:
  case SystemZ::SelectF128:
  case SystemZ::SelectVR32:
  case SystemZ::SelectVR64:
  case SystemZ::SelectVR128:
    return true;

  default:
    return false;
  }
}
// Helper function, which inserts PHI functions into SinkMBB:
//   %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
// where %FalseValue(i) and %TrueValue(i) are taken from the consecutive Selects
// in the [MIItBegin, MIItEnd) range.
static void createPHIsForSelects(MachineBasicBlock::iterator MIItBegin,
                                 MachineBasicBlock::iterator MIItEnd,
                                 MachineBasicBlock *TrueMBB,
                                 MachineBasicBlock *FalseMBB,
                                 MachineBasicBlock *SinkMBB) {
  MachineFunction *MF = TrueMBB->getParent();
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();

  unsigned CCValid = MIItBegin->getOperand(3).getImm();
  unsigned CCMask = MIItBegin->getOperand(4).getImm();
  DebugLoc DL = MIItBegin->getDebugLoc();

  MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();

  // As we are creating the PHIs, we have to be careful if there is more than
  // one.  Later Selects may reference the results of earlier Selects, but later
  // PHIs have to reference the individual true/false inputs from earlier PHIs.
  // That also means that PHI construction must work forward from earlier to
  // later, and that the code must maintain a mapping from each earlier PHI's
  // destination register to the registers that went into that PHI.
  DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;

  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd;
       MIIt = skipDebugInstructionsForward(++MIIt, MIItEnd)) {
    unsigned DestReg = MIIt->getOperand(0).getReg();
    unsigned TrueReg = MIIt->getOperand(1).getReg();
    unsigned FalseReg = MIIt->getOperand(2).getReg();

    // If this Select we are generating is the opposite condition from
    // the jump we generated, then we have to swap the operands for the
    // PHI that is going to be generated.
    if (MIIt->getOperand(4).getImm() == (CCValid ^ CCMask))
      std::swap(TrueReg, FalseReg);

    if (RegRewriteTable.find(TrueReg) != RegRewriteTable.end())
      TrueReg = RegRewriteTable[TrueReg].first;

    if (RegRewriteTable.find(FalseReg) != RegRewriteTable.end())
      FalseReg = RegRewriteTable[FalseReg].second;

    BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
      .addReg(TrueReg).addMBB(TrueMBB)
      .addReg(FalseReg).addMBB(FalseMBB);

    // Add this PHI to the rewrite table.
    RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
  }

  MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
}
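// Worked example of the rewrite table (virtual register names are made up,
// purely for illustration): for a cascaded pair
//   %5 = Select32 %t1, %f1, CCValid, CCMask
//   %6 = Select32 %5,  %f2, CCValid, CCMask
// the PHI for %6 must not read %5, because in SinkMBB %5 is itself a PHI
// result.  RegRewriteTable records %5 -> (%t1, %f1), so the %6 PHI is built
// from %t1 on the TrueMBB edge and %f2 on the FalseMBB edge instead.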
// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
MachineBasicBlock *
SystemZTargetLowering::emitSelect(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const {
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());

  unsigned CCValid = MI.getOperand(3).getImm();
  unsigned CCMask = MI.getOperand(4).getImm();
  DebugLoc DL = MI.getDebugLoc();

  // If we have a sequence of Select* pseudo instructions using the
  // same condition code value, we want to expand all of them into
  // a single pair of basic blocks using the same condition.
  MachineInstr *LastMI = &MI;
  MachineBasicBlock::iterator NextMIIt = skipDebugInstructionsForward(
      std::next(MachineBasicBlock::iterator(MI)), MBB->end());

  if (isSelectPseudo(MI))
    while (NextMIIt != MBB->end() && isSelectPseudo(*NextMIIt) &&
           NextMIIt->getOperand(3).getImm() == CCValid &&
           (NextMIIt->getOperand(4).getImm() == CCMask ||
            NextMIIt->getOperand(4).getImm() == (CCValid ^ CCMask))) {
      LastMI = &*NextMIIt;
      NextMIIt = skipDebugInstructionsForward(++NextMIIt, MBB->end());
    }

  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *JoinMBB  = splitBlockBefore(MI, MBB);
  MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);

  // Unless CC was killed in the last Select instruction, mark it as
  // live-in to both FalseMBB and JoinMBB.
  if (!LastMI->killsRegister(SystemZ::CC) && !checkCCKill(*LastMI, JoinMBB)) {
    FalseMBB->addLiveIn(SystemZ::CC);
    JoinMBB->addLiveIn(SystemZ::CC);
  }

  //  StartMBB:
  //   BRC CCMask, JoinMBB
  //   # fallthrough to FalseMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
  MBB->addSuccessor(JoinMBB);
  MBB->addSuccessor(FalseMBB);

  //  FalseMBB:
  //   # fallthrough to JoinMBB
  MBB = FalseMBB;
  MBB->addSuccessor(JoinMBB);

  //  JoinMBB:
  //   %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
  //  ...
  MBB = JoinMBB;
  MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI);
  MachineBasicBlock::iterator MIItEnd = skipDebugInstructionsForward(
      std::next(MachineBasicBlock::iterator(LastMI)), MBB->end());
  createPHIsForSelects(MIItBegin, MIItEnd, StartMBB, FalseMBB, MBB);

  StartMBB->erase(MIItBegin, MIItEnd);
  return JoinMBB;
}
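// Illustrative shape of the CFG produced by emitSelect for a single Select32
// (block and register names here are only for exposition):
//
//   StartMBB:  ...sets CC...
//              BRC CCValid, CCMask, JoinMBB
//   FalseMBB:  # fall through to JoinMBB
//   JoinMBB:   %res = phi [ %TrueReg, StartMBB ], [ %FalseReg, FalseMBB ]
//
// The "true" value arrives on the taken-branch edge from StartMBB and the
// "false" value on the fall-through edge via FalseMBB.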
// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
// StoreOpcode is the store to use and Invert says whether the store should
// happen when the condition is false rather than true.  If a STORE ON
// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
                                                        MachineBasicBlock *MBB,
                                                        unsigned StoreOpcode,
                                                        unsigned STOCOpcode,
                                                        bool Invert) const {
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());

  unsigned SrcReg = MI.getOperand(0).getReg();
  MachineOperand Base = MI.getOperand(1);
  int64_t Disp = MI.getOperand(2).getImm();
  unsigned IndexReg = MI.getOperand(3).getReg();
  unsigned CCValid = MI.getOperand(4).getImm();
  unsigned CCMask = MI.getOperand(5).getImm();
  DebugLoc DL = MI.getDebugLoc();

  StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);

  // Use STOCOpcode if possible.  We could use different store patterns in
  // order to avoid matching the index register, but the performance trade-offs
  // might be more complicated in that case.
  if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
    if (Invert)
      CCMask ^= CCValid;

    // ISel pattern matching also adds a load memory operand of the same
    // address, so take special care to find the storing memory operand.
    MachineMemOperand *MMO = nullptr;
    for (auto *I : MI.memoperands())
      if (I->isStore()) {
        MMO = I;
        break;
      }

    BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
      .addReg(SrcReg)
      .add(Base)
      .addImm(Disp)
      .addImm(CCValid)
      .addImm(CCMask)
      .addMemOperand(MMO);

    MI.eraseFromParent();
    return MBB;
  }

  // Get the condition needed to branch around the store.
  if (!Invert)
    CCMask ^= CCValid;

  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *JoinMBB  = splitBlockBefore(MI, MBB);
  MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);

  // Unless CC was killed in the CondStore instruction, mark it as
  // live-in to both FalseMBB and JoinMBB.
  if (!MI.killsRegister(SystemZ::CC) && !checkCCKill(MI, JoinMBB)) {
    FalseMBB->addLiveIn(SystemZ::CC);
    JoinMBB->addLiveIn(SystemZ::CC);
  }

  //  StartMBB:
  //   BRC CCMask, JoinMBB
  //   # fallthrough to FalseMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
  MBB->addSuccessor(JoinMBB);
  MBB->addSuccessor(FalseMBB);

  //  FalseMBB:
  //   store %SrcReg, %Disp(%Index,%Base)
  //   # fallthrough to JoinMBB
  MBB = FalseMBB;
  BuildMI(MBB, DL, TII->get(StoreOpcode))
    .addReg(SrcReg).add(Base).addImm(Disp).addReg(IndexReg);
  MBB->addSuccessor(JoinMBB);

  MI.eraseFromParent();
  return JoinMBB;
}
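// The two expansions produced above, sketched with made-up operands:
//
//   With a usable STORE ON CONDITION (no index register):
//     STOC %src, disp(%base), CCValid, CCMask      ; one predicated store
//
//   Otherwise, branch around a plain store:
//     StartMBB:  BRC CCValid, CCMask ^ CCValid, JoinMBB  ; skip if cond fails
//     FalseMBB:  ST %src, disp(%index,%base)             ; fall through
//     JoinMBB:   ...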
// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_*
// or ATOMIC_SWAP{,W} instruction MI.  BinOpcode is the instruction that
// performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}.
// BitSize is the width of the field in bits, or 0 if this is a partword
// ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize
// is one of the operands.  Invert says whether the field should be
// inverted after performing BinOpcode (e.g. for NAND).
MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
    unsigned BitSize, bool Invert) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  bool IsSubWord = (BitSize < 32);

  // Extract the operands.  Base can be a register or a frame index.
  // Src2 can be a register or immediate.
  unsigned Dest = MI.getOperand(0).getReg();
  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
  int64_t Disp = MI.getOperand(2).getImm();
  MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
  unsigned BitShift = (IsSubWord ? MI.getOperand(4).getReg() : 0);
  unsigned NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : 0);
  DebugLoc DL = MI.getDebugLoc();
  if (IsSubWord)
    BitSize = MI.getOperand(6).getImm();

  // Subword operations use 32-bit registers.
  const TargetRegisterClass *RC = (BitSize <= 32 ?
                                   &SystemZ::GR32BitRegClass :
                                   &SystemZ::GR64BitRegClass);
  unsigned LOpcode  = BitSize <= 32 ? SystemZ::L  : SystemZ::LG;
  unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;

  // Get the right opcodes for the displacement.
  LOpcode  = TII->getOpcodeForOffset(LOpcode,  Disp);
  CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.
  unsigned OrigVal       = MRI.createVirtualRegister(RC);
  unsigned OldVal        = MRI.createVirtualRegister(RC);
  unsigned NewVal        = (BinOpcode || IsSubWord ?
                            MRI.createVirtualRegister(RC) : Src2.getReg());
  unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
  unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);

  // Insert a basic block for the main loop.
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB  = splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB  = emitBlockAfter(StartMBB);

  //  StartMBB:
  //   ...
  //   %OrigVal = L Disp(%Base)
  //   # fall through to LoopMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
  MBB->addSuccessor(LoopMBB);

  //  LoopMBB:
  //   %OldVal        = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
  //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
  //   %RotatedNewVal = OP %RotatedOldVal, %Src2
  //   %NewVal        = RLL %RotatedNewVal, 0(%NegBitShift)
  //   %Dest          = CS %OldVal, %NewVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to DoneMBB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigVal).addMBB(StartMBB)
    .addReg(Dest).addMBB(LoopMBB);
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
      .addReg(OldVal).addReg(BitShift).addImm(0);
  if (Invert) {
    // Perform the operation normally and then invert every bit of the field.
    unsigned Tmp = MRI.createVirtualRegister(RC);
    BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
    if (BitSize <= 32)
      // XILF with the upper BitSize bits set.
      BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
        .addReg(Tmp).addImm(-1U << (32 - BitSize));
    else {
      // Use LCGR and add -1 to the result, which is more compact than
      // an XILF, XILH pair.
      unsigned Tmp2 = MRI.createVirtualRegister(RC);
      BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp);
      BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal)
        .addReg(Tmp2).addImm(-1);
    }
  } else if (BinOpcode)
    // A simple binary operation.
    BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
      .addReg(RotatedOldVal)
      .add(Src2);
  else if (IsSubWord)
    // Use RISBG to rotate Src2 into position and use it to replace the
    // field in RotatedOldVal.
    BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
      .addReg(RotatedOldVal).addReg(Src2.getReg())
      .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
      .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
    .addReg(OldVal)
    .addReg(NewVal)
    .add(Base)
    .addImm(Disp);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();
  return DoneMBB;
}
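// Worked example of the subword masking above for an 8-bit field
// (BitSize == 8, illustrative only):
//   -1U << (32 - BitSize) == 0xFF000000, so the XILF inverts exactly the
//   eight field bits, which the RLL has rotated into the top byte of the word;
//   RISBG32 ..., 32, 31 + BitSize, 32 - BitSize == RISBG32 ..., 32, 39, 24,
//   i.e. rotate %Src2 left by 24 and insert its bits 32-39 (the field) into
//   %RotatedOldVal while leaving the neighbouring bytes untouched.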
// Implement EmitInstrWithCustomInserter for pseudo
// ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI.  CompareOpcode is the
// instruction that should be used to compare the current field with the
// minimum or maximum value.  KeepOldMask is the BRC condition-code mask
// for when the current field should be kept.  BitSize is the width of
// the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction.
MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
    unsigned KeepOldMask, unsigned BitSize) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  bool IsSubWord = (BitSize < 32);

  // Extract the operands.  Base can be a register or a frame index.
  unsigned Dest = MI.getOperand(0).getReg();
  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
  int64_t Disp = MI.getOperand(2).getImm();
  unsigned Src2 = MI.getOperand(3).getReg();
  unsigned BitShift = (IsSubWord ? MI.getOperand(4).getReg() : 0);
  unsigned NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : 0);
  DebugLoc DL = MI.getDebugLoc();
  if (IsSubWord)
    BitSize = MI.getOperand(6).getImm();

  // Subword operations use 32-bit registers.
  const TargetRegisterClass *RC = (BitSize <= 32 ?
                                   &SystemZ::GR32BitRegClass :
                                   &SystemZ::GR64BitRegClass);
  unsigned LOpcode  = BitSize <= 32 ? SystemZ::L  : SystemZ::LG;
  unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;

  // Get the right opcodes for the displacement.
  LOpcode  = TII->getOpcodeForOffset(LOpcode,  Disp);
  CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.
  unsigned OrigVal       = MRI.createVirtualRegister(RC);
  unsigned OldVal        = MRI.createVirtualRegister(RC);
  unsigned NewVal        = MRI.createVirtualRegister(RC);
  unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
  unsigned RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2);
  unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);

  // Insert 3 basic blocks for the loop.
  MachineBasicBlock *StartMBB  = MBB;
  MachineBasicBlock *DoneMBB   = splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB   = emitBlockAfter(StartMBB);
  MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB);
  MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB);

  //  StartMBB:
  //   ...
  //   %OrigVal = L Disp(%Base)
  //   # fall through to LoopMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
  MBB->addSuccessor(LoopMBB);

  //  LoopMBB:
  //   %OldVal        = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
  //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
  //   CompareOpcode %RotatedOldVal, %Src2
  //   BRC KeepOldMask, UpdateMBB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigVal).addMBB(StartMBB)
    .addReg(Dest).addMBB(UpdateMBB);
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
      .addReg(OldVal).addReg(BitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CompareOpcode))
    .addReg(RotatedOldVal).addReg(Src2);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
  MBB->addSuccessor(UpdateMBB);
  MBB->addSuccessor(UseAltMBB);

  //  UseAltMBB:
  //   %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
  //   # fall through to UpdateMBB
  MBB = UseAltMBB;
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
      .addReg(RotatedOldVal).addReg(Src2)
      .addImm(32).addImm(31 + BitSize).addImm(0);
  MBB->addSuccessor(UpdateMBB);

  //  UpdateMBB:
  //   %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
  //                        [ %RotatedAltVal, UseAltMBB ]
  //   %NewVal        = RLL %RotatedNewVal, 0(%NegBitShift)
  //   %Dest          = CS %OldVal, %NewVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to DoneMBB
  MBB = UpdateMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
    .addReg(RotatedOldVal).addMBB(LoopMBB)
    .addReg(RotatedAltVal).addMBB(UseAltMBB);
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
      .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
    .addReg(OldVal)
    .addReg(NewVal)
    .add(Base)
    .addImm(Disp);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();
  return DoneMBB;
}
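// Shape of the min/max expansion above, for orientation (illustrative):
//   LoopMBB:   compare the current field with %Src2 and branch to UpdateMBB
//              when the old value should be kept (KeepOldMask);
//   UseAltMBB: otherwise splice %Src2 into the rotated word;
//   UpdateMBB: try to commit with CS, looping back to LoopMBB until the
//              compare-and-swap succeeds, then fall through to DoneMBB.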
// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW
// instruction MI.
MachineBasicBlock *
SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
                                          MachineBasicBlock *MBB) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Extract the operands.  Base can be a register or a frame index.
  unsigned Dest = MI.getOperand(0).getReg();
  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
  int64_t Disp = MI.getOperand(2).getImm();
  unsigned OrigCmpVal = MI.getOperand(3).getReg();
  unsigned OrigSwapVal = MI.getOperand(4).getReg();
  unsigned BitShift = MI.getOperand(5).getReg();
  unsigned NegBitShift = MI.getOperand(6).getReg();
  int64_t BitSize = MI.getOperand(7).getImm();
  DebugLoc DL = MI.getDebugLoc();

  const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;

  // Get the right opcodes for the displacement.
  unsigned LOpcode  = TII->getOpcodeForOffset(SystemZ::L,  Disp);
  unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.
  unsigned OrigOldVal = MRI.createVirtualRegister(RC);
  unsigned OldVal = MRI.createVirtualRegister(RC);
  unsigned CmpVal = MRI.createVirtualRegister(RC);
  unsigned SwapVal = MRI.createVirtualRegister(RC);
  unsigned StoreVal = MRI.createVirtualRegister(RC);
  unsigned RetryOldVal = MRI.createVirtualRegister(RC);
  unsigned RetryCmpVal = MRI.createVirtualRegister(RC);
  unsigned RetrySwapVal = MRI.createVirtualRegister(RC);

  // Insert 2 basic blocks for the loop.
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB  = splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB  = emitBlockAfter(StartMBB);
  MachineBasicBlock *SetMBB   = emitBlockAfter(LoopMBB);

  //  StartMBB:
  //   ...
  //   %OrigOldVal = L Disp(%Base)
  //   # fall through to LoopMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
    .add(Base).addImm(Disp).addReg(0);
  MBB->addSuccessor(LoopMBB);

  //  LoopMBB:
  //   %OldVal        = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
  //   %CmpVal        = phi [ %OrigCmpVal, EntryBB ], [ %RetryCmpVal, SetMBB ]
  //   %SwapVal       = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
  //   %Dest          = RLL %OldVal, BitSize(%BitShift)
  //                      ^^ The low BitSize bits contain the field
  //                         of interest.
  //   %RetryCmpVal   = RISBG32 %CmpVal, %Dest, 32, 63-BitSize, 0
  //                      ^^ Replace the upper 32-BitSize bits of the
  //                         comparison value with those that we loaded,
  //                         so that we can use a full word comparison.
  //   CR %Dest, %RetryCmpVal
  //   JNE DoneMBB
  //   # Fall through to SetMBB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigOldVal).addMBB(StartMBB)
    .addReg(RetryOldVal).addMBB(SetMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), CmpVal)
    .addReg(OrigCmpVal).addMBB(StartMBB)
    .addReg(RetryCmpVal).addMBB(SetMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
    .addReg(OrigSwapVal).addMBB(StartMBB)
    .addReg(RetrySwapVal).addMBB(SetMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::RLL), Dest)
    .addReg(OldVal).addReg(BitShift).addImm(BitSize);
  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetryCmpVal)
    .addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
  BuildMI(MBB, DL, TII->get(SystemZ::CR))
    .addReg(Dest).addReg(RetryCmpVal);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ICMP)
    .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB);
  MBB->addSuccessor(DoneMBB);
  MBB->addSuccessor(SetMBB);

  //  SetMBB:
  //   %RetrySwapVal = RISBG32 %SwapVal, %Dest, 32, 63-BitSize, 0
  //                      ^^ Replace the upper 32-BitSize bits of the new
  //                         value with those that we loaded.
  //   %StoreVal    = RLL %RetrySwapVal, -BitSize(%NegBitShift)
  //                      ^^ Rotate the new field to its proper position.
  //   %RetryOldVal = CS %Dest, %StoreVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to ExitMBB
  MBB = SetMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
    .addReg(SwapVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
  BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
    .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
  BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
    .addReg(Dest)
    .addReg(StoreVal)
    .add(Base)
    .addImm(Disp);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
  // to the block after the loop.  At this point, CC may have been defined
  // either by the CR in LoopMBB or by the CS in SetMBB.
  if (!MI.registerDefIsDead(SystemZ::CC))
    DoneMBB->addLiveIn(SystemZ::CC);

  MI.eraseFromParent();
  return DoneMBB;
}
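// Worked example for an 8-bit ATOMIC_CMP_SWAPW field (illustrative only):
//   RLL %Dest, %OldVal, 8(%BitShift) leaves the field in the low 8 bits of
//   %Dest, and RISBG32 ..., 32, 63 - 8, 0 copies bits 32-55 (the upper 24
//   bits of the low word) of %Dest over the comparison value while keeping
//   its low 8 bits, so the CR above compares full 32-bit words whose upper
//   24 bits are known to match and whose low 8 bits are old vs. expected
//   field.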
// Emit a move from two GR64s to a GR128.
MachineBasicBlock *
SystemZTargetLowering::emitPair128(MachineInstr &MI,
                                   MachineBasicBlock *MBB) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI.getDebugLoc();

  unsigned Dest = MI.getOperand(0).getReg();
  unsigned Hi = MI.getOperand(1).getReg();
  unsigned Lo = MI.getOperand(2).getReg();
  unsigned Tmp1 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
  unsigned Tmp2 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);

  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Tmp1);
  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Tmp2)
    .addReg(Tmp1).addReg(Hi).addImm(SystemZ::subreg_h64);
  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
    .addReg(Tmp2).addReg(Lo).addImm(SystemZ::subreg_l64);

  MI.eraseFromParent();
  return MBB;
}
// Emit an extension from a GR64 to a GR128.  ClearEven is true
// if the high register of the GR128 value must be cleared or false if
// it's "don't care".
MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
                                                     MachineBasicBlock *MBB,
                                                     bool ClearEven) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI.getDebugLoc();

  unsigned Dest = MI.getOperand(0).getReg();
  unsigned Src = MI.getOperand(1).getReg();
  unsigned In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);

  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
  if (ClearEven) {
    unsigned NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
    unsigned Zero64   = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);

    BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
      .addImm(0);
    BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
      .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
    In128 = NewIn128;
  }
  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
    .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);

  MI.eraseFromParent();
  return MBB;
}
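// For orientation: in a GR128 even/odd pair the even register is the high
// half (subreg_h64) and the odd register is the low half (subreg_l64), which
// is why emitExt128 zeroes subreg_h64 when ClearEven is set and emitPair128
// places Hi and Lo into subreg_h64 and subreg_l64 respectively.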
MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI.getDebugLoc();

  MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
  uint64_t DestDisp = MI.getOperand(1).getImm();
  MachineOperand SrcBase = earlyUseOperand(MI.getOperand(2));
  uint64_t SrcDisp = MI.getOperand(3).getImm();
  uint64_t Length = MI.getOperand(4).getImm();

  // When generating more than one CLC, all but the last will need to
  // branch to the end when a difference is found.
  MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ?
                               splitBlockAfter(MI, MBB) : nullptr);

  // Check for the loop form, in which operand 5 is the trip count.
  if (MI.getNumExplicitOperands() > 5) {
    bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);

    uint64_t StartCountReg = MI.getOperand(5).getReg();
    uint64_t StartSrcReg   = forceReg(MI, SrcBase, TII);
    uint64_t StartDestReg  = (HaveSingleBase ? StartSrcReg :
                              forceReg(MI, DestBase, TII));

    const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
    uint64_t ThisSrcReg  = MRI.createVirtualRegister(RC);
    uint64_t ThisDestReg = (HaveSingleBase ? ThisSrcReg :
                            MRI.createVirtualRegister(RC));
    uint64_t NextSrcReg  = MRI.createVirtualRegister(RC);
    uint64_t NextDestReg = (HaveSingleBase ? NextSrcReg :
                            MRI.createVirtualRegister(RC));

    RC = &SystemZ::GR64BitRegClass;
    uint64_t ThisCountReg = MRI.createVirtualRegister(RC);
    uint64_t NextCountReg = MRI.createVirtualRegister(RC);

    MachineBasicBlock *StartMBB = MBB;
    MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
    MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
    MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB);

    //  StartMBB:
    //   # fall through to LoopMBB
    MBB->addSuccessor(LoopMBB);

    //  LoopMBB:
    //   %ThisDestReg = phi [ %StartDestReg, StartMBB ],
    //                      [ %NextDestReg, NextMBB ]
    //   %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
    //                     [ %NextSrcReg, NextMBB ]
    //   %ThisCountReg = phi [ %StartCountReg, StartMBB ],
    //                       [ %NextCountReg, NextMBB ]
    //   ( PFD 2, 768+DestDisp(%ThisDestReg) )
    //   Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
    //   ( JLH EndMBB )
    //
    // The prefetch is used only for MVC.  The JLH is used only for CLC.
    MBB = LoopMBB;

    BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
      .addReg(StartDestReg).addMBB(StartMBB)
      .addReg(NextDestReg).addMBB(NextMBB);
    if (!HaveSingleBase)
      BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
        .addReg(StartSrcReg).addMBB(StartMBB)
        .addReg(NextSrcReg).addMBB(NextMBB);
    BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
      .addReg(StartCountReg).addMBB(StartMBB)
      .addReg(NextCountReg).addMBB(NextMBB);
    if (Opcode == SystemZ::MVC)
      BuildMI(MBB, DL, TII->get(SystemZ::PFD))
        .addImm(SystemZ::PFD_WRITE)
        .addReg(ThisDestReg).addImm(DestDisp + 768).addReg(0);
    BuildMI(MBB, DL, TII->get(Opcode))
      .addReg(ThisDestReg).addImm(DestDisp).addImm(256)
      .addReg(ThisSrcReg).addImm(SrcDisp);
    if (EndMBB) {
      BuildMI(MBB, DL, TII->get(SystemZ::BRC))
        .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
        .addMBB(EndMBB);
      MBB->addSuccessor(EndMBB);
      MBB->addSuccessor(NextMBB);
    }

    //  NextMBB:
    //   %NextDestReg = LA 256(%ThisDestReg)
    //   %NextSrcReg = LA 256(%ThisSrcReg)
    //   %NextCountReg = AGHI %ThisCountReg, -1
    //   CGHI %NextCountReg, 0
    //   JLH LoopMBB
    //   # fall through to DoneMBB
    //
    // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
    MBB = NextMBB;

    BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
      .addReg(ThisDestReg).addImm(256).addReg(0);
    if (!HaveSingleBase)
      BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
        .addReg(ThisSrcReg).addImm(256).addReg(0);
    BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
      .addReg(ThisCountReg).addImm(-1);
    BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
      .addReg(NextCountReg).addImm(0);
    BuildMI(MBB, DL, TII->get(SystemZ::BRC))
      .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
      .addMBB(LoopMBB);
    MBB->addSuccessor(LoopMBB);
    MBB->addSuccessor(DoneMBB);

    DestBase = MachineOperand::CreateReg(NextDestReg, false);
    SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
    Length &= 255;
    if (EndMBB && !Length)
      // If the loop handled the whole CLC range, DoneMBB will be empty with
      // CC live-through into EndMBB, so add it as live-in.
      DoneMBB->addLiveIn(SystemZ::CC);
    MBB = DoneMBB;
  }
  // Handle any remaining bytes with straight-line code.
  while (Length > 0) {
    uint64_t ThisLength = std::min(Length, uint64_t(256));
    // The previous iteration might have created out-of-range displacements.
    // Apply them using LAY if so.
    if (!isUInt<12>(DestDisp)) {
      unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
      BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
        .add(DestBase)
        .addImm(DestDisp)
        .addReg(0);
      DestBase = MachineOperand::CreateReg(Reg, false);
      DestDisp = 0;
    }
    if (!isUInt<12>(SrcDisp)) {
      unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
      BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
        .add(SrcBase)
        .addImm(SrcDisp)
        .addReg(0);
      SrcBase = MachineOperand::CreateReg(Reg, false);
      SrcDisp = 0;
    }
    BuildMI(*MBB, MI, DL, TII->get(Opcode))
      .add(DestBase)
      .addImm(DestDisp)
      .addImm(ThisLength)
      .add(SrcBase)
      .addImm(SrcDisp)
      .setMemRefs(MI.memoperands());
    DestDisp += ThisLength;
    SrcDisp += ThisLength;
    Length -= ThisLength;
    // If there's another CLC to go, branch to the end if a difference
    // was found.
    if (EndMBB && Length > 0) {
      MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB);
      BuildMI(MBB, DL, TII->get(SystemZ::BRC))
        .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
        .addMBB(EndMBB);
      MBB->addSuccessor(EndMBB);
      MBB->addSuccessor(NextMBB);
      MBB = NextMBB;
    }
  }
  if (EndMBB) {
    MBB->addSuccessor(EndMBB);
    MBB = EndMBB;
    MBB->addLiveIn(SystemZ::CC);
  }

  MI.eraseFromParent();
  return MBB;
}
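// Illustrative sizing (numbers made up): an MVCLoop with Length = 600 and a
// trip count of 2 executes two 256-byte MVCs through LoopMBB/NextMBB and
// leaves Length & 255 == 88 bytes, which the straight-line while loop in
// emitMemMemWrapper then emits as a single MVC of length 88 at the updated
// displacements.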
// Decompose string pseudo-instruction MI into a loop that continually performs
// Opcode until CC != 3.
MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI.getDebugLoc();

  uint64_t End1Reg = MI.getOperand(0).getReg();
  uint64_t Start1Reg = MI.getOperand(1).getReg();
  uint64_t Start2Reg = MI.getOperand(2).getReg();
  uint64_t CharReg = MI.getOperand(3).getReg();

  const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
  uint64_t This1Reg = MRI.createVirtualRegister(RC);
  uint64_t This2Reg = MRI.createVirtualRegister(RC);
  uint64_t End2Reg  = MRI.createVirtualRegister(RC);

  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);

  //  StartMBB:
  //   # fall through to LoopMBB
  MBB->addSuccessor(LoopMBB);

  //  LoopMBB:
  //   %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
  //   %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
  //   R0L = %CharReg
  //   %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
  //   JO LoopMBB
  //   # fall through to DoneMBB
  //
  // The load of R0L can be hoisted by post-RA LICM.
  MBB = LoopMBB;

  BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
    .addReg(Start1Reg).addMBB(StartMBB)
    .addReg(End1Reg).addMBB(LoopMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
    .addReg(Start2Reg).addMBB(StartMBB)
    .addReg(End2Reg).addMBB(LoopMBB);
  BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
  BuildMI(MBB, DL, TII->get(Opcode))
    .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
    .addReg(This1Reg).addReg(This2Reg);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  DoneMBB->addLiveIn(SystemZ::CC);

  MI.eraseFromParent();
  return DoneMBB;
}
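// The CCMASK_3 test above relies on the SystemZ convention that CLST, MVST
// and SRST set CC 3 when they stop early after processing a CPU-determined
// number of bytes; looping while CC == 3 resumes the operation until it
// finishes with a definitive result in CC 0-2.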
// Update TBEGIN instruction with final opcode and register clobbers.
MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
    bool NoFloat) const {
  MachineFunction &MF = *MBB->getParent();
  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();

  // Update opcode.
  MI.setDesc(TII->get(Opcode));

  // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
  // Make sure to add the corresponding GRSM bits if they are missing.
  uint64_t Control = MI.getOperand(2).getImm();
  static const unsigned GPRControlBit[16] = {
    0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
    0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
  };
  Control |= GPRControlBit[15];
  if (TFI->hasFP(MF))
    Control |= GPRControlBit[11];
  MI.getOperand(2).setImm(Control);

  // Add GPR clobbers.
  for (int I = 0; I < 16; I++) {
    if ((Control & GPRControlBit[I]) == 0) {
      unsigned Reg = SystemZMC::GR64Regs[I];
      MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
    }
  }

  // Add FPR/VR clobbers.
  if (!NoFloat && (Control & 4) != 0) {
    if (Subtarget.hasVector()) {
      for (int I = 0; I < 32; I++) {
        unsigned Reg = SystemZMC::VR128Regs[I];
        MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
      }
    } else {
      for (int I = 0; I < 16; I++) {
        unsigned Reg = SystemZMC::FP64Regs[I];
        MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
      }
    }
  }

  return MBB;
}
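// GRSM quick reference for the table above (illustrative): each bit covers an
// even/odd register pair, so GPRControlBit[15] == 0x0100 marks the r14/r15
// pair and GPRControlBit[11] == 0x0400 the r10/r11 pair.  Forcing those bits
// on means TBEGIN itself preserves the stack pointer (and the frame pointer
// when one is in use), so the loop above never needs to model them as
// clobbered.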
MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
  MachineFunction &MF = *MBB->getParent();
  MachineRegisterInfo *MRI = &MF.getRegInfo();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  DebugLoc DL = MI.getDebugLoc();

  unsigned SrcReg = MI.getOperand(0).getReg();

  // Create new virtual register of the same class as source.
  const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
  unsigned DstReg = MRI->createVirtualRegister(RC);

  // Replace pseudo with a normal load-and-test that models the def as
  // well.
  BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
    .addReg(SrcReg);
  MI.eraseFromParent();

  return MBB;
}
MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
    MachineInstr &MI, MachineBasicBlock *MBB) const {
  switch (MI.getOpcode()) {
  case SystemZ::Select32:
  case SystemZ::Select64:
  case SystemZ::SelectF32:
  case SystemZ::SelectF64:
  case SystemZ::SelectF128:
  case SystemZ::SelectVR32:
  case SystemZ::SelectVR64:
  case SystemZ::SelectVR128:
    return emitSelect(MI, MBB);

  case SystemZ::CondStore8Mux:
    return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
  case SystemZ::CondStore8MuxInv:
    return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
  case SystemZ::CondStore16Mux:
    return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
  case SystemZ::CondStore16MuxInv:
    return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
  case SystemZ::CondStore32Mux:
    return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
  case SystemZ::CondStore32MuxInv:
    return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
  case SystemZ::CondStore8:
    return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
  case SystemZ::CondStore8Inv:
    return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
  case SystemZ::CondStore16:
    return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
  case SystemZ::CondStore16Inv:
    return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
  case SystemZ::CondStore32:
    return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
  case SystemZ::CondStore32Inv:
    return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
  case SystemZ::CondStore64:
    return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
  case SystemZ::CondStore64Inv:
    return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
  case SystemZ::CondStoreF32:
    return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
  case SystemZ::CondStoreF32Inv:
    return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
  case SystemZ::CondStoreF64:
    return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
  case SystemZ::CondStoreF64Inv:
    return emitCondStore(MI, MBB, SystemZ::STD, 0, true);

  case SystemZ::PAIR128:
    return emitPair128(MI, MBB);
  case SystemZ::AEXT128:
    return emitExt128(MI, MBB, false);
  case SystemZ::ZEXT128:
    return emitExt128(MI, MBB, true);

  case SystemZ::ATOMIC_SWAPW:
    return emitAtomicLoadBinary(MI, MBB, 0, 0);
  case SystemZ::ATOMIC_SWAP_32:
    return emitAtomicLoadBinary(MI, MBB, 0, 32);
  case SystemZ::ATOMIC_SWAP_64:
    return emitAtomicLoadBinary(MI, MBB, 0, 64);

  case SystemZ::ATOMIC_LOADW_AR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 0);
  case SystemZ::ATOMIC_LOADW_AFI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 0);
  case SystemZ::ATOMIC_LOAD_AR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 32);
  case SystemZ::ATOMIC_LOAD_AHI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AHI, 32);
  case SystemZ::ATOMIC_LOAD_AFI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 32);
  case SystemZ::ATOMIC_LOAD_AGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AGR, 64);
  case SystemZ::ATOMIC_LOAD_AGHI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AGHI, 64);
  case SystemZ::ATOMIC_LOAD_AGFI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AGFI, 64);

  case SystemZ::ATOMIC_LOADW_SR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 0);
  case SystemZ::ATOMIC_LOAD_SR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 32);
  case SystemZ::ATOMIC_LOAD_SGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::SGR, 64);

  case SystemZ::ATOMIC_LOADW_NR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0);
  case SystemZ::ATOMIC_LOADW_NILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0);
  case SystemZ::ATOMIC_LOAD_NR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32);
  case SystemZ::ATOMIC_LOAD_NILL:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32);
  case SystemZ::ATOMIC_LOAD_NILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32);
  case SystemZ::ATOMIC_LOAD_NILF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32);
  case SystemZ::ATOMIC_LOAD_NGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64);
  case SystemZ::ATOMIC_LOAD_NILL64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64);
  case SystemZ::ATOMIC_LOAD_NILH64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64);
  case SystemZ::ATOMIC_LOAD_NIHL64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64);
  case SystemZ::ATOMIC_LOAD_NIHH64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64);
  case SystemZ::ATOMIC_LOAD_NILF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64);
  case SystemZ::ATOMIC_LOAD_NIHF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64);

  case SystemZ::ATOMIC_LOADW_OR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0);
  case SystemZ::ATOMIC_LOADW_OILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 0);
  case SystemZ::ATOMIC_LOAD_OR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32);
  case SystemZ::ATOMIC_LOAD_OILL:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 32);
  case SystemZ::ATOMIC_LOAD_OILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 32);
  case SystemZ::ATOMIC_LOAD_OILF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 32);
  case SystemZ::ATOMIC_LOAD_OGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64);
  case SystemZ::ATOMIC_LOAD_OILL64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL64, 64);
  case SystemZ::ATOMIC_LOAD_OILH64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH64, 64);
  case SystemZ::ATOMIC_LOAD_OIHL64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL64, 64);
  case SystemZ::ATOMIC_LOAD_OIHH64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH64, 64);
  case SystemZ::ATOMIC_LOAD_OILF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF64, 64);
  case SystemZ::ATOMIC_LOAD_OIHF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF64, 64);

  case SystemZ::ATOMIC_LOADW_XR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0);
  case SystemZ::ATOMIC_LOADW_XILF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 0);
  case SystemZ::ATOMIC_LOAD_XR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32);
  case SystemZ::ATOMIC_LOAD_XILF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 32);
  case SystemZ::ATOMIC_LOAD_XGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64);
  case SystemZ::ATOMIC_LOAD_XILF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF64, 64);
  case SystemZ::ATOMIC_LOAD_XIHF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF64, 64);

  case SystemZ::ATOMIC_LOADW_NRi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true);
  case SystemZ::ATOMIC_LOADW_NILHi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0, true);
  case SystemZ::ATOMIC_LOAD_NRi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true);
  case SystemZ::ATOMIC_LOAD_NILLi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32, true);
  case SystemZ::ATOMIC_LOAD_NILHi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32, true);
  case SystemZ::ATOMIC_LOAD_NILFi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32, true);
  case SystemZ::ATOMIC_LOAD_NGRi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true);
  case SystemZ::ATOMIC_LOAD_NILL64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64, true);
  case SystemZ::ATOMIC_LOAD_NILH64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64, true);
  case SystemZ::ATOMIC_LOAD_NIHL64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64, true);
  case SystemZ::ATOMIC_LOAD_NIHH64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64, true);
  case SystemZ::ATOMIC_LOAD_NILF64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64, true);
  case SystemZ::ATOMIC_LOAD_NIHF64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64, true);

  case SystemZ::ATOMIC_LOADW_MIN:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
                                SystemZ::CCMASK_CMP_LE, 0);
  case SystemZ::ATOMIC_LOAD_MIN_32:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
                                SystemZ::CCMASK_CMP_LE, 32);
  case SystemZ::ATOMIC_LOAD_MIN_64:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
                                SystemZ::CCMASK_CMP_LE, 64);

  case SystemZ::ATOMIC_LOADW_MAX:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
                                SystemZ::CCMASK_CMP_GE, 0);
  case SystemZ::ATOMIC_LOAD_MAX_32:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
                                SystemZ::CCMASK_CMP_GE, 32);
  case SystemZ::ATOMIC_LOAD_MAX_64:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
                                SystemZ::CCMASK_CMP_GE, 64);

  case SystemZ::ATOMIC_LOADW_UMIN:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
                                SystemZ::CCMASK_CMP_LE, 0);
  case SystemZ::ATOMIC_LOAD_UMIN_32:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
                                SystemZ::CCMASK_CMP_LE, 32);
  case SystemZ::ATOMIC_LOAD_UMIN_64:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
                                SystemZ::CCMASK_CMP_LE, 64);

  case SystemZ::ATOMIC_LOADW_UMAX:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
                                SystemZ::CCMASK_CMP_GE, 0);
  case SystemZ::ATOMIC_LOAD_UMAX_32:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
                                SystemZ::CCMASK_CMP_GE, 32);
  case SystemZ::ATOMIC_LOAD_UMAX_64:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
                                SystemZ::CCMASK_CMP_GE, 64);

  case SystemZ::ATOMIC_CMP_SWAPW:
    return emitAtomicCmpSwapW(MI, MBB);
  case SystemZ::MVCSequence:
  case SystemZ::MVCLoop:
    return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
  case SystemZ::NCSequence:
  case SystemZ::NCLoop:
    return emitMemMemWrapper(MI, MBB, SystemZ::NC);
  case SystemZ::OCSequence:
  case SystemZ::OCLoop:
    return emitMemMemWrapper(MI, MBB, SystemZ::OC);
  case SystemZ::XCSequence:
  case SystemZ::XCLoop:
    return emitMemMemWrapper(MI, MBB, SystemZ::XC);
  case SystemZ::CLCSequence:
  case SystemZ::CLCLoop:
    return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
  case SystemZ::CLSTLoop:
    return emitStringWrapper(MI, MBB, SystemZ::CLST);
  case SystemZ::MVSTLoop:
    return emitStringWrapper(MI, MBB, SystemZ::MVST);
  case SystemZ::SRSTLoop:
    return emitStringWrapper(MI, MBB, SystemZ::SRST);
  case SystemZ::TBEGIN:
    return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
  case SystemZ::TBEGIN_nofloat:
    return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
  case SystemZ::TBEGINC:
    return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
  case SystemZ::LTEBRCompare_VecPseudo:
    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
  case SystemZ::LTDBRCompare_VecPseudo:
    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
  case SystemZ::LTXBRCompare_VecPseudo:
    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);

  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
    return emitPatchPoint(MI, MBB);

  default:
    llvm_unreachable("Unexpected instr type to insert");
  }
}
// This is only used by the isel schedulers, and is needed only to prevent
// the compiler from crashing when list-ilp is used.
const TargetRegisterClass *
SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
  if (VT == MVT::Untyped)
    return &SystemZ::ADDR128BitRegClass;
  return TargetLowering::getRepRegClassFor(VT);
}