//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the SystemZTargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "SystemZISelLowering.h"
#include "SystemZCallingConv.h"
#include "SystemZConstantPoolValue.h"
#include "SystemZMachineFunctionInfo.h"
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsS390.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include <cctype>

using namespace llvm;

#define DEBUG_TYPE "systemz-lower"
namespace {
// Represents information about a comparison.
struct Comparison {
  Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
    : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
      Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}

  // The operands to the comparison.
  SDValue Op0, Op1;

  // Chain if this is a strict floating-point comparison.
  SDValue Chain;

  // The opcode that should be used to compare Op0 and Op1.
  unsigned Opcode;

  // A SystemZICMP value.  Only used for integer comparisons.
  unsigned ICmpType;

  // The mask of CC values that Opcode can produce.
  unsigned CCValid;

  // The mask of CC values for which the original condition is true.
  unsigned CCMask;
};
} // end anonymous namespace
// Classify VT as either 32 or 64 bit.
static bool is32Bit(EVT VT) {
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::i32:
    return true;
  case MVT::i64:
    return false;
  default:
    llvm_unreachable("Unsupported type");
  }
}
// Return a version of MachineOperand that can be safely used before the
// final use.
static MachineOperand earlyUseOperand(MachineOperand Op) {
  if (Op.isReg())
    Op.setIsKill(false);
  return Op;
}
SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
                                             const SystemZSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize(0));
  // Set up the register classes.
  if (Subtarget.hasHighWord())
    addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
  else
    addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
  if (Subtarget.hasVector()) {
    addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
    addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
  } else {
    addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
    addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
  }
  if (Subtarget.hasVectorEnhancements1())
    addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
  else
    addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);

  if (Subtarget.hasVector()) {
    addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
  }
  // Compute derived properties from the register classes
  computeRegisterProperties(Subtarget.getRegisterInfo());

  // Set up special registers.
  setStackPointerRegisterToSaveRestore(SystemZ::R15D);

  // TODO: It may be better to default to latency-oriented scheduling, however
  // LLVM's current latency-oriented scheduler can't handle physreg definitions
  // such as SystemZ has with CC, so set this to the register-pressure
  // scheduler, because it can.
  setSchedulingPreference(Sched::RegPressure);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  // Instructions are strings of 2-byte aligned 2-byte values.
  setMinFunctionAlignment(Align(2));
  // For performance reasons we prefer 16-byte alignment.
  setPrefFunctionAlignment(Align(16));
  // Handle operations that are handled in a similar way for all types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Lower SET_CC into an IPM-based sequence.
      setOperationAction(ISD::SETCC, VT, Custom);
      setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
      setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);

      // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
      setOperationAction(ISD::SELECT, VT, Expand);

      // Lower SELECT_CC and BR_CC into separate comparisons and branches.
      setOperationAction(ISD::SELECT_CC, VT, Custom);
      setOperationAction(ISD::BR_CC, VT, Custom);
    }
  }
  // Expand jump table branches as address arithmetic followed by an
  // indirect jump.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // Expand BRCOND into a BR_CC (see above).
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);
  // Handle integer types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_INTEGER_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Expand individual DIV and REMs into DIVREMs.
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Custom);
      setOperationAction(ISD::UDIVREM, VT, Custom);

      // Support addition/subtraction with overflow.
      setOperationAction(ISD::SADDO, VT, Custom);
      setOperationAction(ISD::SSUBO, VT, Custom);

      // Support addition/subtraction with carry.
      setOperationAction(ISD::UADDO, VT, Custom);
      setOperationAction(ISD::USUBO, VT, Custom);

      // Support carry in as value rather than glue.
      setOperationAction(ISD::ADDCARRY, VT, Custom);
      setOperationAction(ISD::SUBCARRY, VT, Custom);

      // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
      // stores, putting a serialization instruction after the stores.
      setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
      setOperationAction(ISD::ATOMIC_STORE, VT, Custom);

      // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
      // available, or if the operand is constant.
      setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);

      // Use POPCNT on z196 and above.
      if (Subtarget.hasPopulationCount())
        setOperationAction(ISD::CTPOP, VT, Custom);
      else
        setOperationAction(ISD::CTPOP, VT, Expand);

      // No special instructions for these.
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Use *MUL_LOHI where possible instead of MULH*.
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Custom);
      setOperationAction(ISD::UMUL_LOHI, VT, Custom);

      // Only z196 and above have native support for conversions to unsigned.
      // On z10, promoting to i64 doesn't generate an inexact condition for
      // values that are outside the i32 range but in the i64 range, so use
      // the default expansion.
      if (!Subtarget.hasFPExtension())
        setOperationAction(ISD::FP_TO_UINT, VT, Expand);

      // Mirror those settings for STRICT_FP_TO_[SU]INT.  Note that these all
      // default to Expand, so need to be modified to Legal where appropriate.
      setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal);
      if (Subtarget.hasFPExtension())
        setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal);

      // And similarly for STRICT_[SU]INT_TO_FP.
      setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Legal);
      if (Subtarget.hasFPExtension())
        setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Legal);
    }
  }
  // Type legalization will convert 8- and 16-bit atomic operations into
  // forms that operate on i32s (but still keeping the original memory VT).
  // Lower them into full i32 operations.
  setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
  // Even though i128 is not a legal type, we still need to custom lower
  // the atomic operations in order to exploit SystemZ instructions.
  setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);

  // We can use the CC result of compare-and-swap to implement
  // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  // Traps are legal, as we will convert them to "j .+2".
  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  // z10 has instructions for signed but not unsigned FP conversion.
  // Handle unsigned 32-bit types as signed 64-bit types.
  if (!Subtarget.hasFPExtension()) {
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Promote);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
  }

  // We have native support for a 64-bit CTLZ, via FLOGR.
  setOperationAction(ISD::CTLZ, MVT::i32, Promote);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
  setOperationAction(ISD::CTLZ, MVT::i64, Legal);
  // On z15 we have native support for a 64-bit CTPOP.
  if (Subtarget.hasMiscellaneousExtensions3()) {
    setOperationAction(ISD::CTPOP, MVT::i32, Promote);
    setOperationAction(ISD::CTPOP, MVT::i64, Legal);
  }
  // Give LowerOperation the chance to replace 64-bit ORs with subregs.
  setOperationAction(ISD::OR, MVT::i64, Custom);

  // FIXME: Can we support these natively?
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
  // We have native instructions for i8, i16 and i32 extensions, but not i1.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
  }
  // Handle the various types of symbolic address.
  setOperationAction(ISD::ConstantPool, PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
  setOperationAction(ISD::BlockAddress, PtrVT, Custom);
  setOperationAction(ISD::JumpTable, PtrVT, Custom);

  // We need to handle dynamic allocations specially because of the
  // 160-byte area at the bottom of the stack.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);
  // Use custom expanders so that we can force the function to use
  // a frame pointer.
  setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);

  // Handle prefetches with PFD or PFDRL.
  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
    // Assume by default that all vector operations need to be expanded.
    for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
      if (getOperationAction(Opcode, VT) == Legal)
        setOperationAction(Opcode, VT, Expand);

    // Likewise all truncating stores and extending loads.
    for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
      setTruncStoreAction(VT, InnerVT, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
    }

    if (isTypeLegal(VT)) {
      // These operations are legal for anything that can be stored in a
      // vector register, even if there is no native support for the format
      // as such.  In particular, we can do these for v4f32 even though there
      // are no specific instructions for that format.
      setOperationAction(ISD::LOAD, VT, Legal);
      setOperationAction(ISD::STORE, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      // Likewise, except that we need to replace the nodes with something
      // more specific.
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
  }
  // Handle integer vector types.
  for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
    if (isTypeLegal(VT)) {
      // These operations have direct equivalents.
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);
      if (VT != MVT::v2i64)
        setOperationAction(ISD::MUL, VT, Legal);
      setOperationAction(ISD::AND, VT, Legal);
      setOperationAction(ISD::OR, VT, Legal);
      setOperationAction(ISD::XOR, VT, Legal);
      if (Subtarget.hasVectorEnhancements1())
        setOperationAction(ISD::CTPOP, VT, Legal);
      else
        setOperationAction(ISD::CTPOP, VT, Custom);
      setOperationAction(ISD::CTTZ, VT, Legal);
      setOperationAction(ISD::CTLZ, VT, Legal);

      // Convert a GPR scalar to a vector by inserting it into element 0.
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);

      // Use a series of unpacks for extensions.
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);

      // Detect shifts by a scalar amount and convert them into
      // V*_BY_SCALAR.
      setOperationAction(ISD::SHL, VT, Custom);
      setOperationAction(ISD::SRA, VT, Custom);
      setOperationAction(ISD::SRL, VT, Custom);

      // At present ROTL isn't matched by DAGCombiner.  ROTR should be
      // converted into ROTL.
      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
      // and inverting the result as necessary.
      setOperationAction(ISD::SETCC, VT, Custom);
      setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
      if (Subtarget.hasVectorEnhancements1())
        setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
    }
  }
  if (Subtarget.hasVector()) {
    // There should be no need to check for float types other than v2f64
    // since <2 x f32> isn't a legal type.
    setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);

    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f64, Legal);
  }
  if (Subtarget.hasVectorEnhancements2()) {
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal);

    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f32, Legal);
  }
  // Handle floating-point types.
  for (unsigned I = MVT::FIRST_FP_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // We can use FI for FRINT.
      setOperationAction(ISD::FRINT, VT, Legal);

      // We can use the extended form of FI for other rounding operations.
      if (Subtarget.hasFPExtension()) {
        setOperationAction(ISD::FNEARBYINT, VT, Legal);
        setOperationAction(ISD::FFLOOR, VT, Legal);
        setOperationAction(ISD::FCEIL, VT, Legal);
        setOperationAction(ISD::FTRUNC, VT, Legal);
        setOperationAction(ISD::FROUND, VT, Legal);
      }

      // No special instructions for these.
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FSINCOS, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);

      // Handle constrained floating-point operations.
      setOperationAction(ISD::STRICT_FADD, VT, Legal);
      setOperationAction(ISD::STRICT_FSUB, VT, Legal);
      setOperationAction(ISD::STRICT_FMUL, VT, Legal);
      setOperationAction(ISD::STRICT_FDIV, VT, Legal);
      setOperationAction(ISD::STRICT_FMA, VT, Legal);
      setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
      setOperationAction(ISD::STRICT_FRINT, VT, Legal);
      setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
      setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
      if (Subtarget.hasFPExtension()) {
        setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
        setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
        setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
        setOperationAction(ISD::STRICT_FROUND, VT, Legal);
        setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
      }
    }
  }
  // Handle floating-point vector types.
  if (Subtarget.hasVector()) {
    // Scalar-to-vector conversion is just a subreg.
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);

    // Some insertions and extractions can be done directly but others
    // need to go via integers.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);

    // These operations have direct equivalents.
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FMA, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FABS, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

    // Handle constrained floating-point operations.
    setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
  }
  // The vector enhancements facility 1 has instructions for these.
  if (Subtarget.hasVectorEnhancements1()) {
    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::FABS, MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f64, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);

    // Handle constrained floating-point operations.
    setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
    for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
                     MVT::v4f32, MVT::v2f64 }) {
      setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal);
      setOperationAction(ISD::STRICT_FMINNUM, VT, Legal);
      setOperationAction(ISD::STRICT_FMAXIMUM, VT, Legal);
      setOperationAction(ISD::STRICT_FMINIMUM, VT, Legal);
    }
  }
  // We only have fused f128 multiply-addition on vector registers.
  if (!Subtarget.hasVectorEnhancements1()) {
    setOperationAction(ISD::FMA, MVT::f128, Expand);
    setOperationAction(ISD::STRICT_FMA, MVT::f128, Expand);
  }
  // We don't have a copysign instruction on vector registers.
  if (Subtarget.hasVectorEnhancements1())
    setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
  // Needed so that we don't try to implement f128 constant loads using
  // a load-and-extend of a f80 constant (in cases where the constant
  // would fit in an f80).
  for (MVT VT : MVT::fp_valuetypes())
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
  // We don't have extending load instruction on vector registers.
  if (Subtarget.hasVectorEnhancements1()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
  }
  // Floating-point truncation and stores need to be done separately.
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f64, Expand);
  // We have 64-bit FPR<->GPR moves, but need special handling for
  // 32-bit forms.
  if (!Subtarget.hasVector()) {
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::f32, Custom);
  }
  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
  // structure, but VAEND is a no-op.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  // Codes for which we want to perform some z-specific combinations.
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::STRICT_FP_ROUND);
  setTargetDAGCombine(ISD::FP_EXTEND);
  setTargetDAGCombine(ISD::STRICT_FP_EXTEND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::SDIV);
  setTargetDAGCombine(ISD::UDIV);
  setTargetDAGCombine(ISD::SREM);
  setTargetDAGCombine(ISD::UREM);
  // Handle intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  // We want to use MVC in preference to even a single load/store pair.
  MaxStoresPerMemcpy = 0;
  MaxStoresPerMemcpyOptSize = 0;

  // The main memset sequence is a byte store followed by an MVC.
  // Two STC or MV..I stores win over that, but the kind of fused stores
  // generated by target-independent code don't when the byte value is
  // variable.  E.g.  "STC <reg>;MHI <reg>,257;STH <reg>" is not better
  // than "STC;MVC".  Handle the choice in target-specific code instead.
  MaxStoresPerMemset = 0;
  MaxStoresPerMemsetOptSize = 0;

  // Default to having -disable-strictnode-mutation on
  IsStrictFPEnabled = true;
}

EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
                                              LLVMContext &, EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(
    const MachineFunction &MF, EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  case MVT::f128:
    return Subtarget.hasVectorEnhancements1();
  default:
    break;
  }

  return false;
}

// Return true if the constant can be generated with a vector instruction,
// such as VGM, VGMB or VREPI.
bool SystemZVectorConstantInfo::isVectorConstantLegal(
    const SystemZSubtarget &Subtarget) {
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  if (!Subtarget.hasVector() ||
      (isFP128 && !Subtarget.hasVectorEnhancements1()))
    return false;

  // Try using VECTOR GENERATE BYTE MASK.  This is the architecturally-
  // preferred way of creating all-zero and all-one vectors so give it
  // priority over other methods below.
  unsigned Mask = 0;
  unsigned I = 0;
  for (; I < SystemZ::VectorBytes; ++I) {
    uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
    if (Byte == 0xff)
      Mask |= 1ULL << I;
    else if (Byte != 0)
      break;
  }
  if (I == SystemZ::VectorBytes) {
    Opcode = SystemZISD::BYTE_MASK;
    OpVals.push_back(Mask);
    VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16);
    return true;
  }
  if (SplatBitSize > 64)
    return false;

  auto tryValue = [&](uint64_t Value) -> bool {
    // Try VECTOR REPLICATE IMMEDIATE
    int64_t SignedValue = SignExtend64(Value, SplatBitSize);
    if (isInt<16>(SignedValue)) {
      OpVals.push_back(((unsigned) SignedValue));
      Opcode = SystemZISD::REPLICATE;
      VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
                               SystemZ::VectorBits / SplatBitSize);
      return true;
    }
    // Try VECTOR GENERATE MASK
    unsigned Start, End;
    if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
      // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
      // denoting 1 << 63 and 63 denoting 1.  Convert them to bit numbers for
      // an SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
      OpVals.push_back(Start - (64 - SplatBitSize));
      OpVals.push_back(End - (64 - SplatBitSize));
      Opcode = SystemZISD::ROTATE_MASK;
      VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
                               SystemZ::VectorBits / SplatBitSize);
      return true;
    }
    return false;
  };
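
  // Editor's note (illustrative, not part of the original source): as a
  // worked example of the bit-number conversion above, assume SplatBitSize
  // is 32 and isRxSBGMask reports Start = 40 and End = 47 in 64-bit bit
  // numbering.  The pushed operands would then be 40 - (64 - 32) = 8 and
  // 47 - (64 - 32) = 15, i.e. the same mask range expressed relative to a
  // 32-bit element, ready for VECTOR GENERATE MASK.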
  // First try assuming that any undefined bits above the highest set bit
  // and below the lowest set bit are 1s.  This increases the likelihood of
  // being able to use a sign-extended element value in VECTOR REPLICATE
  // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
  uint64_t SplatBitsZ = SplatBits.getZExtValue();
  uint64_t SplatUndefZ = SplatUndef.getZExtValue();
  uint64_t Lower =
      (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
  uint64_t Upper =
      (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
  if (tryValue(SplatBitsZ | Upper | Lower))
    return true;

  // Now try assuming that any undefined bits between the first and
  // last defined set bits are set.  This increases the chances of
  // using a non-wraparound mask.
  uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
  return tryValue(SplatBitsZ | Middle);
}

SystemZVectorConstantInfo::SystemZVectorConstantInfo(APFloat FPImm) {
  IntBits = FPImm.bitcastToAPInt().zextOrSelf(128);
  isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad());

  // Find the smallest splat.
  SplatBits = FPImm.bitcastToAPInt();
  unsigned Width = SplatBits.getBitWidth();
  while (Width > 8) {
    unsigned HalfSize = Width / 2;
    APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
    APInt LowValue = SplatBits.trunc(HalfSize);

    // If the two halves do not match, stop here.
    if (HighValue != LowValue || 8 > HalfSize)
      break;

    SplatBits = HighValue;
    Width = HalfSize;
  }
  SplatUndef = 0;
  SplatBitSize = Width;
}

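// Editor's note (illustrative, not part of the original source): for an f64
// immediate whose bit pattern is 0xFFFFFFFFFFFFFFFF, every halving step above
// finds two matching halves, so SplatBits narrows 64 -> 32 -> 16 -> 8 bits
// and SplatBitSize ends up as 8.  A pattern such as 0x3FF0000000000000 (1.0)
// has mismatching halves on the first step, so SplatBitSize stays 64.
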
SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) {
  assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
  bool HasAnyUndefs;

  // Get IntBits by finding the 128 bit splat.
  BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
                       true);

  // Get SplatBits by finding the 8 bit or greater splat.
  BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
                       true);
}

bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                         bool ForCodeSize) const {
  // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
  if (Imm.isZero() || Imm.isNegZero())
    return true;

  return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
}

bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  // We can use CGFI or CLGFI.
  return isInt<32>(Imm) || isUInt<32>(Imm);
}

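// Editor's note (illustrative, not part of the original source): under this
// rule an immediate such as 0xFFFFFFFF (fits an unsigned 32-bit field, CLGFI)
// or -1 (fits a signed 32-bit field, CGFI) is legal, while 0x1FFFFFFFFF is
// rejected because it fits in neither 32-bit form.
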
bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  // We can use ALGFI or SLGFI.
  return isUInt<32>(Imm) || isUInt<32>(-Imm);
}

bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned, unsigned, MachineMemOperand::Flags, bool *Fast) const {
  // Unaligned accesses should never be slower than the expanded version.
  // We check specifically for aligned accesses in the few cases where
  // they are required.
  if (Fast)
    *Fast = true;
  return true;
}

// Information about the addressing mode for a memory access.
struct AddressingMode {
  // True if a long displacement is supported.
  bool LongDisplacement;

  // True if use of index register is supported.
  bool IndexReg;

  AddressingMode(bool LongDispl, bool IdxReg) :
    LongDisplacement(LongDispl), IndexReg(IdxReg) {}
};

// Return the desired addressing mode for a Load which has only one use (in
// the same block) which is a Store.
static AddressingMode getLoadStoreAddrMode(bool HasVector,
                                           Type *Ty) {
  // With vector support a Load->Store combination may be combined to either
  // an MVC or vector operations and it seems to work best to allow the
  // vector addressing mode.
  if (HasVector)
    return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);

  // Otherwise only the MVC case is special.
  bool MVC = Ty->isIntegerTy(8);
  return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
}

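// Editor's note (illustrative, not part of the original source): when vector
// support is absent, an i8 load whose only user is a store (a byte copy that
// is likely to become an MVC) gets an addressing mode with neither long
// displacements nor an index register, matching what MVC can encode; for
// wider integer types both remain allowed.
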
// Return the addressing mode which seems most desirable given an LLVM
// Instruction pointer.
static AddressingMode
supportedAddressingMode(Instruction *I, bool HasVector) {
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default: break;
    case Intrinsic::memset:
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
    }
  }

  if (isa<LoadInst>(I) && I->hasOneUse()) {
    auto *SingleUser = cast<Instruction>(*I->user_begin());
    if (SingleUser->getParent() == I->getParent()) {
      if (isa<ICmpInst>(SingleUser)) {
        if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
          if (C->getBitWidth() <= 64 &&
              (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
            // Comparison of memory with 16 bit signed / unsigned immediate
            return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
      } else if (isa<StoreInst>(SingleUser))
        // Load->Store
        return getLoadStoreAddrMode(HasVector, I->getType());
    }
  } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
    if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
      if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
        // Load->Store
        return getLoadStoreAddrMode(HasVector, LoadI->getType());
  }
  if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {

    // * Use LDE instead of LE/LEY for z13 to avoid partial register
    //   dependencies (LDE only supports small offsets).
    // * Utilize the vector registers to hold floating point
    //   values (vector load / store instructions only support small
    //   offsets).

    Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
                         I->getOperand(0)->getType());
    bool IsFPAccess = MemAccessTy->isFloatingPointTy();
    bool IsVectorAccess = MemAccessTy->isVectorTy();

    // A store of an extracted vector element will be combined into a VSTE type
    // instruction.
    if (!IsVectorAccess && isa<StoreInst>(I)) {
      Value *DataOp = I->getOperand(0);
      if (isa<ExtractElementInst>(DataOp))
        IsVectorAccess = true;
    }

    // A load which gets inserted into a vector element will be combined into a
    // VLE type instruction.
    if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
      User *LoadUser = *I->user_begin();
      if (isa<InsertElementInst>(LoadUser))
        IsVectorAccess = true;
    }

    if (IsFPAccess || IsVectorAccess)
      return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
  }

  return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
}

bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
       const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
  // Punt on globals for now, although they can be used in limited
  // RELATIVE LONG cases.
  if (AM.BaseGV)
    return false;

  // Require a 20-bit signed offset.
  if (!isInt<20>(AM.BaseOffs))
    return false;

  AddressingMode SupportedAM(true, true);
  if (I != nullptr)
    SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());

  if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
    return false;

  if (!SupportedAM.IndexReg)
    // No indexing allowed.
    return AM.Scale == 0;

  // Indexing is OK but no scale factor can be applied.
  return AM.Scale == 0 || AM.Scale == 1;
}

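// Editor's note (illustrative, not part of the original source): with the
// checks above, an addressing mode with BaseOffs = 1 << 20 is rejected by the
// isInt<20> test, and any mode with Scale == 2 is rejected because at most an
// unscaled index register (Scale 0 or 1) can be expressed on SystemZ.
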
bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
  if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
    return false;
  unsigned FromBits = FromType->getPrimitiveSizeInBits();
  unsigned ToBits = ToType->getPrimitiveSizeInBits();
  return FromBits > ToBits;
}

bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
  if (!FromVT.isInteger() || !ToVT.isInteger())
    return false;
  unsigned FromBits = FromVT.getSizeInBits();
  unsigned ToBits = ToVT.getSizeInBits();
  return FromBits > ToBits;
}

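// Editor's note (illustrative, not part of the original source): by this rule
// a truncation from i64 to i32 is considered free (64 > 32 bits), whereas an
// i32 -> i32 "truncation" or any widening is not.
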
//===----------------------------------------------------------------------===//
// Inline asm support
//===----------------------------------------------------------------------===//

TargetLowering::ConstraintType
SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'a': // Address register
    case 'd': // Data register (equivalent to 'r')
    case 'f': // Floating-point register
    case 'h': // High-part register
    case 'r': // General-purpose register
    case 'v': // Vector register
      return C_RegisterClass;

    case 'Q': // Memory with base and unsigned 12-bit displacement
    case 'R': // Likewise, plus an index
    case 'S': // Memory with base and signed 20-bit displacement
    case 'T': // Likewise, plus an index
    case 'm': // Equivalent to 'T'.
      return C_Memory;

    case 'I': // Unsigned 8-bit constant
    case 'J': // Unsigned 12-bit constant
    case 'K': // Signed 16-bit constant
    case 'L': // Signed 20-bit displacement (on all targets we support)
    case 'M': // 0x7fffffff
      return C_Immediate;

    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

TargetLowering::ConstraintWeight SystemZTargetLowering::
getSingleConstraintMatchWeight(AsmOperandInfo &info,
                               const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();
  // Look at the constraint type.
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;

  case 'a': // Address register
  case 'd': // Data register (equivalent to 'r')
  case 'h': // High-part register
  case 'r': // General-purpose register
    if (CallOperandVal->getType()->isIntegerTy())
      weight = CW_Register;
    break;

  case 'f': // Floating-point register
    if (type->isFloatingPointTy())
      weight = CW_Register;
    break;

  case 'v': // Vector register
    if ((type->isVectorTy() || type->isFloatingPointTy()) &&
        Subtarget.hasVector())
      weight = CW_Register;
    break;

  case 'I': // Unsigned 8-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<8>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'J': // Unsigned 12-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<12>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'K': // Signed 16-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<16>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'L': // Signed 20-bit displacement (on all targets we support)
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<20>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'M': // 0x7fffffff
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (C->getZExtValue() == 0x7fffffff)
        weight = CW_Constant;
    break;
  }
  return weight;
}

// Parse a "{tNNN}" register constraint for which the register type "t"
// has already been verified.  MC is the class associated with "t" and
// Map maps 0-based register numbers to LLVM register numbers.
static std::pair<unsigned, const TargetRegisterClass *>
parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
                    const unsigned *Map, unsigned Size) {
  assert(*(Constraint.end()-1) == '}' && "Missing '}'");
  if (isdigit(Constraint[2])) {
    unsigned Index;
    bool Failed =
        Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
    if (!Failed && Index < Size && Map[Index])
      return std::make_pair(Map[Index], RC);
  }
  return std::make_pair(0U, nullptr);
}

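// Editor's note (illustrative, not part of the original source): given a
// constraint string such as "{r5}" and Map pointing at SystemZMC::GR64Regs,
// the digits after the type letter parse to Index == 5 and the function
// returns {SystemZMC::GR64Regs[5], RC}, i.e. the LLVM register number for
// that 0-based GPR together with the register class supplied by the caller.
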
std::pair<unsigned, const TargetRegisterClass *>
SystemZTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'd': // Data register (equivalent to 'r')
    case 'r': // General-purpose register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::GR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::GR128BitRegClass);
      return std::make_pair(0U, &SystemZ::GR32BitRegClass);

    case 'a': // Address register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
      return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);

    case 'h': // High-part register (an LLVM extension)
      return std::make_pair(0U, &SystemZ::GRH32BitRegClass);

    case 'f': // Floating-point register
      if (VT == MVT::f64)
        return std::make_pair(0U, &SystemZ::FP64BitRegClass);
      else if (VT == MVT::f128)
        return std::make_pair(0U, &SystemZ::FP128BitRegClass);
      return std::make_pair(0U, &SystemZ::FP32BitRegClass);

    case 'v': // Vector register
      if (Subtarget.hasVector()) {
        if (VT == MVT::f32)
          return std::make_pair(0U, &SystemZ::VR32BitRegClass);
        if (VT == MVT::f64)
          return std::make_pair(0U, &SystemZ::VR64BitRegClass);
        return std::make_pair(0U, &SystemZ::VR128BitRegClass);
      }
      break;
    }
  }
  if (Constraint.size() > 0 && Constraint[0] == '{') {
    // We need to override the default register parsing for GPRs and FPRs
    // because the interpretation depends on VT.  The internal names of
    // the registers are also different from the external names
    // (F0D and F0S instead of F0, etc.).
    if (Constraint[1] == 'r') {
      if (VT == MVT::i32)
        return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
                                   SystemZMC::GR32Regs, 16);
      if (VT == MVT::i128)
        return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
                                   SystemZMC::GR128Regs, 16);
      return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
                                 SystemZMC::GR64Regs, 16);
    }
    if (Constraint[1] == 'f') {
      if (VT == MVT::f32)
        return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
                                   SystemZMC::FP32Regs, 16);
      if (VT == MVT::f128)
        return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
                                   SystemZMC::FP128Regs, 16);
      return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
                                 SystemZMC::FP64Regs, 16);
    }
    if (Constraint[1] == 'v') {
      if (VT == MVT::f32)
        return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
                                   SystemZMC::VR32Regs, 32);
      if (VT == MVT::f64)
        return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
                                   SystemZMC::VR64Regs, 32);
      return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
                                 SystemZMC::VR128Regs, 32);
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

void SystemZTargetLowering::
LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                             std::vector<SDValue> &Ops,
                             SelectionDAG &DAG) const {
  // Only support length 1 constraints for now.
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'I': // Unsigned 8-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<8>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'J': // Unsigned 12-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<12>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'K': // Signed 16-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<16>(C->getSExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'L': // Signed 20-bit displacement (on all targets we support)
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<20>(C->getSExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'M': // 0x7fffffff
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0x7fffffff)
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

//===----------------------------------------------------------------------===//
// Calling conventions
//===----------------------------------------------------------------------===//

#include "SystemZGenCallingConv.inc"

const MCPhysReg *SystemZTargetLowering::getScratchRegisters(
  CallingConv::ID) const {
  static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
                                           SystemZ::R14D, 0 };
  return ScratchRegs;
}

bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
                                                     Type *ToType) const {
  return isTruncateFree(FromType, ToType);
}

bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  return CI->isTailCall();
}

// We do not yet support 128-bit single-element vector types.  If the user
// attempts to use such types as function argument or return type, prefer
// to error out instead of emitting code violating the ABI.
static void VerifyVectorType(MVT VT, EVT ArgVT) {
  if (ArgVT.isVector() && !VT.isVector())
    report_fatal_error("Unsupported vector argument or return type");
}

static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
  for (unsigned i = 0; i < Ins.size(); ++i)
    VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
}

static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
  for (unsigned i = 0; i < Outs.size(); ++i)
    VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
}

// Value is a value that has been passed to us in the location described by VA
// (and so has type VA.getLocVT()).  Convert Value to VA.getValVT(), chaining
// any loads onto Chain.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
                                   CCValAssign &VA, SDValue Chain,
                                   SDValue Value) {
  // If the argument has been promoted from a smaller type, insert an
  // assertion to capture this.
  if (VA.getLocInfo() == CCValAssign::SExt)
    Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));
  else if (VA.getLocInfo() == CCValAssign::ZExt)
    Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));

  if (VA.isExtInLoc())
    Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
  else if (VA.getLocInfo() == CCValAssign::BCvt) {
    // If this is a short vector argument loaded from the stack,
    // extend from i64 to full vector size and then bitcast.
    assert(VA.getLocVT() == MVT::i64);
    assert(VA.getValVT().isVector());
    Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
    Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
  } else
    assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
  return Value;
}

// Value is a value of type VA.getValVT() that we need to copy into
// the location described by VA.  Return a copy of Value converted to
// VA.getLocVT().  The caller is responsible for handling indirect values.
static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
                                   CCValAssign &VA, SDValue Value) {
  switch (VA.getLocInfo()) {
  case CCValAssign::SExt:
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::ZExt:
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::AExt:
    return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::BCvt:
    // If this is a short vector argument to be stored to the stack,
    // bitcast to v2i64 and then extract first element.
    assert(VA.getLocVT() == MVT::i64);
    assert(VA.getValVT().isVector());
    Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
                       DAG.getConstant(0, DL, MVT::i32));
  case CCValAssign::Full:
    return Value;
  default:
    llvm_unreachable("Unhandled getLocInfo()");
  }
}

SDValue SystemZTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SystemZMachineFunctionInfo *FuncInfo =
      MF.getInfo<SystemZMachineFunctionInfo>();
  auto *TFL =
      static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  // Detect unsupported vector argument types.
  if (Subtarget.hasVector())
    VerifyVectorTypes(Ins);

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
  unsigned NumFixedGPRs = 0;
  unsigned NumFixedFPRs = 0;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    SDValue ArgValue;
    CCValAssign &VA = ArgLocs[I];
    EVT LocVT = VA.getLocVT();
    if (VA.isRegLoc()) {
      // Arguments passed in registers
      const TargetRegisterClass *RC;
      switch (LocVT.getSimpleVT().SimpleTy) {
      default:
        // Integers smaller than i64 should be promoted to i64.
        llvm_unreachable("Unexpected argument type");
      case MVT::i32:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR32BitRegClass;
        break;
      case MVT::i64:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR64BitRegClass;
        break;
      case MVT::f32:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP32BitRegClass;
        break;
      case MVT::f64:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP64BitRegClass;
        break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        RC = &SystemZ::VR128BitRegClass;
        break;
      }
      Register VReg = MRI.createVirtualRegister(RC);
      MRI.addLiveIn(VA.getLocReg(), VReg);
      ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Create the frame index object for this incoming parameter.
      int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
                                     VA.getLocMemOffset(), true);

      // Create the SelectionDAG nodes corresponding to a load
      // from this parameter.  Unpromoted ints and floats are
      // passed as right-justified 8-byte values.
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
                          DAG.getIntPtrConstant(4, DL));
      ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
                             MachinePointerInfo::getFixedStack(MF, FI));
    }
    // Convert the value of the argument register into the value that's
    // being passed.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      // If the original argument was split (e.g. i128), we need
      // to load all parts of it here (using the same address).
      unsigned ArgIndex = Ins[I].OrigArgIndex;
      assert (Ins[I].PartOffset == 0);
      while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[I + 1];
        unsigned PartOffset = Ins[I + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++I;
      }
    } else
      InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
  }
  // Save the number of non-varargs registers for later use by va_start, etc.
  FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
  FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);

  // Likewise the address (in the form of a frame index) of where the
  // first stack vararg would be.  The 1-byte size here is arbitrary.
  int64_t StackSize = CCInfo.getNextStackOffset();
  FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
  // ...and a similar frame index for the caller-allocated save area
  // that will be used to store the incoming registers.
  int64_t RegSaveOffset = -SystemZMC::CallFrameSize;
  unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
  FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
  // Store the FPR varargs in the reserved frame slots.  (We store the
  // GPRs as part of the prologue.)
  if (NumFixedFPRs < SystemZ::NumArgFPRs) {
    SDValue MemOps[SystemZ::NumArgFPRs];
    for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) {
      unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]);
      int FI = MFI.CreateFixedObject(8, RegSaveOffset + Offset, true);
      SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I],
                                   &SystemZ::FP64BitRegClass);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
      MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
                               MachinePointerInfo::getFixedStack(MF, FI));
    }
    // Join the stores, which are independent of one another.
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                        makeArrayRef(&MemOps[NumFixedFPRs],
                                     SystemZ::NumArgFPRs-NumFixedFPRs));
  }

  return Chain;
}

static bool canUseSiblingCall(const CCState &ArgCCInfo,
                              SmallVectorImpl<CCValAssign> &ArgLocs,
                              SmallVectorImpl<ISD::OutputArg> &Outs) {
  // Punt if there are any indirect or stack arguments, or if the call
  // needs the callee-saved argument register R6, or if the call uses
  // the callee-saved register arguments SwiftSelf and SwiftError.
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    if (VA.getLocInfo() == CCValAssign::Indirect)
      return false;
    if (!VA.isRegLoc())
      return false;
    Register Reg = VA.getLocReg();
    if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
      return false;
    if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
      return false;
  }
  return true;
}

SDValue
SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                 SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Detect unsupported vector argument and return types.
  if (Subtarget.hasVector()) {
    VerifyVectorTypes(Outs);
    VerifyVectorTypes(Ins);
  }

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);

  // We don't support GuaranteedTailCallOpt, only automatically-detected
  // sibling calls.
  if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
    IsTailCall = false;

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  // Mark the start of the call.
  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    SDValue ArgValue = OutVals[I];

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[I].ArgVT);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      // If the original argument was split (e.g. i128), we need
      // to store all parts of it here (and pass just one address).
      unsigned ArgIndex = Outs[I].OrigArgIndex;
      assert (Outs[I].PartOffset == 0);
      while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[I + 1];
        unsigned PartOffset = Outs[I + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI)));
        ++I;
      }
      ArgValue = SpillSlot;
    } else
      ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);

    if (VA.isRegLoc())
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Work out the address of the stack slot.  Unpromoted ints and
      // floats are passed as right-justified 8-byte values.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
      unsigned Offset = SystemZMC::CallFrameSize + VA.getLocMemOffset();
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        Offset += 4;
      SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                                    DAG.getIntPtrConstant(Offset, DL));

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
  // Accept direct calls by converting symbolic call addresses to the
  // associated Target* opcodes.  Force %r1 to be used for indirect
  // sibling calls.
  SDValue Glue;
  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
  } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
  } else if (IsTailCall) {
    Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
    Glue = Chain.getValue(1);
    Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
  }

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
                             RegsToPass[I].second, Glue);
    Glue = Chain.getValue(1);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
    Ops.push_back(DAG.getRegister(RegsToPass[I].first,
                                  RegsToPass[I].second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  if (IsTailCall)
    return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
  Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);
  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
  RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
    CCValAssign &VA = RetLocs[I];

    // Copy the value out, gluing the copy to the end of the call sequence.
    SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
                                          VA.getLocVT(), Glue);
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    // Convert the value of the return register into the value that's
    // being returned.
    InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
  }

  return Chain;
}
bool SystemZTargetLowering::
CanLowerReturn(CallingConv::ID CallConv,
               MachineFunction &MF, bool isVarArg,
               const SmallVectorImpl<ISD::OutputArg> &Outs,
               LLVMContext &Context) const {
  // Detect unsupported vector return types.
  if (Subtarget.hasVector())
    VerifyVectorTypes(Outs);

  // Special case that we cannot easily detect in RetCC_SystemZ since
  // i128 is not a legal type.
  for (auto &Out : Outs)
    if (Out.ArgVT == MVT::i128)
      return false;

  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
  return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
}
SDValue
SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                   bool IsVarArg,
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                                   const SmallVectorImpl<SDValue> &OutVals,
                                   const SDLoc &DL, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // Detect unsupported vector return types.
  if (Subtarget.hasVector())
    VerifyVectorTypes(Outs);

  // Assign locations to each returned value.
  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
  RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);

  // Quick exit for void returns
  if (RetLocs.empty())
    return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);

  if (CallConv == CallingConv::GHC)
    report_fatal_error("GHC functions return void only");

  // Copy the result values into the output registers.
  SDValue Glue;
  SmallVector<SDValue, 4> RetOps;
  RetOps.push_back(Chain);
  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
    CCValAssign &VA = RetLocs[I];
    SDValue RetValue = OutVals[I];

    // Make the return register live on exit.
    assert(VA.isRegLoc() && "Can only return in registers!");

    // Promote the value as required.
    RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);

    // Chain and glue the copies together.
    Register Reg = VA.getLocReg();
    Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
  }

  // Update chain and glue.
  RetOps[0] = Chain;
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
}
// Return true if Op is an intrinsic node with chain that returns the CC value
// as its only (other) argument.  Provide the associated SystemZISD opcode and
// the mask of valid CC values if so.
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
                                      unsigned &CCValid) {
  unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  switch (Id) {
  case Intrinsic::s390_tbegin:
    Opcode = SystemZISD::TBEGIN;
    CCValid = SystemZ::CCMASK_TBEGIN;
    return true;

  case Intrinsic::s390_tbegin_nofloat:
    Opcode = SystemZISD::TBEGIN_NOFLOAT;
    CCValid = SystemZ::CCMASK_TBEGIN;
    return true;

  case Intrinsic::s390_tend:
    Opcode = SystemZISD::TEND;
    CCValid = SystemZ::CCMASK_TEND;
    return true;

  default:
    return false;
  }
}
// Return true if Op is an intrinsic node without chain that returns the
// CC value as its final argument.  Provide the associated SystemZISD
// opcode and the mask of valid CC values if so.
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  switch (Id) {
  case Intrinsic::s390_vpkshs:
  case Intrinsic::s390_vpksfs:
  case Intrinsic::s390_vpksgs:
    Opcode = SystemZISD::PACKS_CC;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vpklshs:
  case Intrinsic::s390_vpklsfs:
  case Intrinsic::s390_vpklsgs:
    Opcode = SystemZISD::PACKLS_CC;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vceqbs:
  case Intrinsic::s390_vceqhs:
  case Intrinsic::s390_vceqfs:
  case Intrinsic::s390_vceqgs:
    Opcode = SystemZISD::VICMPES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vchbs:
  case Intrinsic::s390_vchhs:
  case Intrinsic::s390_vchfs:
  case Intrinsic::s390_vchgs:
    Opcode = SystemZISD::VICMPHS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vchlbs:
  case Intrinsic::s390_vchlhs:
  case Intrinsic::s390_vchlfs:
  case Intrinsic::s390_vchlgs:
    Opcode = SystemZISD::VICMPHLS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vtm:
    Opcode = SystemZISD::VTM;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfaebs:
  case Intrinsic::s390_vfaehs:
  case Intrinsic::s390_vfaefs:
    Opcode = SystemZISD::VFAE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfaezbs:
  case Intrinsic::s390_vfaezhs:
  case Intrinsic::s390_vfaezfs:
    Opcode = SystemZISD::VFAEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfeebs:
  case Intrinsic::s390_vfeehs:
  case Intrinsic::s390_vfeefs:
    Opcode = SystemZISD::VFEE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfeezbs:
  case Intrinsic::s390_vfeezhs:
  case Intrinsic::s390_vfeezfs:
    Opcode = SystemZISD::VFEEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfenebs:
  case Intrinsic::s390_vfenehs:
  case Intrinsic::s390_vfenefs:
    Opcode = SystemZISD::VFENE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfenezbs:
  case Intrinsic::s390_vfenezhs:
  case Intrinsic::s390_vfenezfs:
    Opcode = SystemZISD::VFENEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vistrbs:
  case Intrinsic::s390_vistrhs:
  case Intrinsic::s390_vistrfs:
    Opcode = SystemZISD::VISTR_CC;
    CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
    return true;

  case Intrinsic::s390_vstrcbs:
  case Intrinsic::s390_vstrchs:
  case Intrinsic::s390_vstrcfs:
    Opcode = SystemZISD::VSTRC_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vstrczbs:
  case Intrinsic::s390_vstrczhs:
  case Intrinsic::s390_vstrczfs:
    Opcode = SystemZISD::VSTRCZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vstrsb:
  case Intrinsic::s390_vstrsh:
  case Intrinsic::s390_vstrsf:
    Opcode = SystemZISD::VSTRS_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vstrszb:
  case Intrinsic::s390_vstrszh:
  case Intrinsic::s390_vstrszf:
    Opcode = SystemZISD::VSTRSZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfcedbs:
  case Intrinsic::s390_vfcesbs:
    Opcode = SystemZISD::VFCMPES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfchdbs:
  case Intrinsic::s390_vfchsbs:
    Opcode = SystemZISD::VFCMPHS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfchedbs:
  case Intrinsic::s390_vfchesbs:
    Opcode = SystemZISD::VFCMPHES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vftcidb:
  case Intrinsic::s390_vftcisb:
    Opcode = SystemZISD::VFTCI;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_tdc:
    Opcode = SystemZISD::TDC;
    CCValid = SystemZ::CCMASK_TDC;
    return true;

  default:
    return false;
  }
}
// Emit an intrinsic with chain and an explicit CC register result.
static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
                                           unsigned Opcode) {
  // Copy all operands except the intrinsic ID.
  unsigned NumOps = Op.getNumOperands();
  SmallVector<SDValue, 6> Ops;
  Ops.reserve(NumOps - 1);
  Ops.push_back(Op.getOperand(0));
  for (unsigned I = 2; I < NumOps; ++I)
    Ops.push_back(Op.getOperand(I));

  assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
  SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
  SDValue OldChain = SDValue(Op.getNode(), 1);
  SDValue NewChain = SDValue(Intr.getNode(), 1);
  DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
  return Intr.getNode();
}
// Emit an intrinsic with an explicit CC register result.
static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
                                   unsigned Opcode) {
  // Copy all operands except the intrinsic ID.
  unsigned NumOps = Op.getNumOperands();
  SmallVector<SDValue, 6> Ops;
  Ops.reserve(NumOps - 1);
  for (unsigned I = 1; I < NumOps; ++I)
    Ops.push_back(Op.getOperand(I));

  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
  return Intr.getNode();
}
// CC is a comparison that will be implemented using an integer or
// floating-point comparison.  Return the condition code mask for
// a branch on true.  In the integer case, CCMASK_CMP_UO is set for
// unsigned comparisons and clear for signed ones.  In the floating-point
// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
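// For example, CCMaskForCondCode(ISD::SETUGT) is expected to yield
// SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_GT, whereas ISD::SETGT
// yields just SystemZ::CCMASK_CMP_GT.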
static unsigned CCMaskForCondCode(ISD::CondCode CC) {
#define CONV(X) \
  case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
  case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
  case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X

  switch (CC) {
  default:
    llvm_unreachable("Invalid integer condition!");

  CONV(EQ);
  CONV(NE);
  CONV(GT);
  CONV(GE);
  CONV(LT);
  CONV(LE);

  case ISD::SETO:  return SystemZ::CCMASK_CMP_O;
  case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
  }
#undef CONV
}
// If C can be converted to a comparison against zero, adjust the operands
// as necessary.
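// For instance, a signed test "X > -1" can be rewritten as "X >= 0" and
// "X < 1" as "X <= 0", allowing the cheaper compare-with-zero forms
// (such as LOAD AND TEST) to be used.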
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
  if (C.ICmpType == SystemZICMP::UnsignedOnly)
    return;

  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
  if (!ConstOp1)
    return;

  int64_t Value = ConstOp1->getSExtValue();
  if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
      (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
      (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
      (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
    C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
    C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
  }
}
// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
// adjust the operands as necessary.
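// For example, an equality test between a zero-extending i8 load and the
// constant 200 should map directly onto CLI, and a signed "X < 0" test on a
// sign-extending i8 load should become the unsigned byte test "X > 127".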
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
                             Comparison &C) {
  // For us to make any changes, it must a comparison between a single-use
  // load and a constant.
  if (!C.Op0.hasOneUse() ||
      C.Op0.getOpcode() != ISD::LOAD ||
      C.Op1.getOpcode() != ISD::Constant)
    return;

  // We must have an 8- or 16-bit load.
  auto *Load = cast<LoadSDNode>(C.Op0);
  unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits();
  if (NumBits != 8 && NumBits != 16)
    return;

  // The load must be an extending one and the constant must be within the
  // range of the unextended value.
  auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
  uint64_t Value = ConstOp1->getZExtValue();
  uint64_t Mask = (1 << NumBits) - 1;
  if (Load->getExtensionType() == ISD::SEXTLOAD) {
    // Make sure that ConstOp1 is in range of C.Op0.
    int64_t SignedValue = ConstOp1->getSExtValue();
    if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
      return;
    if (C.ICmpType != SystemZICMP::SignedOnly) {
      // Unsigned comparison between two sign-extended values is equivalent
      // to unsigned comparison between two zero-extended values.
      Value &= Mask;
    } else if (NumBits == 8) {
      // Try to treat the comparison as unsigned, so that we can use CLI.
      // Adjust CCMask and Value as necessary.
      if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
        // Test whether the high bit of the byte is set.
        Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
      else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
        // Test whether the high bit of the byte is clear.
        Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
      else
        // No instruction exists for this combination.
        return;
      C.ICmpType = SystemZICMP::UnsignedOnly;
    }
  } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
    if (Value > Mask)
      return;
    // If the constant is in range, we can use any comparison.
    C.ICmpType = SystemZICMP::Any;
  } else
    return;

  // Make sure that the first operand is an i32 of the right extension type.
  ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
                              ISD::SEXTLOAD :
                              ISD::ZEXTLOAD);
  if (C.Op0.getValueType() != MVT::i32 ||
      Load->getExtensionType() != ExtType) {
    C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
                           Load->getBasePtr(), Load->getPointerInfo(),
                           Load->getMemoryVT(), Load->getAlignment(),
                           Load->getMemOperand()->getFlags());
    // Update the chain uses.
    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
  }

  // Make sure that the second operand is an i32 with the right value.
  if (C.Op1.getValueType() != MVT::i32 ||
      Value != ConstOp1->getZExtValue())
    C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
}
// Return true if Op is either an unextended load, or a load suitable
// for integer register-memory comparisons of type ICmpType.
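// For instance, a sign-extending halfword load can feed a signed
// register-memory comparison such as CH directly, but not an unsigned one,
// while a zero-extending load works the other way around.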
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
  auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
  if (Load) {
    // There are no instructions to compare a register with a memory byte.
    if (Load->getMemoryVT() == MVT::i8)
      return false;
    // Otherwise decide on extension type.
    switch (Load->getExtensionType()) {
    case ISD::NON_EXTLOAD:
      return true;
    case ISD::SEXTLOAD:
      return ICmpType != SystemZICMP::UnsignedOnly;
    case ISD::ZEXTLOAD:
      return ICmpType != SystemZICMP::SignedOnly;
    default:
      break;
    }
  }
  return false;
}
// Return true if it is better to swap the operands of C.
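// For example, when Op0 is a single-use loaded value and Op1 is held in a
// register, swapping the operands allows a register-memory compare such as
// C or CL to be used, with the CC mask adjusted via reverseCCMask() below.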
static bool shouldSwapCmpOperands(const Comparison &C) {
  // Leave f128 comparisons alone, since they have no memory forms.
  if (C.Op0.getValueType() == MVT::f128)
    return false;

  // Always keep a floating-point constant second, since comparisons with
  // zero can use LOAD TEST and comparisons with other constants make a
  // natural memory operand.
  if (isa<ConstantFPSDNode>(C.Op1))
    return false;

  // Never swap comparisons with zero since there are many ways to optimize
  // those later.
  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
  if (ConstOp1 && ConstOp1->getZExtValue() == 0)
    return false;

  // Also keep natural memory operands second if the loaded value is
  // only used here.  Several comparisons have memory forms.
  if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
    return false;

  // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
  // In that case we generally prefer the memory to be second.
  if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
    // The only exceptions are when the second operand is a constant and
    // we can use things like CHHSI.
    if (!ConstOp1)
      return true;
    // The unsigned memory-immediate instructions can handle 16-bit
    // unsigned integers.
    if (C.ICmpType != SystemZICMP::SignedOnly &&
        isUInt<16>(ConstOp1->getZExtValue()))
      return false;
    // The signed memory-immediate instructions can handle 16-bit
    // signed integers.
    if (C.ICmpType != SystemZICMP::UnsignedOnly &&
        isInt<16>(ConstOp1->getSExtValue()))
      return false;
    return true;
  }

  // Try to promote the use of CGFR and CLGFR.
  unsigned Opcode0 = C.Op0.getOpcode();
  if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
    return true;
  if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
    return true;
  if (C.ICmpType != SystemZICMP::SignedOnly &&
      Opcode0 == ISD::AND &&
      C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
    return true;

  return false;
}
// Return a version of comparison CC mask CCMask in which the LT and GT
// actions are swapped.
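// For example, reverseCCMask(CCMASK_CMP_LT | CCMASK_CMP_EQ) should yield
// CCMASK_CMP_GT | CCMASK_CMP_EQ, i.e. "<=" becomes ">=" once the comparison
// operands have been swapped.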
static unsigned reverseCCMask(unsigned CCMask) {
  return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
          (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
          (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
          (CCMask & SystemZ::CCMASK_CMP_UO));
}
// Check whether C tests for equality between X and Y and whether X - Y
// or Y - X is also computed.  In that case it's better to compare the
// result of the subtraction against zero.
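// For instance, if the function computes both "A - B" and "A == B", the
// comparison is expected to be rewritten as "(A - B) == 0" so that the
// subtraction itself sets CC and no separate compare is needed.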
static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
                                 Comparison &C) {
  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
      C.CCMask == SystemZ::CCMASK_CMP_NE) {
    for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
      SDNode *N = *I;
      if (N->getOpcode() == ISD::SUB &&
          ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
           (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
        C.Op0 = SDValue(N, 0);
        C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
        return;
      }
    }
  }
}
2193 // Check whether C compares a floating-point value with zero and if that
2194 // floating-point value is also negated. In this case we can use the
2195 // negation to set CC, so avoiding separate LOAD AND TEST and
2196 // LOAD (NEGATIVE/COMPLEMENT) instructions.
2197 static void adjustForFNeg(Comparison
&C
) {
2198 // This optimization is invalid for strict comparisons, since FNEG
2199 // does not raise any exceptions.
2202 auto *C1
= dyn_cast
<ConstantFPSDNode
>(C
.Op1
);
2203 if (C1
&& C1
->isZero()) {
2204 for (auto I
= C
.Op0
->use_begin(), E
= C
.Op0
->use_end(); I
!= E
; ++I
) {
2206 if (N
->getOpcode() == ISD::FNEG
) {
2207 C
.Op0
= SDValue(N
, 0);
2208 C
.CCMask
= reverseCCMask(C
.CCMask
);
2215 // Check whether C compares (shl X, 32) with 0 and whether X is
2216 // also sign-extended. In that case it is better to test the result
2217 // of the sign extension using LTGFR.
2219 // This case is important because InstCombine transforms a comparison
2220 // with (sext (trunc X)) into a comparison with (shl X, 32).
2221 static void adjustForLTGFR(Comparison
&C
) {
2222 // Check for a comparison between (shl X, 32) and 0.
2223 if (C
.Op0
.getOpcode() == ISD::SHL
&&
2224 C
.Op0
.getValueType() == MVT::i64
&&
2225 C
.Op1
.getOpcode() == ISD::Constant
&&
2226 cast
<ConstantSDNode
>(C
.Op1
)->getZExtValue() == 0) {
2227 auto *C1
= dyn_cast
<ConstantSDNode
>(C
.Op0
.getOperand(1));
2228 if (C1
&& C1
->getZExtValue() == 32) {
2229 SDValue ShlOp0
= C
.Op0
.getOperand(0);
2230 // See whether X has any SIGN_EXTEND_INREG uses.
2231 for (auto I
= ShlOp0
->use_begin(), E
= ShlOp0
->use_end(); I
!= E
; ++I
) {
2233 if (N
->getOpcode() == ISD::SIGN_EXTEND_INREG
&&
2234 cast
<VTSDNode
>(N
->getOperand(1))->getVT() == MVT::i32
) {
2235 C
.Op0
= SDValue(N
, 0);
2243 // If C compares the truncation of an extending load, try to compare
2244 // the untruncated value instead. This exposes more opportunities to
2246 static void adjustICmpTruncate(SelectionDAG
&DAG
, const SDLoc
&DL
,
2248 if (C
.Op0
.getOpcode() == ISD::TRUNCATE
&&
2249 C
.Op0
.getOperand(0).getOpcode() == ISD::LOAD
&&
2250 C
.Op1
.getOpcode() == ISD::Constant
&&
2251 cast
<ConstantSDNode
>(C
.Op1
)->getZExtValue() == 0) {
2252 auto *L
= cast
<LoadSDNode
>(C
.Op0
.getOperand(0));
2253 if (L
->getMemoryVT().getStoreSizeInBits() <= C
.Op0
.getValueSizeInBits()) {
2254 unsigned Type
= L
->getExtensionType();
2255 if ((Type
== ISD::ZEXTLOAD
&& C
.ICmpType
!= SystemZICMP::SignedOnly
) ||
2256 (Type
== ISD::SEXTLOAD
&& C
.ICmpType
!= SystemZICMP::UnsignedOnly
)) {
2257 C
.Op0
= C
.Op0
.getOperand(0);
2258 C
.Op1
= DAG
.getConstant(0, DL
, C
.Op0
.getValueType());
2264 // Return true if shift operation N has an in-range constant shift value.
2265 // Store it in ShiftVal if so.
2266 static bool isSimpleShift(SDValue N
, unsigned &ShiftVal
) {
2267 auto *Shift
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1));
2271 uint64_t Amount
= Shift
->getZExtValue();
2272 if (Amount
>= N
.getValueSizeInBits())
2279 // Check whether an AND with Mask is suitable for a TEST UNDER MASK
2280 // instruction and whether the CC value is descriptive enough to handle
2281 // a comparison of type Opcode between the AND result and CmpVal.
2282 // CCMask says which comparison result is being tested and BitSize is
2283 // the number of bits in the operands. If TEST UNDER MASK can be used,
2284 // return the corresponding CC mask, otherwise return 0.
2285 static unsigned getTestUnderMaskCond(unsigned BitSize
, unsigned CCMask
,
2286 uint64_t Mask
, uint64_t CmpVal
,
2287 unsigned ICmpType
) {
2288 assert(Mask
!= 0 && "ANDs with zero should have been removed by now");
2290 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2291 if (!SystemZ::isImmLL(Mask
) && !SystemZ::isImmLH(Mask
) &&
2292 !SystemZ::isImmHL(Mask
) && !SystemZ::isImmHH(Mask
))
2295 // Work out the masks for the lowest and highest bits.
2296 unsigned HighShift
= 63 - countLeadingZeros(Mask
);
2297 uint64_t High
= uint64_t(1) << HighShift
;
2298 uint64_t Low
= uint64_t(1) << countTrailingZeros(Mask
);
2300 // Signed ordered comparisons are effectively unsigned if the sign
2302 bool EffectivelyUnsigned
= (ICmpType
!= SystemZICMP::SignedOnly
);
2304 // Check for equality comparisons with 0, or the equivalent.
2306 if (CCMask
== SystemZ::CCMASK_CMP_EQ
)
2307 return SystemZ::CCMASK_TM_ALL_0
;
2308 if (CCMask
== SystemZ::CCMASK_CMP_NE
)
2309 return SystemZ::CCMASK_TM_SOME_1
;
2311 if (EffectivelyUnsigned
&& CmpVal
> 0 && CmpVal
<= Low
) {
2312 if (CCMask
== SystemZ::CCMASK_CMP_LT
)
2313 return SystemZ::CCMASK_TM_ALL_0
;
2314 if (CCMask
== SystemZ::CCMASK_CMP_GE
)
2315 return SystemZ::CCMASK_TM_SOME_1
;
2317 if (EffectivelyUnsigned
&& CmpVal
< Low
) {
2318 if (CCMask
== SystemZ::CCMASK_CMP_LE
)
2319 return SystemZ::CCMASK_TM_ALL_0
;
2320 if (CCMask
== SystemZ::CCMASK_CMP_GT
)
2321 return SystemZ::CCMASK_TM_SOME_1
;
2324 // Check for equality comparisons with the mask, or the equivalent.
2325 if (CmpVal
== Mask
) {
2326 if (CCMask
== SystemZ::CCMASK_CMP_EQ
)
2327 return SystemZ::CCMASK_TM_ALL_1
;
2328 if (CCMask
== SystemZ::CCMASK_CMP_NE
)
2329 return SystemZ::CCMASK_TM_SOME_0
;
2331 if (EffectivelyUnsigned
&& CmpVal
>= Mask
- Low
&& CmpVal
< Mask
) {
2332 if (CCMask
== SystemZ::CCMASK_CMP_GT
)
2333 return SystemZ::CCMASK_TM_ALL_1
;
2334 if (CCMask
== SystemZ::CCMASK_CMP_LE
)
2335 return SystemZ::CCMASK_TM_SOME_0
;
2337 if (EffectivelyUnsigned
&& CmpVal
> Mask
- Low
&& CmpVal
<= Mask
) {
2338 if (CCMask
== SystemZ::CCMASK_CMP_GE
)
2339 return SystemZ::CCMASK_TM_ALL_1
;
2340 if (CCMask
== SystemZ::CCMASK_CMP_LT
)
2341 return SystemZ::CCMASK_TM_SOME_0
;
2344 // Check for ordered comparisons with the top bit.
2345 if (EffectivelyUnsigned
&& CmpVal
>= Mask
- High
&& CmpVal
< High
) {
2346 if (CCMask
== SystemZ::CCMASK_CMP_LE
)
2347 return SystemZ::CCMASK_TM_MSB_0
;
2348 if (CCMask
== SystemZ::CCMASK_CMP_GT
)
2349 return SystemZ::CCMASK_TM_MSB_1
;
2351 if (EffectivelyUnsigned
&& CmpVal
> Mask
- High
&& CmpVal
<= High
) {
2352 if (CCMask
== SystemZ::CCMASK_CMP_LT
)
2353 return SystemZ::CCMASK_TM_MSB_0
;
2354 if (CCMask
== SystemZ::CCMASK_CMP_GE
)
2355 return SystemZ::CCMASK_TM_MSB_1
;
2358 // If there are just two bits, we can do equality checks for Low and High
2360 if (Mask
== Low
+ High
) {
2361 if (CCMask
== SystemZ::CCMASK_CMP_EQ
&& CmpVal
== Low
)
2362 return SystemZ::CCMASK_TM_MIXED_MSB_0
;
2363 if (CCMask
== SystemZ::CCMASK_CMP_NE
&& CmpVal
== Low
)
2364 return SystemZ::CCMASK_TM_MIXED_MSB_0
^ SystemZ::CCMASK_ANY
;
2365 if (CCMask
== SystemZ::CCMASK_CMP_EQ
&& CmpVal
== High
)
2366 return SystemZ::CCMASK_TM_MIXED_MSB_1
;
2367 if (CCMask
== SystemZ::CCMASK_CMP_NE
&& CmpVal
== High
)
2368 return SystemZ::CCMASK_TM_MIXED_MSB_1
^ SystemZ::CCMASK_ANY
;
2371 // Looks like we've exhausted our options.
2375 // See whether C can be implemented as a TEST UNDER MASK instruction.
2376 // Update the arguments with the TM version if so.
2377 static void adjustForTestUnderMask(SelectionDAG
&DAG
, const SDLoc
&DL
,
2379 // Check that we have a comparison with a constant.
2380 auto *ConstOp1
= dyn_cast
<ConstantSDNode
>(C
.Op1
);
2383 uint64_t CmpVal
= ConstOp1
->getZExtValue();
2385 // Check whether the nonconstant input is an AND with a constant mask.
2388 ConstantSDNode
*Mask
= nullptr;
2389 if (C
.Op0
.getOpcode() == ISD::AND
) {
2390 NewC
.Op0
= C
.Op0
.getOperand(0);
2391 NewC
.Op1
= C
.Op0
.getOperand(1);
2392 Mask
= dyn_cast
<ConstantSDNode
>(NewC
.Op1
);
2395 MaskVal
= Mask
->getZExtValue();
2397 // There is no instruction to compare with a 64-bit immediate
2398 // so use TMHH instead if possible. We need an unsigned ordered
2399 // comparison with an i64 immediate.
2400 if (NewC
.Op0
.getValueType() != MVT::i64
||
2401 NewC
.CCMask
== SystemZ::CCMASK_CMP_EQ
||
2402 NewC
.CCMask
== SystemZ::CCMASK_CMP_NE
||
2403 NewC
.ICmpType
== SystemZICMP::SignedOnly
)
2405 // Convert LE and GT comparisons into LT and GE.
2406 if (NewC
.CCMask
== SystemZ::CCMASK_CMP_LE
||
2407 NewC
.CCMask
== SystemZ::CCMASK_CMP_GT
) {
2408 if (CmpVal
== uint64_t(-1))
2411 NewC
.CCMask
^= SystemZ::CCMASK_CMP_EQ
;
2413 // If the low N bits of Op1 are zero than the low N bits of Op0 can
2414 // be masked off without changing the result.
2415 MaskVal
= -(CmpVal
& -CmpVal
);
2416 NewC
.ICmpType
= SystemZICMP::UnsignedOnly
;
2421 // Check whether the combination of mask, comparison value and comparison
2422 // type are suitable.
2423 unsigned BitSize
= NewC
.Op0
.getValueSizeInBits();
2424 unsigned NewCCMask
, ShiftVal
;
2425 if (NewC
.ICmpType
!= SystemZICMP::SignedOnly
&&
2426 NewC
.Op0
.getOpcode() == ISD::SHL
&&
2427 isSimpleShift(NewC
.Op0
, ShiftVal
) &&
2428 (MaskVal
>> ShiftVal
!= 0) &&
2429 ((CmpVal
>> ShiftVal
) << ShiftVal
) == CmpVal
&&
2430 (NewCCMask
= getTestUnderMaskCond(BitSize
, NewC
.CCMask
,
2431 MaskVal
>> ShiftVal
,
2433 SystemZICMP::Any
))) {
2434 NewC
.Op0
= NewC
.Op0
.getOperand(0);
2435 MaskVal
>>= ShiftVal
;
2436 } else if (NewC
.ICmpType
!= SystemZICMP::SignedOnly
&&
2437 NewC
.Op0
.getOpcode() == ISD::SRL
&&
2438 isSimpleShift(NewC
.Op0
, ShiftVal
) &&
2439 (MaskVal
<< ShiftVal
!= 0) &&
2440 ((CmpVal
<< ShiftVal
) >> ShiftVal
) == CmpVal
&&
2441 (NewCCMask
= getTestUnderMaskCond(BitSize
, NewC
.CCMask
,
2442 MaskVal
<< ShiftVal
,
2444 SystemZICMP::UnsignedOnly
))) {
2445 NewC
.Op0
= NewC
.Op0
.getOperand(0);
2446 MaskVal
<<= ShiftVal
;
2448 NewCCMask
= getTestUnderMaskCond(BitSize
, NewC
.CCMask
, MaskVal
, CmpVal
,
2454 // Go ahead and make the change.
2455 C
.Opcode
= SystemZISD::TM
;
2457 if (Mask
&& Mask
->getZExtValue() == MaskVal
)
2458 C
.Op1
= SDValue(Mask
, 0);
2460 C
.Op1
= DAG
.getConstant(MaskVal
, DL
, C
.Op0
.getValueType());
2461 C
.CCValid
= SystemZ::CCMASK_TM
;
2462 C
.CCMask
= NewCCMask
;
2465 // See whether the comparison argument contains a redundant AND
2466 // and remove it if so. This sometimes happens due to the generic
2467 // BRCOND expansion.
2468 static void adjustForRedundantAnd(SelectionDAG
&DAG
, const SDLoc
&DL
,
2470 if (C
.Op0
.getOpcode() != ISD::AND
)
2472 auto *Mask
= dyn_cast
<ConstantSDNode
>(C
.Op0
.getOperand(1));
2475 KnownBits Known
= DAG
.computeKnownBits(C
.Op0
.getOperand(0));
2476 if ((~Known
.Zero
).getZExtValue() & ~Mask
->getZExtValue())
2479 C
.Op0
= C
.Op0
.getOperand(0);
2482 // Return a Comparison that tests the condition-code result of intrinsic
2483 // node Call against constant integer CC using comparison code Cond.
2484 // Opcode is the opcode of the SystemZISD operation for the intrinsic
2485 // and CCValid is the set of possible condition-code results.
2486 static Comparison
getIntrinsicCmp(SelectionDAG
&DAG
, unsigned Opcode
,
2487 SDValue Call
, unsigned CCValid
, uint64_t CC
,
2488 ISD::CondCode Cond
) {
2489 Comparison
C(Call
, SDValue(), SDValue());
2491 C
.CCValid
= CCValid
;
2492 if (Cond
== ISD::SETEQ
)
2493 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
2494 C
.CCMask
= CC
< 4 ? 1 << (3 - CC
) : 0;
2495 else if (Cond
== ISD::SETNE
)
2496 // ...and the inverse of that.
2497 C
.CCMask
= CC
< 4 ? ~(1 << (3 - CC
)) : -1;
2498 else if (Cond
== ISD::SETLT
|| Cond
== ISD::SETULT
)
2499 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
2500 // always true for CC>3.
2501 C
.CCMask
= CC
< 4 ? ~0U << (4 - CC
) : -1;
2502 else if (Cond
== ISD::SETGE
|| Cond
== ISD::SETUGE
)
2503 // ...and the inverse of that.
2504 C
.CCMask
= CC
< 4 ? ~(~0U << (4 - CC
)) : 0;
2505 else if (Cond
== ISD::SETLE
|| Cond
== ISD::SETULE
)
2506 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
2507 // always true for CC>3.
2508 C
.CCMask
= CC
< 4 ? ~0U << (3 - CC
) : -1;
2509 else if (Cond
== ISD::SETGT
|| Cond
== ISD::SETUGT
)
2510 // ...and the inverse of that.
2511 C
.CCMask
= CC
< 4 ? ~(~0U << (3 - CC
)) : 0;
2513 llvm_unreachable("Unexpected integer comparison type");
2514 C
.CCMask
&= CCValid
;
2518 // Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1.
2519 static Comparison
getCmp(SelectionDAG
&DAG
, SDValue CmpOp0
, SDValue CmpOp1
,
2520 ISD::CondCode Cond
, const SDLoc
&DL
,
2521 SDValue Chain
= SDValue(),
2522 bool IsSignaling
= false) {
2523 if (CmpOp1
.getOpcode() == ISD::Constant
) {
2525 uint64_t Constant
= cast
<ConstantSDNode
>(CmpOp1
)->getZExtValue();
2526 unsigned Opcode
, CCValid
;
2527 if (CmpOp0
.getOpcode() == ISD::INTRINSIC_W_CHAIN
&&
2528 CmpOp0
.getResNo() == 0 && CmpOp0
->hasNUsesOfValue(1, 0) &&
2529 isIntrinsicWithCCAndChain(CmpOp0
, Opcode
, CCValid
))
2530 return getIntrinsicCmp(DAG
, Opcode
, CmpOp0
, CCValid
, Constant
, Cond
);
2531 if (CmpOp0
.getOpcode() == ISD::INTRINSIC_WO_CHAIN
&&
2532 CmpOp0
.getResNo() == CmpOp0
->getNumValues() - 1 &&
2533 isIntrinsicWithCC(CmpOp0
, Opcode
, CCValid
))
2534 return getIntrinsicCmp(DAG
, Opcode
, CmpOp0
, CCValid
, Constant
, Cond
);
2536 Comparison
C(CmpOp0
, CmpOp1
, Chain
);
2537 C
.CCMask
= CCMaskForCondCode(Cond
);
2538 if (C
.Op0
.getValueType().isFloatingPoint()) {
2539 C
.CCValid
= SystemZ::CCMASK_FCMP
;
2541 C
.Opcode
= SystemZISD::FCMP
;
2542 else if (!IsSignaling
)
2543 C
.Opcode
= SystemZISD::STRICT_FCMP
;
2545 C
.Opcode
= SystemZISD::STRICT_FCMPS
;
2549 C
.CCValid
= SystemZ::CCMASK_ICMP
;
2550 C
.Opcode
= SystemZISD::ICMP
;
2551 // Choose the type of comparison. Equality and inequality tests can
2552 // use either signed or unsigned comparisons. The choice also doesn't
2553 // matter if both sign bits are known to be clear. In those cases we
2554 // want to give the main isel code the freedom to choose whichever
2556 if (C
.CCMask
== SystemZ::CCMASK_CMP_EQ
||
2557 C
.CCMask
== SystemZ::CCMASK_CMP_NE
||
2558 (DAG
.SignBitIsZero(C
.Op0
) && DAG
.SignBitIsZero(C
.Op1
)))
2559 C
.ICmpType
= SystemZICMP::Any
;
2560 else if (C
.CCMask
& SystemZ::CCMASK_CMP_UO
)
2561 C
.ICmpType
= SystemZICMP::UnsignedOnly
;
2563 C
.ICmpType
= SystemZICMP::SignedOnly
;
2564 C
.CCMask
&= ~SystemZ::CCMASK_CMP_UO
;
2565 adjustForRedundantAnd(DAG
, DL
, C
);
2566 adjustZeroCmp(DAG
, DL
, C
);
2567 adjustSubwordCmp(DAG
, DL
, C
);
2568 adjustForSubtraction(DAG
, DL
, C
);
2570 adjustICmpTruncate(DAG
, DL
, C
);
2573 if (shouldSwapCmpOperands(C
)) {
2574 std::swap(C
.Op0
, C
.Op1
);
2575 C
.CCMask
= reverseCCMask(C
.CCMask
);
2578 adjustForTestUnderMask(DAG
, DL
, C
);
2582 // Emit the comparison instruction described by C.
2583 static SDValue
emitCmp(SelectionDAG
&DAG
, const SDLoc
&DL
, Comparison
&C
) {
2584 if (!C
.Op1
.getNode()) {
2586 switch (C
.Op0
.getOpcode()) {
2587 case ISD::INTRINSIC_W_CHAIN
:
2588 Node
= emitIntrinsicWithCCAndChain(DAG
, C
.Op0
, C
.Opcode
);
2589 return SDValue(Node
, 0);
2590 case ISD::INTRINSIC_WO_CHAIN
:
2591 Node
= emitIntrinsicWithCC(DAG
, C
.Op0
, C
.Opcode
);
2592 return SDValue(Node
, Node
->getNumValues() - 1);
2594 llvm_unreachable("Invalid comparison operands");
2597 if (C
.Opcode
== SystemZISD::ICMP
)
2598 return DAG
.getNode(SystemZISD::ICMP
, DL
, MVT::i32
, C
.Op0
, C
.Op1
,
2599 DAG
.getTargetConstant(C
.ICmpType
, DL
, MVT::i32
));
2600 if (C
.Opcode
== SystemZISD::TM
) {
2601 bool RegisterOnly
= (bool(C
.CCMask
& SystemZ::CCMASK_TM_MIXED_MSB_0
) !=
2602 bool(C
.CCMask
& SystemZ::CCMASK_TM_MIXED_MSB_1
));
2603 return DAG
.getNode(SystemZISD::TM
, DL
, MVT::i32
, C
.Op0
, C
.Op1
,
2604 DAG
.getTargetConstant(RegisterOnly
, DL
, MVT::i32
));
2607 SDVTList VTs
= DAG
.getVTList(MVT::i32
, MVT::Other
);
2608 return DAG
.getNode(C
.Opcode
, DL
, VTs
, C
.Chain
, C
.Op0
, C
.Op1
);
2610 return DAG
.getNode(C
.Opcode
, DL
, MVT::i32
, C
.Op0
, C
.Op1
);
2613 // Implement a 32-bit *MUL_LOHI operation by extending both operands to
2614 // 64 bits. Extend is the extension type to use. Store the high part
2615 // in Hi and the low part in Lo.
2616 static void lowerMUL_LOHI32(SelectionDAG
&DAG
, const SDLoc
&DL
, unsigned Extend
,
2617 SDValue Op0
, SDValue Op1
, SDValue
&Hi
,
2619 Op0
= DAG
.getNode(Extend
, DL
, MVT::i64
, Op0
);
2620 Op1
= DAG
.getNode(Extend
, DL
, MVT::i64
, Op1
);
2621 SDValue Mul
= DAG
.getNode(ISD::MUL
, DL
, MVT::i64
, Op0
, Op1
);
2622 Hi
= DAG
.getNode(ISD::SRL
, DL
, MVT::i64
, Mul
,
2623 DAG
.getConstant(32, DL
, MVT::i64
));
2624 Hi
= DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i32
, Hi
);
2625 Lo
= DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i32
, Mul
);
2628 // Lower a binary operation that produces two VT results, one in each
2629 // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
2630 // and Opcode performs the GR128 operation. Store the even register result
2631 // in Even and the odd register result in Odd.
2632 static void lowerGR128Binary(SelectionDAG
&DAG
, const SDLoc
&DL
, EVT VT
,
2633 unsigned Opcode
, SDValue Op0
, SDValue Op1
,
2634 SDValue
&Even
, SDValue
&Odd
) {
2635 SDValue Result
= DAG
.getNode(Opcode
, DL
, MVT::Untyped
, Op0
, Op1
);
2636 bool Is32Bit
= is32Bit(VT
);
2637 Even
= DAG
.getTargetExtractSubreg(SystemZ::even128(Is32Bit
), DL
, VT
, Result
);
2638 Odd
= DAG
.getTargetExtractSubreg(SystemZ::odd128(Is32Bit
), DL
, VT
, Result
);
2641 // Return an i32 value that is 1 if the CC value produced by CCReg is
2642 // in the mask CCMask and 0 otherwise. CC is known to have a value
2643 // in CCValid, so other values can be ignored.
2644 static SDValue
emitSETCC(SelectionDAG
&DAG
, const SDLoc
&DL
, SDValue CCReg
,
2645 unsigned CCValid
, unsigned CCMask
) {
2646 SDValue Ops
[] = {DAG
.getConstant(1, DL
, MVT::i32
),
2647 DAG
.getConstant(0, DL
, MVT::i32
),
2648 DAG
.getTargetConstant(CCValid
, DL
, MVT::i32
),
2649 DAG
.getTargetConstant(CCMask
, DL
, MVT::i32
), CCReg
};
2650 return DAG
.getNode(SystemZISD::SELECT_CCMASK
, DL
, MVT::i32
, Ops
);
2653 // Return the SystemISD vector comparison operation for CC, or 0 if it cannot
2654 // be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
2655 // for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
2656 // floating-point comparisons, and CmpMode::SignalingFP for strict signaling
2657 // floating-point comparisons.
2658 enum class CmpMode
{ Int
, FP
, StrictFP
, SignalingFP
};
2659 static unsigned getVectorComparison(ISD::CondCode CC
, CmpMode Mode
) {
2664 case CmpMode::Int
: return SystemZISD::VICMPE
;
2665 case CmpMode::FP
: return SystemZISD::VFCMPE
;
2666 case CmpMode::StrictFP
: return SystemZISD::STRICT_VFCMPE
;
2667 case CmpMode::SignalingFP
: return SystemZISD::STRICT_VFCMPES
;
2669 llvm_unreachable("Bad mode");
2674 case CmpMode::Int
: return 0;
2675 case CmpMode::FP
: return SystemZISD::VFCMPHE
;
2676 case CmpMode::StrictFP
: return SystemZISD::STRICT_VFCMPHE
;
2677 case CmpMode::SignalingFP
: return SystemZISD::STRICT_VFCMPHES
;
2679 llvm_unreachable("Bad mode");
2684 case CmpMode::Int
: return SystemZISD::VICMPH
;
2685 case CmpMode::FP
: return SystemZISD::VFCMPH
;
2686 case CmpMode::StrictFP
: return SystemZISD::STRICT_VFCMPH
;
2687 case CmpMode::SignalingFP
: return SystemZISD::STRICT_VFCMPHS
;
2689 llvm_unreachable("Bad mode");
2693 case CmpMode::Int
: return SystemZISD::VICMPHL
;
2694 case CmpMode::FP
: return 0;
2695 case CmpMode::StrictFP
: return 0;
2696 case CmpMode::SignalingFP
: return 0;
2698 llvm_unreachable("Bad mode");
2705 // Return the SystemZISD vector comparison operation for CC or its inverse,
2706 // or 0 if neither can be done directly. Indicate in Invert whether the
2707 // result is for the inverse of CC. Mode is as above.
2708 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC
, CmpMode Mode
,
2710 if (unsigned Opcode
= getVectorComparison(CC
, Mode
)) {
2715 CC
= ISD::getSetCCInverse(CC
, Mode
== CmpMode::Int
? MVT::i32
: MVT::f32
);
2716 if (unsigned Opcode
= getVectorComparison(CC
, Mode
)) {
2724 // Return a v2f64 that contains the extended form of elements Start and Start+1
2725 // of v4f32 value Op. If Chain is nonnull, return the strict form.
2726 static SDValue
expandV4F32ToV2F64(SelectionDAG
&DAG
, int Start
, const SDLoc
&DL
,
2727 SDValue Op
, SDValue Chain
) {
2728 int Mask
[] = { Start
, -1, Start
+ 1, -1 };
2729 Op
= DAG
.getVectorShuffle(MVT::v4f32
, DL
, Op
, DAG
.getUNDEF(MVT::v4f32
), Mask
);
2731 SDVTList VTs
= DAG
.getVTList(MVT::v2f64
, MVT::Other
);
2732 return DAG
.getNode(SystemZISD::STRICT_VEXTEND
, DL
, VTs
, Chain
, Op
);
2734 return DAG
.getNode(SystemZISD::VEXTEND
, DL
, MVT::v2f64
, Op
);
2737 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
2738 // producing a result of type VT. If Chain is nonnull, return the strict form.
2739 SDValue
SystemZTargetLowering::getVectorCmp(SelectionDAG
&DAG
, unsigned Opcode
,
2740 const SDLoc
&DL
, EVT VT
,
2743 SDValue Chain
) const {
2744 // There is no hardware support for v4f32 (unless we have the vector
2745 // enhancements facility 1), so extend the vector into two v2f64s
2746 // and compare those.
2747 if (CmpOp0
.getValueType() == MVT::v4f32
&&
2748 !Subtarget
.hasVectorEnhancements1()) {
2749 SDValue H0
= expandV4F32ToV2F64(DAG
, 0, DL
, CmpOp0
, Chain
);
2750 SDValue L0
= expandV4F32ToV2F64(DAG
, 2, DL
, CmpOp0
, Chain
);
2751 SDValue H1
= expandV4F32ToV2F64(DAG
, 0, DL
, CmpOp1
, Chain
);
2752 SDValue L1
= expandV4F32ToV2F64(DAG
, 2, DL
, CmpOp1
, Chain
);
2754 SDVTList VTs
= DAG
.getVTList(MVT::v2i64
, MVT::Other
);
2755 SDValue HRes
= DAG
.getNode(Opcode
, DL
, VTs
, Chain
, H0
, H1
);
2756 SDValue LRes
= DAG
.getNode(Opcode
, DL
, VTs
, Chain
, L0
, L1
);
2757 SDValue Res
= DAG
.getNode(SystemZISD::PACK
, DL
, VT
, HRes
, LRes
);
2758 SDValue Chains
[6] = { H0
.getValue(1), L0
.getValue(1),
2759 H1
.getValue(1), L1
.getValue(1),
2760 HRes
.getValue(1), LRes
.getValue(1) };
2761 SDValue NewChain
= DAG
.getNode(ISD::TokenFactor
, DL
, MVT::Other
, Chains
);
2762 SDValue Ops
[2] = { Res
, NewChain
};
2763 return DAG
.getMergeValues(Ops
, DL
);
2765 SDValue HRes
= DAG
.getNode(Opcode
, DL
, MVT::v2i64
, H0
, H1
);
2766 SDValue LRes
= DAG
.getNode(Opcode
, DL
, MVT::v2i64
, L0
, L1
);
2767 return DAG
.getNode(SystemZISD::PACK
, DL
, VT
, HRes
, LRes
);
2770 SDVTList VTs
= DAG
.getVTList(VT
, MVT::Other
);
2771 return DAG
.getNode(Opcode
, DL
, VTs
, Chain
, CmpOp0
, CmpOp1
);
2773 return DAG
.getNode(Opcode
, DL
, VT
, CmpOp0
, CmpOp1
);
2776 // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
2777 // an integer mask of type VT. If Chain is nonnull, we have a strict
2778 // floating-point comparison. If in addition IsSignaling is true, we have
2779 // a strict signaling floating-point comparison.
2780 SDValue
SystemZTargetLowering::lowerVectorSETCC(SelectionDAG
&DAG
,
2781 const SDLoc
&DL
, EVT VT
,
2786 bool IsSignaling
) const {
2787 bool IsFP
= CmpOp0
.getValueType().isFloatingPoint();
2788 assert (!Chain
|| IsFP
);
2789 assert (!IsSignaling
|| Chain
);
2790 CmpMode Mode
= IsSignaling
? CmpMode::SignalingFP
:
2791 Chain
? CmpMode::StrictFP
: IsFP
? CmpMode::FP
: CmpMode::Int
;
2792 bool Invert
= false;
2795 // Handle tests for order using (or (ogt y x) (oge x y)).
2800 assert(IsFP
&& "Unexpected integer comparison");
2801 SDValue LT
= getVectorCmp(DAG
, getVectorComparison(ISD::SETOGT
, Mode
),
2802 DL
, VT
, CmpOp1
, CmpOp0
, Chain
);
2803 SDValue GE
= getVectorCmp(DAG
, getVectorComparison(ISD::SETOGE
, Mode
),
2804 DL
, VT
, CmpOp0
, CmpOp1
, Chain
);
2805 Cmp
= DAG
.getNode(ISD::OR
, DL
, VT
, LT
, GE
);
2807 Chain
= DAG
.getNode(ISD::TokenFactor
, DL
, MVT::Other
,
2808 LT
.getValue(1), GE
.getValue(1));
2812 // Handle <> tests using (or (ogt y x) (ogt x y)).
2817 assert(IsFP
&& "Unexpected integer comparison");
2818 SDValue LT
= getVectorCmp(DAG
, getVectorComparison(ISD::SETOGT
, Mode
),
2819 DL
, VT
, CmpOp1
, CmpOp0
, Chain
);
2820 SDValue GT
= getVectorCmp(DAG
, getVectorComparison(ISD::SETOGT
, Mode
),
2821 DL
, VT
, CmpOp0
, CmpOp1
, Chain
);
2822 Cmp
= DAG
.getNode(ISD::OR
, DL
, VT
, LT
, GT
);
2824 Chain
= DAG
.getNode(ISD::TokenFactor
, DL
, MVT::Other
,
2825 LT
.getValue(1), GT
.getValue(1));
2829 // Otherwise a single comparison is enough. It doesn't really
2830 // matter whether we try the inversion or the swap first, since
2831 // there are no cases where both work.
2833 if (unsigned Opcode
= getVectorComparisonOrInvert(CC
, Mode
, Invert
))
2834 Cmp
= getVectorCmp(DAG
, Opcode
, DL
, VT
, CmpOp0
, CmpOp1
, Chain
);
2836 CC
= ISD::getSetCCSwappedOperands(CC
);
2837 if (unsigned Opcode
= getVectorComparisonOrInvert(CC
, Mode
, Invert
))
2838 Cmp
= getVectorCmp(DAG
, Opcode
, DL
, VT
, CmpOp1
, CmpOp0
, Chain
);
2840 llvm_unreachable("Unhandled comparison");
2843 Chain
= Cmp
.getValue(1);
2848 DAG
.getSplatBuildVector(VT
, DL
, DAG
.getConstant(-1, DL
, MVT::i64
));
2849 Cmp
= DAG
.getNode(ISD::XOR
, DL
, VT
, Cmp
, Mask
);
2851 if (Chain
&& Chain
.getNode() != Cmp
.getNode()) {
2852 SDValue Ops
[2] = { Cmp
, Chain
};
2853 Cmp
= DAG
.getMergeValues(Ops
, DL
);
2858 SDValue
SystemZTargetLowering::lowerSETCC(SDValue Op
,
2859 SelectionDAG
&DAG
) const {
2860 SDValue CmpOp0
= Op
.getOperand(0);
2861 SDValue CmpOp1
= Op
.getOperand(1);
2862 ISD::CondCode CC
= cast
<CondCodeSDNode
>(Op
.getOperand(2))->get();
2864 EVT VT
= Op
.getValueType();
2866 return lowerVectorSETCC(DAG
, DL
, VT
, CC
, CmpOp0
, CmpOp1
);
2868 Comparison
C(getCmp(DAG
, CmpOp0
, CmpOp1
, CC
, DL
));
2869 SDValue CCReg
= emitCmp(DAG
, DL
, C
);
2870 return emitSETCC(DAG
, DL
, CCReg
, C
.CCValid
, C
.CCMask
);
2873 SDValue
SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op
,
2875 bool IsSignaling
) const {
2876 SDValue Chain
= Op
.getOperand(0);
2877 SDValue CmpOp0
= Op
.getOperand(1);
2878 SDValue CmpOp1
= Op
.getOperand(2);
2879 ISD::CondCode CC
= cast
<CondCodeSDNode
>(Op
.getOperand(3))->get();
2881 EVT VT
= Op
.getNode()->getValueType(0);
2882 if (VT
.isVector()) {
2883 SDValue Res
= lowerVectorSETCC(DAG
, DL
, VT
, CC
, CmpOp0
, CmpOp1
,
2884 Chain
, IsSignaling
);
2885 return Res
.getValue(Op
.getResNo());
2888 Comparison
C(getCmp(DAG
, CmpOp0
, CmpOp1
, CC
, DL
, Chain
, IsSignaling
));
2889 SDValue CCReg
= emitCmp(DAG
, DL
, C
);
2890 CCReg
->setFlags(Op
->getFlags());
2891 SDValue Result
= emitSETCC(DAG
, DL
, CCReg
, C
.CCValid
, C
.CCMask
);
2892 SDValue Ops
[2] = { Result
, CCReg
.getValue(1) };
2893 return DAG
.getMergeValues(Ops
, DL
);
2896 SDValue
SystemZTargetLowering::lowerBR_CC(SDValue Op
, SelectionDAG
&DAG
) const {
2897 ISD::CondCode CC
= cast
<CondCodeSDNode
>(Op
.getOperand(1))->get();
2898 SDValue CmpOp0
= Op
.getOperand(2);
2899 SDValue CmpOp1
= Op
.getOperand(3);
2900 SDValue Dest
= Op
.getOperand(4);
2903 Comparison
C(getCmp(DAG
, CmpOp0
, CmpOp1
, CC
, DL
));
2904 SDValue CCReg
= emitCmp(DAG
, DL
, C
);
2906 SystemZISD::BR_CCMASK
, DL
, Op
.getValueType(), Op
.getOperand(0),
2907 DAG
.getTargetConstant(C
.CCValid
, DL
, MVT::i32
),
2908 DAG
.getTargetConstant(C
.CCMask
, DL
, MVT::i32
), Dest
, CCReg
);
2911 // Return true if Pos is CmpOp and Neg is the negative of CmpOp,
2912 // allowing Pos and Neg to be wider than CmpOp.
2913 static bool isAbsolute(SDValue CmpOp
, SDValue Pos
, SDValue Neg
) {
2914 return (Neg
.getOpcode() == ISD::SUB
&&
2915 Neg
.getOperand(0).getOpcode() == ISD::Constant
&&
2916 cast
<ConstantSDNode
>(Neg
.getOperand(0))->getZExtValue() == 0 &&
2917 Neg
.getOperand(1) == Pos
&&
2919 (Pos
.getOpcode() == ISD::SIGN_EXTEND
&&
2920 Pos
.getOperand(0) == CmpOp
)));
2923 // Return the absolute or negative absolute of Op; IsNegative decides which.
2924 static SDValue
getAbsolute(SelectionDAG
&DAG
, const SDLoc
&DL
, SDValue Op
,
2926 Op
= DAG
.getNode(SystemZISD::IABS
, DL
, Op
.getValueType(), Op
);
2928 Op
= DAG
.getNode(ISD::SUB
, DL
, Op
.getValueType(),
2929 DAG
.getConstant(0, DL
, Op
.getValueType()), Op
);
2933 SDValue
SystemZTargetLowering::lowerSELECT_CC(SDValue Op
,
2934 SelectionDAG
&DAG
) const {
2935 SDValue CmpOp0
= Op
.getOperand(0);
2936 SDValue CmpOp1
= Op
.getOperand(1);
2937 SDValue TrueOp
= Op
.getOperand(2);
2938 SDValue FalseOp
= Op
.getOperand(3);
2939 ISD::CondCode CC
= cast
<CondCodeSDNode
>(Op
.getOperand(4))->get();
2942 Comparison
C(getCmp(DAG
, CmpOp0
, CmpOp1
, CC
, DL
));
2944 // Check for absolute and negative-absolute selections, including those
2945 // where the comparison value is sign-extended (for LPGFR and LNGFR).
2946 // This check supplements the one in DAGCombiner.
2947 if (C
.Opcode
== SystemZISD::ICMP
&&
2948 C
.CCMask
!= SystemZ::CCMASK_CMP_EQ
&&
2949 C
.CCMask
!= SystemZ::CCMASK_CMP_NE
&&
2950 C
.Op1
.getOpcode() == ISD::Constant
&&
2951 cast
<ConstantSDNode
>(C
.Op1
)->getZExtValue() == 0) {
2952 if (isAbsolute(C
.Op0
, TrueOp
, FalseOp
))
2953 return getAbsolute(DAG
, DL
, TrueOp
, C
.CCMask
& SystemZ::CCMASK_CMP_LT
);
2954 if (isAbsolute(C
.Op0
, FalseOp
, TrueOp
))
2955 return getAbsolute(DAG
, DL
, FalseOp
, C
.CCMask
& SystemZ::CCMASK_CMP_GT
);
2958 SDValue CCReg
= emitCmp(DAG
, DL
, C
);
2959 SDValue Ops
[] = {TrueOp
, FalseOp
,
2960 DAG
.getTargetConstant(C
.CCValid
, DL
, MVT::i32
),
2961 DAG
.getTargetConstant(C
.CCMask
, DL
, MVT::i32
), CCReg
};
2963 return DAG
.getNode(SystemZISD::SELECT_CCMASK
, DL
, Op
.getValueType(), Ops
);
2966 SDValue
SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode
*Node
,
2967 SelectionDAG
&DAG
) const {
2969 const GlobalValue
*GV
= Node
->getGlobal();
2970 int64_t Offset
= Node
->getOffset();
2971 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
2972 CodeModel::Model CM
= DAG
.getTarget().getCodeModel();
2975 if (Subtarget
.isPC32DBLSymbol(GV
, CM
)) {
2976 if (isInt
<32>(Offset
)) {
2977 // Assign anchors at 1<<12 byte boundaries.
2978 uint64_t Anchor
= Offset
& ~uint64_t(0xfff);
2979 Result
= DAG
.getTargetGlobalAddress(GV
, DL
, PtrVT
, Anchor
);
2980 Result
= DAG
.getNode(SystemZISD::PCREL_WRAPPER
, DL
, PtrVT
, Result
);
2982 // The offset can be folded into the address if it is aligned to a
2985 if (Offset
!= 0 && (Offset
& 1) == 0) {
2987 DAG
.getTargetGlobalAddress(GV
, DL
, PtrVT
, Anchor
+ Offset
);
2988 Result
= DAG
.getNode(SystemZISD::PCREL_OFFSET
, DL
, PtrVT
, Full
, Result
);
2992 // Conservatively load a constant offset greater than 32 bits into a
2994 Result
= DAG
.getTargetGlobalAddress(GV
, DL
, PtrVT
);
2995 Result
= DAG
.getNode(SystemZISD::PCREL_WRAPPER
, DL
, PtrVT
, Result
);
2998 Result
= DAG
.getTargetGlobalAddress(GV
, DL
, PtrVT
, 0, SystemZII::MO_GOT
);
2999 Result
= DAG
.getNode(SystemZISD::PCREL_WRAPPER
, DL
, PtrVT
, Result
);
3000 Result
= DAG
.getLoad(PtrVT
, DL
, DAG
.getEntryNode(), Result
,
3001 MachinePointerInfo::getGOT(DAG
.getMachineFunction()));
3004 // If there was a non-zero offset that we didn't fold, create an explicit
3007 Result
= DAG
.getNode(ISD::ADD
, DL
, PtrVT
, Result
,
3008 DAG
.getConstant(Offset
, DL
, PtrVT
));
3013 SDValue
SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode
*Node
,
3016 SDValue GOTOffset
) const {
3018 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
3019 SDValue Chain
= DAG
.getEntryNode();
3022 if (DAG
.getMachineFunction().getFunction().getCallingConv() ==
3024 report_fatal_error("In GHC calling convention TLS is not supported");
3026 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
3027 SDValue GOT
= DAG
.getGLOBAL_OFFSET_TABLE(PtrVT
);
3028 Chain
= DAG
.getCopyToReg(Chain
, DL
, SystemZ::R12D
, GOT
, Glue
);
3029 Glue
= Chain
.getValue(1);
3030 Chain
= DAG
.getCopyToReg(Chain
, DL
, SystemZ::R2D
, GOTOffset
, Glue
);
3031 Glue
= Chain
.getValue(1);
3033 // The first call operand is the chain and the second is the TLS symbol.
3034 SmallVector
<SDValue
, 8> Ops
;
3035 Ops
.push_back(Chain
);
3036 Ops
.push_back(DAG
.getTargetGlobalAddress(Node
->getGlobal(), DL
,
3037 Node
->getValueType(0),
3040 // Add argument registers to the end of the list so that they are
3041 // known live into the call.
3042 Ops
.push_back(DAG
.getRegister(SystemZ::R2D
, PtrVT
));
3043 Ops
.push_back(DAG
.getRegister(SystemZ::R12D
, PtrVT
));
3045 // Add a register mask operand representing the call-preserved registers.
3046 const TargetRegisterInfo
*TRI
= Subtarget
.getRegisterInfo();
3047 const uint32_t *Mask
=
3048 TRI
->getCallPreservedMask(DAG
.getMachineFunction(), CallingConv::C
);
3049 assert(Mask
&& "Missing call preserved mask for calling convention");
3050 Ops
.push_back(DAG
.getRegisterMask(Mask
));
3052 // Glue the call to the argument copies.
3053 Ops
.push_back(Glue
);
3056 SDVTList NodeTys
= DAG
.getVTList(MVT::Other
, MVT::Glue
);
3057 Chain
= DAG
.getNode(Opcode
, DL
, NodeTys
, Ops
);
3058 Glue
= Chain
.getValue(1);
3060 // Copy the return value from %r2.
3061 return DAG
.getCopyFromReg(Chain
, DL
, SystemZ::R2D
, PtrVT
, Glue
);

SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
                                                  SelectionDAG &DAG) const {
  SDValue Chain = DAG.getEntryNode();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // The high part of the thread pointer is in access register 0.
  SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
  TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);

  // The low part of the thread pointer is in access register 1.
  SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
  TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);

  // Merge them into a single 64-bit address.
  SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
                                    DAG.getConstant(32, DL, PtrVT));
  return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
}
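
// Worked example (illustrative): with access registers A0 = 0x00000001 and
// A1 = 0x23456789, the merged thread pointer is
// (0x00000001 << 32) | 0x23456789 = 0x0000000123456789.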

SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
                                                     SelectionDAG &DAG) const {
  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(Node, DAG);
  SDLoc DL(Node);
  const GlobalValue *GV = Node->getGlobal();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  TLSModel::Model model = DAG.getTarget().getTLSModel(GV);

  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
      CallingConv::GHC)
    report_fatal_error("In GHC calling convention TLS is not supported");

  SDValue TP = lowerThreadPointer(DL, DAG);

  // Get the offset of GA from the thread pointer, based on the TLS model.
  SDValue Offset;
  switch (model) {
    case TLSModel::GeneralDynamic: {
      // Load the GOT offset of the tls_index (module ID / per-symbol offset).
      SystemZConstantPoolValue *CPV =
        SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);

      Offset = DAG.getConstantPool(CPV, PtrVT, 8);
      Offset = DAG.getLoad(
          PtrVT, DL, DAG.getEntryNode(), Offset,
          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

      // Call __tls_get_offset to retrieve the offset.
      Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
      break;
    }

    case TLSModel::LocalDynamic: {
      // Load the GOT offset of the module ID.
      SystemZConstantPoolValue *CPV =
        SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);

      Offset = DAG.getConstantPool(CPV, PtrVT, 8);
      Offset = DAG.getLoad(
          PtrVT, DL, DAG.getEntryNode(), Offset,
          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

      // Call __tls_get_offset to retrieve the module base offset.
      Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);

      // Note: The SystemZLDCleanupPass will remove redundant computations
      // of the module base offset.  Count total number of local-dynamic
      // accesses to trigger execution of that pass.
      SystemZMachineFunctionInfo* MFI =
        DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
      MFI->incNumLocalDynamicTLSAccesses();

      // Add the per-symbol offset.
      CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);

      SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
      DTPOffset = DAG.getLoad(
          PtrVT, DL, DAG.getEntryNode(), DTPOffset,
          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

      Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
      break;
    }

    case TLSModel::InitialExec: {
      // Load the offset from the GOT.
      Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
                                          SystemZII::MO_INDNTPOFF);
      Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
      Offset =
          DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
                      MachinePointerInfo::getGOT(DAG.getMachineFunction()));
      break;
    }

    case TLSModel::LocalExec: {
      // Force the offset into the constant pool and load it from there.
      SystemZConstantPoolValue *CPV =
        SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);

      Offset = DAG.getConstantPool(CPV, PtrVT, 8);
      Offset = DAG.getLoad(
          PtrVT, DL, DAG.getEntryNode(), Offset,
          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
      break;
    }
  }

  // Add the base and offset together.
  return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
}

SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Node);
  const BlockAddress *BA = Node->getBlockAddress();
  int64_t Offset = Node->getOffset();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
  return Result;
}

SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
                                              SelectionDAG &DAG) const {
  SDLoc DL(JT);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);

  // Use LARL to load the address of the table.
  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
}

SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(CP);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  SDValue Result;
  if (CP->isMachineConstantPoolEntry())
    Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
                                       CP->getAlignment());
  else
    Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
                                       CP->getAlignment(), CP->getOffset());

  // Use LARL to load the address of the constant pool entry.
  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
}

SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);

  SDLoc DL(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // By definition, the frame address is the address of the back chain.
  auto *TFL =
      static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
  int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
  SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);

  // FIXME The frontend should detect this case.
  if (Depth > 0)
    report_fatal_error("Unsupported stack frame traversal count");

  return BackChain;
}

SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
                                               SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDLoc DL(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // FIXME The frontend should detect this case.
  if (Depth > 0)
    report_fatal_error("Unsupported stack frame traversal count");

  // Return R14D, which has the return address. Mark it an implicit live-in.
  unsigned LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
}

SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue In = Op.getOperand(0);
  EVT InVT = In.getValueType();
  EVT ResVT = Op.getValueType();

  // Convert loads directly.  This is normally done by DAGCombiner,
  // but we need this case for bitcasts that are created during lowering
  // and which are then lowered themselves.
  if (auto *LoadN = dyn_cast<LoadSDNode>(In))
    if (ISD::isNormalLoad(LoadN)) {
      SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
                                    LoadN->getBasePtr(), LoadN->getMemOperand());
      // Update the chain uses.
      DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
      return NewLoad;
    }

  if (InVT == MVT::i32 && ResVT == MVT::f32) {
    SDValue In64;
    if (Subtarget.hasHighWord()) {
      SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
                                       MVT::i64);
      In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
                                       MVT::i64, SDValue(U64, 0), In);
    } else {
      In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
      In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
                         DAG.getConstant(32, DL, MVT::i64));
    }
    SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
    return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
                                      DL, MVT::f32, Out64);
  }
  if (InVT == MVT::f32 && ResVT == MVT::i32) {
    SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
    SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
                                             MVT::f64, SDValue(U64, 0), In);
    SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
    if (Subtarget.hasHighWord())
      return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
                                        MVT::i32, Out64);
    SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
                                DAG.getConstant(32, DL, MVT::i64));
    return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
  }
  llvm_unreachable("Unexpected bitcast combination");
}

SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
                                            SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  SystemZMachineFunctionInfo *FuncInfo =
    MF.getInfo<SystemZMachineFunctionInfo>();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  SDValue Chain = Op.getOperand(0);
  SDValue Addr = Op.getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  SDLoc DL(Op);

  // The initial values of each field.
  const unsigned NumFields = 4;
  SDValue Fields[NumFields] = {
    DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
    DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
    DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
    DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
  };

  // Store each field into its respective slot.
  SDValue MemOps[NumFields];
  unsigned Offset = 0;
  for (unsigned I = 0; I < NumFields; ++I) {
    SDValue FieldAddr = Addr;
    if (Offset != 0)
      FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
                             MachinePointerInfo(SV, Offset));
    Offset += 8;
  }
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}

SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue DstPtr = Op.getOperand(1);
  SDValue SrcPtr = Op.getOperand(2);
  const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
  SDLoc DL(Op);

  return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
                       /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
                       /*isTailCall*/false,
                       MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}

SDValue SystemZTargetLowering::
lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
  MachineFunction &MF = DAG.getMachineFunction();
  bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
  bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");

  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  SDValue Align = Op.getOperand(2);
  SDLoc DL(Op);

  // If user has set the no alignment function attribute, ignore
  // alloca alignments.
  uint64_t AlignVal = (RealignOpt ?
                       dyn_cast<ConstantSDNode>(Align)->getZExtValue() : 0);

  uint64_t StackAlign = TFI->getStackAlignment();
  uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
  uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;

  unsigned SPReg = getStackPointerRegisterToSaveRestore();
  SDValue NeededSpace = Size;

  // Get a reference to the stack pointer.
  SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);

  // If we need a backchain, save it now.
  SDValue Backchain;
  if (StoreBackchain)
    Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());

  // Add extra space for alignment if needed.
  if (ExtraAlignSpace)
    NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
                              DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));

  // Get the new stack pointer value.
  SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);

  // Copy the new stack pointer back.
  Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);

  // The allocated data lives above the 160 bytes allocated for the standard
  // frame, plus any outgoing stack arguments.  We don't know how much that
  // amounts to yet, so emit a special ADJDYNALLOC placeholder.
  SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
  SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);

  // Dynamically realign if needed.
  if (RequiredAlign > StackAlign) {
    Result =
      DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
                  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
    Result =
      DAG.getNode(ISD::AND, DL, MVT::i64, Result,
                  DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
  }

  if (StoreBackchain)
    Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());

  SDValue Ops[2] = { Result, Chain };
  return DAG.getMergeValues(Ops, DL);
}
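
// Worked example (illustrative): for an alloca requesting 32-byte alignment
// with an 8-byte stack alignment, ExtraAlignSpace is 24.  The code allocates
// Size + 24 bytes, adds the ADJDYNALLOC placeholder, then rounds the address
// up with (Result + 24) & ~31 to reach a 32-byte boundary.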

SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
    SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);

  return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
}

SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue Ops[2];
  if (is32Bit(VT))
    // Just do a normal 64-bit multiplication and extract the results.
    // We define this so that it can be used for constant division.
    lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
                    Op.getOperand(1), Ops[1], Ops[0]);
  else if (Subtarget.hasMiscellaneousExtensions2())
    // SystemZISD::SMUL_LOHI returns the low result in the odd register and
    // the high result in the even register.  ISD::SMUL_LOHI is defined to
    // return the low half first, so the results are in reverse order.
    lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
                     Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
  else {
    // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
    //
    //   (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
    //
    // but using the fact that the upper halves are either all zeros
    // or all ones:
    //
    //   (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
    //
    // and grouping the right terms together since they are quicker than the
    // multiplication:
    //
    //   (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
    SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
    SDValue LL = Op.getOperand(0);
    SDValue RL = Op.getOperand(1);
    SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
    SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
    // SystemZISD::UMUL_LOHI returns the low result in the odd register and
    // the high result in the even register.  ISD::SMUL_LOHI is defined to
    // return the low half first, so the results are in reverse order.
    lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
                     LL, RL, Ops[1], Ops[0]);
    SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
    SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
    SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
    Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
  }
  return DAG.getMergeValues(Ops, DL);
}
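
// Worked example for the signed correction above (illustrative): with
// ll = -1 and rl = 2, UMUL_LOHI gives high = 1, low = -2.  Since lh = -1 and
// rh = 0, the correction (lh & rl) + (ll & rh) equals 2, so the signed high
// half becomes 1 - 2 = -1, matching the true 128-bit product -2.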

SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue Ops[2];
  if (is32Bit(VT))
    // Just do a normal 64-bit multiplication and extract the results.
    // We define this so that it can be used for constant division.
    lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
                    Op.getOperand(1), Ops[1], Ops[0]);
  else
    // SystemZISD::UMUL_LOHI returns the low result in the odd register and
    // the high result in the even register.  ISD::UMUL_LOHI is defined to
    // return the low half first, so the results are in reverse order.
    lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
                     Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, DL);
}

SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  EVT VT = Op.getValueType();
  SDLoc DL(Op);

  // We use DSGF for 32-bit division.  This means the first operand must
  // always be 64-bit, and the second operand should be 32-bit whenever
  // that is possible, to improve performance.
  if (is32Bit(VT))
    Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
  else if (DAG.ComputeNumSignBits(Op1) > 32)
    Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);

  // DSG(F) returns the remainder in the even register and the
  // quotient in the odd register.
  SDValue Ops[2];
  lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, DL);
}

SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
                                            SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);

  // DL(G) returns the remainder in the even register and the
  // quotient in the odd register.
  SDValue Ops[2];
  lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
                   Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, DL);
}

SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");

  // Get the known-zero masks for each operand.
  SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
  KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
                        DAG.computeKnownBits(Ops[1])};

  // See if the upper 32 bits of one operand and the lower 32 bits of the
  // other are known zero.  They are the low and high operands respectively.
  uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
                       Known[1].Zero.getZExtValue() };
  unsigned High, Low;
  if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
    High = 1, Low = 0;
  else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
    High = 0, Low = 1;
  else
    return Op;

  SDValue LowOp = Ops[Low];
  SDValue HighOp = Ops[High];

  // If the high part is a constant, we're better off using IILH.
  if (HighOp.getOpcode() == ISD::Constant)
    return Op;

  // If the low part is a constant that is outside the range of LHI,
  // then we're better off using IILF.
  if (LowOp.getOpcode() == ISD::Constant) {
    int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
    if (!isInt<16>(Value))
      return Op;
  }

  // Check whether the high part is an AND that doesn't change the
  // high 32 bits and just masks out low bits.  We can skip it if so.
  if (HighOp.getOpcode() == ISD::AND &&
      HighOp.getOperand(1).getOpcode() == ISD::Constant) {
    SDValue HighOp0 = HighOp.getOperand(0);
    uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
    if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
      HighOp = HighOp0;
  }

  // Take advantage of the fact that all GR32 operations only change the
  // low 32 bits by truncating Low to an i32 and inserting it directly
  // using a subreg.  The interesting cases are those where the truncation
  // can be folded.
  SDLoc DL(Op);
  SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
  return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
                                   MVT::i64, HighOp, Low32);
}
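
// Worked example (illustrative): for (x & 0xffffffff00000000) | zext(y to i64),
// the first operand has its low 32 bits known zero (the High operand) and the
// second has its high 32 bits known zero (the Low operand), so the OR becomes
// an insertion of the truncated low operand into subreg_l32 of the high one.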

// Lower SADDO/SSUBO/UADDO/USUBO nodes.
SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDNode *N = Op.getNode();
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDLoc DL(N);
  unsigned BaseOp = 0;
  unsigned CCValid = 0;
  unsigned CCMask = 0;

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown instruction!");
  case ISD::SADDO:
    BaseOp = SystemZISD::SADDO;
    CCValid = SystemZ::CCMASK_ARITH;
    CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
    break;
  case ISD::SSUBO:
    BaseOp = SystemZISD::SSUBO;
    CCValid = SystemZ::CCMASK_ARITH;
    CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
    break;
  case ISD::UADDO:
    BaseOp = SystemZISD::UADDO;
    CCValid = SystemZ::CCMASK_LOGICAL;
    CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
    break;
  case ISD::USUBO:
    BaseOp = SystemZISD::USUBO;
    CCValid = SystemZ::CCMASK_LOGICAL;
    CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
    break;
  }

  SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);

  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
  if (N->getValueType(1) == MVT::i1)
    SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);

  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
}
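
// Worked example (illustrative): a 64-bit UADDO of 0xffffffffffffffff and 1
// yields a zero result with the carry condition set; emitSETCC materializes
// that CC test (CCMASK_LOGICAL / CCMASK_LOGICAL_CARRY) as the overflow flag
// returned in the node's second result value.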

static bool isAddCarryChain(SDValue Carry) {
  while (Carry.getOpcode() == ISD::ADDCARRY)
    Carry = Carry.getOperand(2);
  return Carry.getOpcode() == ISD::UADDO;
}

static bool isSubBorrowChain(SDValue Carry) {
  while (Carry.getOpcode() == ISD::SUBCARRY)
    Carry = Carry.getOperand(2);
  return Carry.getOpcode() == ISD::USUBO;
}

// Lower ADDCARRY/SUBCARRY nodes.
SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
                                                SelectionDAG &DAG) const {

  SDNode *N = Op.getNode();
  MVT VT = N->getSimpleValueType(0);

  // Let legalize expand this if it isn't a legal type yet.
  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Carry = Op.getOperand(2);
  SDLoc DL(N);
  unsigned BaseOp = 0;
  unsigned CCValid = 0;
  unsigned CCMask = 0;

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown instruction!");
  case ISD::ADDCARRY:
    if (!isAddCarryChain(Carry))
      return SDValue();

    BaseOp = SystemZISD::ADDCARRY;
    CCValid = SystemZ::CCMASK_LOGICAL;
    CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
    break;
  case ISD::SUBCARRY:
    if (!isSubBorrowChain(Carry))
      return SDValue();

    BaseOp = SystemZISD::SUBCARRY;
    CCValid = SystemZ::CCMASK_LOGICAL;
    CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
    break;
  }

  // Set the condition code from the carry flag.
  Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
                      DAG.getConstant(CCValid, DL, MVT::i32),
                      DAG.getConstant(CCMask, DL, MVT::i32));

  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);

  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
  if (N->getValueType(1) == MVT::i1)
    SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);

  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
}

SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
                                          SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  Op = Op.getOperand(0);

  // Handle vector types via VPOPCT.
  if (VT.isVector()) {
    Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
    Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
    switch (VT.getScalarSizeInBits()) {
    case 8:
      break;
    case 16: {
      Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
      SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
      SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
      Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
      Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
      break;
    }
    case 32: {
      SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
                                            DAG.getConstant(0, DL, MVT::i32));
      Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
      break;
    }
    case 64: {
      SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
                                            DAG.getConstant(0, DL, MVT::i32));
      Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
      Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
      break;
    }
    default:
      llvm_unreachable("Unexpected type");
    }
    return Op;
  }

  // Get the known-zero mask for the operand.
  KnownBits Known = DAG.computeKnownBits(Op);
  unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
  if (NumSignificantBits == 0)
    return DAG.getConstant(0, DL, VT);

  // Skip known-zero high parts of the operand.
  int64_t OrigBitSize = VT.getSizeInBits();
  int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
  BitSize = std::min(BitSize, OrigBitSize);

  // The POPCNT instruction counts the number of bits in each byte.
  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
  Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);

  // Add up per-byte counts in a binary tree.  All bits of Op at
  // position larger than BitSize remain zero throughout.
  for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
    SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
    if (BitSize != OrigBitSize)
      Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
                        DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
    Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
  }

  // Extract overall result from high byte.
  if (BitSize > 8)
    Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                     DAG.getConstant(BitSize - 8, DL, VT));

  return Op;
}
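
// Worked example for the scalar path (illustrative): for a 32-bit operand,
// POPCNT leaves each byte holding its own bit count b3..b0.  The loop then
// computes Op += Op << 16 (top byte becomes b3+b1) and Op += Op << 8 (top
// byte becomes b3+b2+b1+b0), and the final SRL by 24 extracts that total.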

SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
    cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
  SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
    cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());

  // The only fence that needs an instruction is a sequentially-consistent
  // cross-thread fence.
  if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
      FenceSSID == SyncScope::System) {
    return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
                                      Op.getOperand(0)),
                   0);
  }

  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
  return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
}

// Op is an atomic load.  Lower it into a normal volatile load.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
                                                SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
                        Node->getChain(), Node->getBasePtr(),
                        Node->getMemoryVT(), Node->getMemOperand());
}

// Op is an atomic store.  Lower it into a normal volatile store.
SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
                                    Node->getBasePtr(), Node->getMemoryVT(),
                                    Node->getMemOperand());
  // We have to enforce sequential consistency by performing a
  // serialization operation after the store.
  if (Node->getOrdering() == AtomicOrdering::SequentiallyConsistent)
    Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op),
                                       MVT::Other, Chain), 0);
  return Chain;
}

// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation.  Lower the first
// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
                                                   SelectionDAG &DAG,
                                                   unsigned Opcode) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());

  // 32-bit operations need no code outside the main loop.
  EVT NarrowVT = Node->getMemoryVT();
  EVT WideVT = MVT::i32;
  if (NarrowVT == WideVT)
    return Op;

  int64_t BitSize = NarrowVT.getSizeInBits();
  SDValue ChainIn = Node->getChain();
  SDValue Addr = Node->getBasePtr();
  SDValue Src2 = Node->getVal();
  MachineMemOperand *MMO = Node->getMemOperand();
  SDLoc DL(Node);
  EVT PtrVT = Addr.getValueType();

  // Convert atomic subtracts of constants into additions.
  if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
    if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
      Opcode = SystemZISD::ATOMIC_LOADW_ADD;
      Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
    }

  // Get the address of the containing word.
  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
                                    DAG.getConstant(-4, DL, PtrVT));

  // Get the number of bits that the word must be rotated left in order
  // to bring the field to the top bits of a GR32.
  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
                                 DAG.getConstant(3, DL, PtrVT));
  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);

  // Get the complementing shift amount, for rotating a field in the top
  // bits back to its proper position.
  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
                                    DAG.getConstant(0, DL, WideVT), BitShift);

  // Extend the source operand to 32 bits and prepare it for the inner loop.
  // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
  // operations require the source to be shifted in advance.  (This shift
  // can be folded if the source is constant.)  For AND and NAND, the lower
  // bits must be set, while for other opcodes they should be left clear.
  if (Opcode != SystemZISD::ATOMIC_SWAPW)
    Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
                       DAG.getConstant(32 - BitSize, DL, WideVT));
  if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
      Opcode == SystemZISD::ATOMIC_LOADW_NAND)
    Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
                       DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));

  // Construct the ATOMIC_LOADW_* node.
  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
  SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
                    DAG.getConstant(BitSize, DL, WideVT) };
  SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
                                             NarrowVT, MMO);

  // Rotate the result of the final CS so that the field is in the lower
  // bits of a GR32, then truncate it.
  SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
                                    DAG.getConstant(BitSize, DL, WideVT));
  SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);

  SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
  return DAG.getMergeValues(RetOps, DL);
}
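
// Worked example (illustrative): an 8-bit field at byte offset 1 of its
// aligned word gives BitShift = 8; rotating the word left by 8 brings that
// byte to the top of the GR32 for the compare-and-swap loop.  The final CS
// result is rotated left by BitShift + BitSize = 16, which moves the byte
// into the low 8 bits so the TRUNCATE can extract it.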

// Op is an ATOMIC_LOAD_SUB operation.  Lower 8- and 16-bit operations
// into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit
// operations into additions.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
                                                    SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  EVT MemVT = Node->getMemoryVT();
  if (MemVT == MVT::i32 || MemVT == MVT::i64) {
    // A full-width operation.
    assert(Op.getValueType() == MemVT && "Mismatched VTs");
    SDValue Src2 = Node->getVal();
    SDValue NegSrc2;
    SDLoc DL(Src2);

    if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) {
      // Use an addition if the operand is constant and either LAA(G) is
      // available or the negative value is in the range of A(G)FHI.
      int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
      if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
        NegSrc2 = DAG.getConstant(Value, DL, MemVT);
    } else if (Subtarget.hasInterlockedAccess1())
      // Use LAA(G) if available.
      NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT),
                            Src2);

    if (NegSrc2.getNode())
      return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
                           Node->getChain(), Node->getBasePtr(), NegSrc2,
                           Node->getMemOperand());

    // Use the node as-is.
    return Op;
  }

  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
}

// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
                                                    SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  SDValue ChainIn = Node->getOperand(0);
  SDValue Addr = Node->getOperand(1);
  SDValue CmpVal = Node->getOperand(2);
  SDValue SwapVal = Node->getOperand(3);
  MachineMemOperand *MMO = Node->getMemOperand();
  SDLoc DL(Node);

  // We have native support for 32-bit and 64-bit compare and swap, but we
  // still need to expand extracting the "success" result from the CC.
  EVT NarrowVT = Node->getMemoryVT();
  EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
  if (NarrowVT == WideVT) {
    SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
    SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
    SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
                                               DL, Tys, Ops, NarrowVT, MMO);
    SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
                                SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);

    DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
    DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
    return SDValue();
  }

  // Convert 8-bit and 16-bit compare and swap to a loop, implemented
  // via a fullword ATOMIC_CMP_SWAPW operation.
  int64_t BitSize = NarrowVT.getSizeInBits();
  EVT PtrVT = Addr.getValueType();

  // Get the address of the containing word.
  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
                                    DAG.getConstant(-4, DL, PtrVT));

  // Get the number of bits that the word must be rotated left in order
  // to bring the field to the top bits of a GR32.
  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
                                 DAG.getConstant(3, DL, PtrVT));
  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);

  // Get the complementing shift amount, for rotating a field in the top
  // bits back to its proper position.
  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
                                    DAG.getConstant(0, DL, WideVT), BitShift);

  // Construct the ATOMIC_CMP_SWAPW node.
  SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
  SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
                    NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
  SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
                                             VTList, Ops, NarrowVT, MMO);
  SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
                              SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);

  DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
  return SDValue();
}

MachineMemOperand::Flags
SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
  // Because of how we convert atomic_load and atomic_store to normal loads and
  // stores in the DAG, we need to ensure that the MMOs are marked volatile
  // since DAGCombine hasn't been updated to account for atomic, but non
  // volatile loads.  (See D57601)
  if (auto *SI = dyn_cast<StoreInst>(&I))
    if (SI->isAtomic())
      return MachineMemOperand::MOVolatile;
  if (auto *LI = dyn_cast<LoadInst>(&I))
    if (LI->isAtomic())
      return MachineMemOperand::MOVolatile;
  if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
    if (AI->isAtomic())
      return MachineMemOperand::MOVolatile;
  if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
    if (AI->isAtomic())
      return MachineMemOperand::MOVolatile;
  return MachineMemOperand::MONone;
}

SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    report_fatal_error("Variable-sized stack allocations are not supported "
                       "in GHC calling convention");
  return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
                            SystemZ::R15D, Op.getValueType());
}

SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
  bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");

  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    report_fatal_error("Variable-sized stack allocations are not supported "
                       "in GHC calling convention");

  SDValue Chain = Op.getOperand(0);
  SDValue NewSP = Op.getOperand(1);
  SDValue Backchain;
  SDLoc DL(Op);

  if (StoreBackchain) {
    SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, MVT::i64);
    Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
  }

  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R15D, NewSP);

  if (StoreBackchain)
    Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());

  return Chain;
}

SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
                                             SelectionDAG &DAG) const {
  bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
  if (!IsData)
    // Just preserve the chain.
    return Op.getOperand(0);

  SDLoc DL(Op);
  bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
  unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
  auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
  SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
                   Op.getOperand(1)};
  return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
                                 Node->getVTList(), Ops,
                                 Node->getMemoryVT(), Node->getMemOperand());
}

// Convert condition code in CCReg to an i32 value.
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
  SDLoc DL(CCReg);
  SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
  return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
                     DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
}
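
// Note (illustrative): IPM places the condition code in bits 28-29 of its
// result register, so shifting right by SystemZ::IPM_CC yields the raw CC
// value 0-3 as an i32.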

SDValue
SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
                                              SelectionDAG &DAG) const {
  unsigned Opcode, CCValid;
  if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
    assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
    SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
    SDValue CC = getCCResult(DAG, SDValue(Node, 0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
    return SDValue(Node, 0);
  }

  return SDValue();
}

SDValue
SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                               SelectionDAG &DAG) const {
  unsigned Opcode, CCValid;
  if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
    SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
    if (Op->getNumValues() == 1)
      return getCCResult(DAG, SDValue(Node, 0));
    assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
    return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
                       SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
  }

  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  switch (Id) {
  case Intrinsic::thread_pointer:
    return lowerThreadPointer(SDLoc(Op), DAG);

  case Intrinsic::s390_vpdi:
    return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));

  case Intrinsic::s390_vperm:
    return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));

  case Intrinsic::s390_vuphb:
  case Intrinsic::s390_vuphh:
  case Intrinsic::s390_vuphf:
    return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));

  case Intrinsic::s390_vuplhb:
  case Intrinsic::s390_vuplhh:
  case Intrinsic::s390_vuplhf:
    return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));

  case Intrinsic::s390_vuplb:
  case Intrinsic::s390_vuplhw:
  case Intrinsic::s390_vuplf:
    return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));

  case Intrinsic::s390_vupllb:
  case Intrinsic::s390_vupllh:
  case Intrinsic::s390_vupllf:
    return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));

  case Intrinsic::s390_vsumb:
  case Intrinsic::s390_vsumh:
  case Intrinsic::s390_vsumgh:
  case Intrinsic::s390_vsumgf:
  case Intrinsic::s390_vsumqf:
  case Intrinsic::s390_vsumqg:
    return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  }

  return SDValue();
}

// Says that SystemZISD operation Opcode can be used to perform the equivalent
// of a VPERM with permute vector Bytes.  If Opcode takes three operands,
// Operand is the constant third operand, otherwise it is the number of
// bytes in each element of the result.
struct Permute {
  unsigned Opcode;
  unsigned Operand;
  unsigned char Bytes[SystemZ::VectorBytes];
};

static const Permute PermuteForms[] = {
  { SystemZISD::MERGE_HIGH, 8,
    { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
  { SystemZISD::MERGE_HIGH, 4,
    { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
  { SystemZISD::MERGE_HIGH, 2,
    { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
  { SystemZISD::MERGE_HIGH, 1,
    { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
  { SystemZISD::MERGE_LOW, 8,
    { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
  { SystemZISD::MERGE_LOW, 4,
    { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
  { SystemZISD::MERGE_LOW, 2,
    { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
  { SystemZISD::MERGE_LOW, 1,
    { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
  { SystemZISD::PACK, 4,
    { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
  { SystemZISD::PACK, 2,
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
  { SystemZISD::PACK, 1,
    { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
  // VPDI V1, V2, 4  (low half of V1, high half of V2)
  { SystemZISD::PERMUTE_DWORDS, 4,
    { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
  // VPDI V1, V2, 1  (high half of V1, low half of V2)
  { SystemZISD::PERMUTE_DWORDS, 1,
    { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
};
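
// Reading guide (illustrative): each entry lists, for result bytes 0-15,
// which source byte is taken; values 0-15 select from the first operand and
// 16-31 from the second.  For example the 4-byte MERGE_HIGH row interleaves
// the first two 32-bit elements of each operand.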

// Called after matching a vector shuffle against a particular pattern.
// Both the original shuffle and the pattern have two vector operands.
// OpNos[0] is the operand of the original shuffle that should be used for
// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
// OpNos[1] is the same for operand 1 of the pattern.  Resolve these -1s and
// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
// for operands 0 and 1 of the pattern.
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
  if (OpNos[0] < 0) {
    if (OpNos[1] < 0)
      return false;
    OpNo0 = OpNo1 = OpNos[1];
  } else if (OpNos[1] < 0) {
    OpNo0 = OpNo1 = OpNos[0];
  } else {
    OpNo0 = OpNos[0];
    OpNo1 = OpNos[1];
  }
  return true;
}

// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes.  Return true if the VPERM can be implemented using P.
// When returning true set OpNo0 to the VPERM operand that should be
// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
//
// For example, if swapping the VPERM operands allows P to match, OpNo0
// will be 1 and OpNo1 will be 0.  If instead Bytes only refers to one
// operand, but rewriting it to use two duplicated operands allows it to
// match P, then OpNo0 and OpNo1 will be the same.
static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
                         unsigned &OpNo0, unsigned &OpNo1) {
  int OpNos[] = { -1, -1 };
  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
    int Elt = Bytes[I];
    if (Elt >= 0) {
      // Make sure that the two permute vectors use the same suboperand
      // byte number.  Only the operand numbers (the high bits) are
      // allowed to differ.
      if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
        return false;
      int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
      int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
      // Make sure that the operand mappings are consistent with previous
      // elements.
      if (OpNos[ModelOpNo] == 1 - RealOpNo)
        return false;
      OpNos[ModelOpNo] = RealOpNo;
    }
  }
  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
}

// As above, but search for a matching permute.
static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
                                   unsigned &OpNo0, unsigned &OpNo1) {
  for (auto &P : PermuteForms)
    if (matchPermute(Bytes, P, OpNo0, OpNo1))
      return &P;
  return nullptr;
}

// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes.  This permute is an operand of an outer permute.
// See whether redistributing the -1 bytes gives a shuffle that can be
// implemented using P.  If so, set Transform to a VPERM-like permute vector
// that, when applied to the result of P, gives the original permute in Bytes.
static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
                               const Permute &P,
                               SmallVectorImpl<int> &Transform) {
  unsigned To = 0;
  for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
    int Elt = Bytes[From];
    if (Elt < 0)
      // Byte number From of the result is undefined.
      Transform[From] = -1;
    else {
      while (P.Bytes[To] != Elt) {
        To += 1;
        if (To == SystemZ::VectorBytes)
          return false;
      }
      Transform[From] = To;
    }
  }
  return true;
}

// As above, but search for a matching permute.
static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
                                         SmallVectorImpl<int> &Transform) {
  for (auto &P : PermuteForms)
    if (matchDoublePermute(Bytes, P, Transform))
      return &P;
  return nullptr;
}

// Convert the mask of the given shuffle op into a byte-level mask,
// as if it had type vNi8.
static bool getVPermMask(SDValue ShuffleOp,
                         SmallVectorImpl<int> &Bytes) {
  EVT VT = ShuffleOp.getValueType();
  unsigned NumElements = VT.getVectorNumElements();
  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();

  if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
    Bytes.resize(NumElements * BytesPerElement, -1);
    for (unsigned I = 0; I < NumElements; ++I) {
      int Index = VSN->getMaskElt(I);
      if (Index >= 0)
        for (unsigned J = 0; J < BytesPerElement; ++J)
          Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
    }
    return true;
  }
  if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
      isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
    unsigned Index = ShuffleOp.getConstantOperandVal(1);
    Bytes.resize(NumElements * BytesPerElement, -1);
    for (unsigned I = 0; I < NumElements; ++I)
      for (unsigned J = 0; J < BytesPerElement; ++J)
        Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
    return true;
  }
  return false;
}

// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes.  See whether bytes [Start, Start + BytesPerElement) of
// the result come from a contiguous sequence of bytes from one input.
// Set Base to the selector for the first byte if so.
static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
                            unsigned BytesPerElement, int &Base) {
  Base = -1;
  for (unsigned I = 0; I < BytesPerElement; ++I) {
    if (Bytes[Start + I] >= 0) {
      unsigned Elem = Bytes[Start + I];
      if (Base < 0) {
        Base = Elem - I;
        // Make sure the bytes would come from one input operand.
        if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
          return false;
      } else if (unsigned(Base) != Elem - I)
        return false;
    }
  }
  return true;
}

// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes.  Return true if it can be performed using VSLDI.
// When returning true, set StartIndex to the shift amount and OpNo0
// and OpNo1 to the VPERM operands that should be used as the first
// and second shift operand respectively.
static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
                               unsigned &StartIndex, unsigned &OpNo0,
                               unsigned &OpNo1) {
  int OpNos[] = { -1, -1 };
  int Shift = -1;
  for (unsigned I = 0; I < 16; ++I) {
    int Index = Bytes[I];
    if (Index >= 0) {
      int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
      int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
      int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
      if (Shift < 0)
        Shift = ExpectedShift;
      else if (Shift != ExpectedShift)
        return false;
      // Make sure that the operand mappings are consistent with previous
      // elements.
      if (OpNos[ModelOpNo] == 1 - RealOpNo)
        return false;
      OpNos[ModelOpNo] = RealOpNo;
    }
  }
  StartIndex = Shift;
  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
}
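
// Worked example (illustrative): Bytes = { 1, 2, ..., 15, 16 } has every
// element shifted by one byte, so ExpectedShift is 1 throughout; the first
// fifteen bytes map to operand 0 and the last to operand 1, matching a
// VSLDI (SHL_DOUBLE) with StartIndex 1.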

// Create a node that performs P on operands Op0 and Op1, casting the
// operands to the appropriate type.  The type of the result is determined by P.
static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
                              const Permute &P, SDValue Op0, SDValue Op1) {
  // VPDI (PERMUTE_DWORDS) always operates on v2i64s.  The input
  // elements of a PACK are twice as wide as the outputs.
  unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
                      P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
                      P.Operand);
  // Cast both operands to the appropriate type.
  MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
                              SystemZ::VectorBytes / InBytes);
  Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
  Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
  SDValue Op;
  if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
    SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
    Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
  } else if (P.Opcode == SystemZISD::PACK) {
    MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
                                 SystemZ::VectorBytes / P.Operand);
    Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
  } else {
    Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
  }
  return Op;
}

// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes.  Implement it on operands Ops[0] and Ops[1] using
// VSLDI or VPERM.
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
                                     SDValue *Ops,
                                     const SmallVectorImpl<int> &Bytes) {
  for (unsigned I = 0; I < 2; ++I)
    Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);

  // First see whether VSLDI can be used.
  unsigned StartIndex, OpNo0, OpNo1;
  if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
    return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
                       Ops[OpNo1],
                       DAG.getTargetConstant(StartIndex, DL, MVT::i32));

  // Fall back on VPERM.  Construct an SDNode for the permute vector.
  SDValue IndexNodes[SystemZ::VectorBytes];
  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
    if (Bytes[I] >= 0)
      IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
    else
      IndexNodes[I] = DAG.getUNDEF(MVT::i32);
  SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
  return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2);
}

// Describes a general N-operand vector shuffle.
struct GeneralShuffle {
  GeneralShuffle(EVT vt) : VT(vt) {}
  void addUndef();
  bool add(SDValue, unsigned);
  SDValue getNode(SelectionDAG &, const SDLoc &);

  // The operands of the shuffle.
  SmallVector<SDValue, SystemZ::VectorBytes> Ops;

  // Index I is -1 if byte I of the result is undefined.  Otherwise the
  // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
  // Bytes[I] / SystemZ::VectorBytes.
  SmallVector<int, SystemZ::VectorBytes> Bytes;

  // The type of the shuffle result.
  EVT VT;
};

// Add an extra undefined element to the shuffle.
void GeneralShuffle::addUndef() {
  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
  for (unsigned I = 0; I < BytesPerElement; ++I)
    Bytes.push_back(-1);
}

// Add an extra element to the shuffle, taking it from element Elem of Op.
// A null Op indicates a vector input whose value will be calculated later;
// there is at most one such input per shuffle and it always has the same
// type as the result. Aborts and returns false if the source vector elements
// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
// LLVM they become implicitly extended, but this is rare and not optimized.
bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();

  // The source vector can have wider elements than the result,
  // either through an explicit TRUNCATE or because of type legalization.
  // We want the least significant part.
  EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
  unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();

  // Return false if the source elements are smaller than their destination
  // elements.
  if (FromBytesPerElement < BytesPerElement)
    return false;

  unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
                   (FromBytesPerElement - BytesPerElement));

  // Look through things like shuffles and bitcasts.
  while (Op.getNode()) {
    if (Op.getOpcode() == ISD::BITCAST)
      Op = Op.getOperand(0);
    else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
      // See whether the bytes we need come from a contiguous part of one
      // operand.
      SmallVector<int, SystemZ::VectorBytes> OpBytes;
      if (!getVPermMask(Op, OpBytes))
        break;
      int NewByte;
      if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
        break;
      if (NewByte < 0) {
        addUndef();
        return true;
      }
      Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
      Byte = unsigned(NewByte) % SystemZ::VectorBytes;
    } else if (Op.isUndef()) {
      addUndef();
      return true;
    } else
      break;
  }

  // Make sure that the source of the extraction is in Ops.
  unsigned OpNo = 0;
  for (; OpNo < Ops.size(); ++OpNo)
    if (Ops[OpNo] == Op)
      break;
  if (OpNo == Ops.size())
    Ops.push_back(Op);

  // Add the element to Bytes.
  unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
  for (unsigned I = 0; I < BytesPerElement; ++I)
    Bytes.push_back(Base + I);

  return true;
}

// Return SDNodes for the completed shuffle.
SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
  assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");

  if (Ops.size() == 0)
    return DAG.getUNDEF(VT);

  // Make sure that there are at least two shuffle operands.
  if (Ops.size() == 1)
    Ops.push_back(DAG.getUNDEF(MVT::v16i8));

  // Create a tree of shuffles, deferring root node until after the loop.
  // Try to redistribute the undefined elements of non-root nodes so that
  // the non-root shuffles match something like a pack or merge, then adjust
  // the parent node's permute vector to compensate for the new order.
  // Among other things, this copes with vectors like <2 x i16> that were
  // padded with undefined elements during type legalization.
  //
  // In the best case this redistribution will lead to the whole tree
  // using packs and merges.  It should rarely be a loss in other cases.
  unsigned Stride = 1;
  for (; Stride * 2 < Ops.size(); Stride *= 2) {
    for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
      SDValue SubOps[] = { Ops[I], Ops[I + Stride] };

      // Create a mask for just these two operands.
      SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
      for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
        unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
        unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
        if (OpNo == I)
          NewBytes[J] = Byte;
        else if (OpNo == I + Stride)
          NewBytes[J] = SystemZ::VectorBytes + Byte;
        else
          NewBytes[J] = -1;
      }
      // See if it would be better to reorganize NewMask to avoid using VPERM.
      SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
      if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
        Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
        // Applying NewBytesMap to Ops[I] gets back to NewBytes.
        for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
          if (NewBytes[J] >= 0) {
            assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
                   "Invalid double permute");
            Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
          } else
            assert(NewBytesMap[J] < 0 && "Invalid double permute");
        }
      } else {
        // Just use NewBytes on the operands.
        Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
        for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
          if (NewBytes[J] >= 0)
            Bytes[J] = I * SystemZ::VectorBytes + J;
      }
    }
  }

  // Now we just have 2 inputs.  Put the second operand in Ops[1].
  if (Stride > 1) {
    Ops[1] = Ops[Stride];
    for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
      if (Bytes[I] >= int(SystemZ::VectorBytes))
        Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
  }

  // Look for an instruction that can do the permute without resorting
  // to VPERM.
  unsigned OpNo0, OpNo1;
  SDValue Op;
  if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
    Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
  else
    Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
}
// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
static bool isScalarToVector(SDValue Op) {
  for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
    if (!Op.getOperand(I).isUndef())
      return false;
  return true;
}

// Return a vector of type VT that contains Value in the first element.
// The other elements don't matter.
static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                   SDValue Value) {
  // If we have a constant, replicate it to all elements and let the
  // BUILD_VECTOR lowering take care of it.
  if (Value.getOpcode() == ISD::Constant ||
      Value.getOpcode() == ISD::ConstantFP) {
    SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
    return DAG.getBuildVector(VT, DL, Ops);
  }
  if (Value.isUndef())
    return DAG.getUNDEF(VT);
  return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
}

// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
// element 1. Used for cases in which replication is cheap.
static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                 SDValue Op0, SDValue Op1) {
  if (Op0.isUndef()) {
    if (Op1.isUndef())
      return DAG.getUNDEF(VT);
    return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
  }
  if (Op1.isUndef())
    return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
  return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
                     buildScalarToVector(DAG, DL, VT, Op0),
                     buildScalarToVector(DAG, DL, VT, Op1));
}

// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
// containing them in elements 0 and 1 respectively.
static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
                          SDValue Op1) {
  if (Op0.isUndef() && Op1.isUndef())
    return DAG.getUNDEF(MVT::v2i64);
  // If one of the two inputs is undefined then replicate the other one,
  // in order to avoid using another register unnecessarily.
  if (Op0.isUndef())
    Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
  else if (Op1.isUndef())
    Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
  else {
    Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
    Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
  }
  return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
}

// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
// the non-EXTRACT_VECTOR_ELT elements.  See if the given BUILD_VECTOR
// would benefit from this representation and return it if so.
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
                                     BuildVectorSDNode *BVN) {
  EVT VT = BVN->getValueType(0);
  unsigned NumElements = VT.getVectorNumElements();

  // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
  // on byte vectors.  If there are non-EXTRACT_VECTOR_ELT elements that still
  // need a BUILD_VECTOR, add an additional placeholder operand for that
  // BUILD_VECTOR and store its operands in ResidueOps.
  GeneralShuffle GS(VT);
  SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
  bool FoundOne = false;
  for (unsigned I = 0; I < NumElements; ++I) {
    SDValue Op = BVN->getOperand(I);
    if (Op.getOpcode() == ISD::TRUNCATE)
      Op = Op.getOperand(0);
    if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
        Op.getOperand(1).getOpcode() == ISD::Constant) {
      unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      if (!GS.add(Op.getOperand(0), Elem))
        return SDValue();
      FoundOne = true;
    } else if (Op.isUndef()) {
      GS.addUndef();
    } else {
      if (!GS.add(SDValue(), ResidueOps.size()))
        return SDValue();
      ResidueOps.push_back(BVN->getOperand(I));
    }
  }

  // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
  if (!FoundOne)
    return SDValue();

  // Create the BUILD_VECTOR for the remaining elements, if any.
  if (!ResidueOps.empty()) {
    while (ResidueOps.size() < NumElements)
      ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
    for (auto &Op : GS.Ops) {
      if (!Op.getNode()) {
        Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
        break;
      }
    }
  }
  return GS.getNode(DAG, SDLoc(BVN));
}

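// Return true if Op is a load that instruction selection can fold directly
// into a replicating vector-element load: a plain unindexed LOAD, or a
// byte-swapping LRV load when the vector-enhancements-2 facility is present.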
bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
  if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
    return true;
  if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
    return true;
  return false;
}

// Combine GPR scalar values Elems into a vector of type VT.
SDValue
SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                   SmallVectorImpl<SDValue> &Elems) const {
  // See whether there is a single replicated value.
  SDValue Single;
  unsigned int NumElements = Elems.size();
  unsigned int Count = 0;
  for (auto Elem : Elems) {
    if (!Elem.isUndef()) {
      if (!Single.getNode())
        Single = Elem;
      else if (Elem != Single) {
        Single = SDValue();
        break;
      }
      Count += 1;
    }
  }
  // There are three cases here:
  //
  // - if the only defined element is a loaded one, the best sequence
  //   is a replicating load.
  //
  // - otherwise, if the only defined element is an i64 value, we will
  //   end up with the same VLVGP sequence regardless of whether we short-cut
  //   for replication or fall through to the later code.
  //
  // - otherwise, if the only defined element is an i32 or smaller value,
  //   we would need 2 instructions to replicate it: VLVGP followed by VREPx.
  //   This is only a win if the single defined element is used more than once.
  //   In other cases we're better off using a single VLVGx.
  if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
    return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);

  // If all elements are loads, use VLREP/VLEs (below).
  bool AllLoads = true;
  for (auto Elem : Elems)
    if (!isVectorElementLoad(Elem)) {
      AllLoads = false;
      break;
    }

  // The best way of building a v2i64 from two i64s is to use VLVGP.
  if (VT == MVT::v2i64 && !AllLoads)
    return joinDwords(DAG, DL, Elems[0], Elems[1]);

  // Use a 64-bit merge high to combine two doubles.
  if (VT == MVT::v2f64 && !AllLoads)
    return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);

  // Build v4f32 values directly from the FPRs:
  //
  //   <Axxx> <Bxxx> <Cxxx> <Dxxx>
  //         V              V         VMRHF
  //      <ABxx>         <CDxx>
  //                 V                VMRHG
  //              <ABCD>
  if (VT == MVT::v4f32 && !AllLoads) {
    SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
    SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
    // Avoid unnecessary undefs by reusing the other operand.
    if (Op01.isUndef())
      Op01 = Op23;
    else if (Op23.isUndef())
      Op23 = Op01;
    // Merging identical replications is a no-op.
    if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
      return Op01;
    Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
    Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
    SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
                             DL, MVT::v2i64, Op01, Op23);
    return DAG.getNode(ISD::BITCAST, DL, VT, Op);
  }

  // Collect the constant terms.
  SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
  SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);

  unsigned NumConstants = 0;
  for (unsigned I = 0; I < NumElements; ++I) {
    SDValue Elem = Elems[I];
    if (Elem.getOpcode() == ISD::Constant ||
        Elem.getOpcode() == ISD::ConstantFP) {
      NumConstants += 1;
      Constants[I] = Elem;
      Done[I] = true;
    }
  }
  // If there was at least one constant, fill in the other elements of
  // Constants with undefs to get a full vector constant and use that
  // as the starting point.
  SDValue Result;
  SDValue ReplicatedVal;
  if (NumConstants > 0) {
    for (unsigned I = 0; I < NumElements; ++I)
      if (!Constants[I].getNode())
        Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
    Result = DAG.getBuildVector(VT, DL, Constants);
  } else {
    // Otherwise try to use VLREP or VLVGP to start the sequence in order to
    // avoid a false dependency on any previous contents of the vector
    // register.

    // Use a VLREP if at least one element is a load. Make sure to replicate
    // the load with the most elements having its value.
    std::map<const SDNode*, unsigned> UseCounts;
    SDNode *LoadMaxUses = nullptr;
    for (unsigned I = 0; I < NumElements; ++I)
      if (isVectorElementLoad(Elems[I])) {
        SDNode *Ld = Elems[I].getNode();
        UseCounts[Ld]++;
        if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
          LoadMaxUses = Ld;
      }
    if (LoadMaxUses != nullptr) {
      ReplicatedVal = SDValue(LoadMaxUses, 0);
      Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
    } else {
      // Try to use VLVGP.
      unsigned I1 = NumElements / 2 - 1;
      unsigned I2 = NumElements - 1;
      bool Def1 = !Elems[I1].isUndef();
      bool Def2 = !Elems[I2].isUndef();
      if (Def1 || Def2) {
        SDValue Elem1 = Elems[Def1 ? I1 : I2];
        SDValue Elem2 = Elems[Def2 ? I2 : I1];
        Result = DAG.getNode(ISD::BITCAST, DL, VT,
                             joinDwords(DAG, DL, Elem1, Elem2));
        Done[I1] = true;
        Done[I2] = true;
      } else
        Result = DAG.getUNDEF(VT);
    }
  }

  // Use VLVGx to insert the other elements.
  for (unsigned I = 0; I < NumElements; ++I)
    if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
                           DAG.getConstant(I, DL, MVT::i32));
  return Result;
}

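// Lower a BUILD_VECTOR node, preferring (in order) a legal vector constant,
// a shuffle of existing vectors, a SCALAR_TO_VECTOR, and finally an
// element-by-element construction from scalar registers via buildVector().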
SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  if (BVN->isConstant()) {
    if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
      return Op;

    // Fall back to loading it from memory.
    return SDValue();
  }

  // See if we should use shuffles to construct the vector from other vectors.
  if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
    return Res;

  // Detect SCALAR_TO_VECTOR conversions.
  if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
    return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));

  // Otherwise use buildVector to build the vector up from GPRs.
  unsigned NumElements = Op.getNumOperands();
  SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
  for (unsigned I = 0; I < NumElements; ++I)
    Ops[I] = Op.getOperand(I);
  return buildVector(DAG, DL, VT, Ops);
}

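// Lower a VECTOR_SHUFFLE: splats become REPLICATE or SPLAT nodes, and all
// other masks are handled through the byte-level GeneralShuffle machinery.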
SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                   SelectionDAG &DAG) const {
  auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  unsigned NumElements = VT.getVectorNumElements();

  if (VSN->isSplat()) {
    SDValue Op0 = Op.getOperand(0);
    unsigned Index = VSN->getSplatIndex();
    assert(Index < VT.getVectorNumElements() &&
           "Splat index should be defined and in first operand");
    // See whether the value we're splatting is directly available as a scalar.
    if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
        Op0.getOpcode() == ISD::BUILD_VECTOR)
      return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
    // Otherwise keep it as a vector-to-vector operation.
    return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
                       DAG.getTargetConstant(Index, DL, MVT::i32));
  }

  GeneralShuffle GS(VT);
  for (unsigned I = 0; I < NumElements; ++I) {
    int Elt = VSN->getMaskElt(I);
    if (Elt < 0)
      GS.addUndef();
    else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
                     unsigned(Elt) % NumElements))
      return SDValue();
  }
  return GS.getNode(DAG, SDLoc(VSN));
}

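// Lower SCALAR_TO_VECTOR by inserting the scalar into element 0 of an
// otherwise undefined vector.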
SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // Just insert the scalar into element 0 of an undefined vector.
  return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
                     Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
                     Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
}

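// Lower INSERT_VECTOR_ELT for floating-point element types: use the vector
// form directly for constant indices into v2f64, otherwise bitcast to the
// equivalent integer vector and insert via a GPR.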
SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                      SelectionDAG &DAG) const {
  // Handle insertions of floating-point values.
  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDValue Op2 = Op.getOperand(2);
  EVT VT = Op.getValueType();

  // Insertions into constant indices of a v2f64 can be done using VPDI.
  // However, if the inserted value is a bitcast or a constant then it's
  // better to use GPRs, as below.
  if (VT == MVT::v2f64 &&
      Op1.getOpcode() != ISD::BITCAST &&
      Op1.getOpcode() != ISD::ConstantFP &&
      Op2.getOpcode() == ISD::Constant) {
    uint64_t Index = cast<ConstantSDNode>(Op2)->getZExtValue();
    unsigned Mask = VT.getVectorNumElements() - 1;
    if (Index <= Mask)
      return Op;
  }

  // Otherwise bitcast to the equivalent integer form and insert via a GPR.
  MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
  MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
  SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
                            DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
                            DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
  return DAG.getNode(ISD::BITCAST, DL, VT, Res);
}

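// Lower EXTRACT_VECTOR_ELT for floating-point element types: constant
// indices can be extracted directly, anything else goes through a bitcast
// to the equivalent integer vector and a GPR extraction.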
SDValue
SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                               SelectionDAG &DAG) const {
  // Handle extractions of floating-point values.
  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  EVT VT = Op.getValueType();
  EVT VecVT = Op0.getValueType();

  // Extractions of constant indices can be done directly.
  if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
    uint64_t Index = CIndexN->getZExtValue();
    unsigned Mask = VecVT.getVectorNumElements() - 1;
    if (Index <= Mask)
      return Op;
  }

  // Otherwise bitcast to the equivalent integer form and extract via a GPR.
  MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
  MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
  SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
                            DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
  return DAG.getNode(ISD::BITCAST, DL, VT, Res);
}

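// Lower SIGN/ZERO_EXTEND_VECTOR_INREG by repeatedly unpacking the high part
// of the input vector until the requested element width is reached.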
SDValue
SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
                                              unsigned UnpackHigh) const {
  SDValue PackedOp = Op.getOperand(0);
  EVT OutVT = Op.getValueType();
  EVT InVT = PackedOp.getValueType();
  unsigned ToBits = OutVT.getScalarSizeInBits();
  unsigned FromBits = InVT.getScalarSizeInBits();
  do {
    FromBits *= 2;
    EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
                                 SystemZ::VectorBits / FromBits);
    PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), OutVT, PackedOp);
  } while (FromBits != ToBits);
  return PackedOp;
}

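// Lower a vector shift (SHL/SRL/SRA), using the *_BY_SCALAR forms whenever
// the shift amount is a splat that is directly available as a scalar.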
SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
                                          unsigned ByScalar) const {
  // Look for cases where a vector shift can use the *_BY_SCALAR form.
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  unsigned ElemBitSize = VT.getScalarSizeInBits();

  // See whether the shift vector is a splat represented as BUILD_VECTOR.
  if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
    APInt SplatBits, SplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    // Check for constant splats.  Use ElemBitSize as the minimum element
    // width and reject splats that need wider elements.
    if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
                             ElemBitSize, true) &&
        SplatBitSize == ElemBitSize) {
      SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
                                      DL, MVT::i32);
      return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
    }
    // Check for variable splats.
    BitVector UndefElements;
    SDValue Splat = BVN->getSplatValue(&UndefElements);
    if (Splat) {
      // Since i32 is the smallest legal type, we either need a no-op
      // or a truncation.
      SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
      return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
    }
  }

  // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
  // and the shift amount is directly available in a GPR.
  if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
    if (VSN->isSplat()) {
      SDValue VSNOp0 = VSN->getOperand(0);
      unsigned Index = VSN->getSplatIndex();
      assert(Index < VT.getVectorNumElements() &&
             "Splat index should be defined and in first operand");
      if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
          VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
        // Since i32 is the smallest legal type, we either need a no-op
        // or a truncation.
        SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
                                    VSNOp0.getOperand(Index));
        return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
      }
    }
  }

  // Otherwise just treat the current form as legal.
  return Op;
}

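// Main dispatch routine for operations marked Custom for SystemZ; each case
// forwards to a dedicated lower* helper.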
SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
                                              SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::BR_CC:
    return lowerBR_CC(Op, DAG);
  case ISD::SELECT_CC:
    return lowerSELECT_CC(Op, DAG);
  case ISD::SETCC:
    return lowerSETCC(Op, DAG);
  case ISD::STRICT_FSETCC:
    return lowerSTRICT_FSETCC(Op, DAG, false);
  case ISD::STRICT_FSETCCS:
    return lowerSTRICT_FSETCC(Op, DAG, true);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
  case ISD::JumpTable:
    return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::VACOPY:
    return lowerVACOPY(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    return lowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::GET_DYNAMIC_AREA_OFFSET:
    return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
  case ISD::SMUL_LOHI:
    return lowerSMUL_LOHI(Op, DAG);
  case ISD::UMUL_LOHI:
    return lowerUMUL_LOHI(Op, DAG);
  case ISD::SDIVREM:
    return lowerSDIVREM(Op, DAG);
  case ISD::UDIVREM:
    return lowerUDIVREM(Op, DAG);
  case ISD::SADDO:
  case ISD::SSUBO:
  case ISD::UADDO:
  case ISD::USUBO:
    return lowerXALUO(Op, DAG);
  case ISD::ADDCARRY:
  case ISD::SUBCARRY:
    return lowerADDSUBCARRY(Op, DAG);
  case ISD::OR:
    return lowerOR(Op, DAG);
  case ISD::CTPOP:
    return lowerCTPOP(Op, DAG);
  case ISD::ATOMIC_FENCE:
    return lowerATOMIC_FENCE(Op, DAG);
  case ISD::ATOMIC_SWAP:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
  case ISD::ATOMIC_STORE:
    return lowerATOMIC_STORE(Op, DAG);
  case ISD::ATOMIC_LOAD:
    return lowerATOMIC_LOAD(Op, DAG);
  case ISD::ATOMIC_LOAD_ADD:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
  case ISD::ATOMIC_LOAD_SUB:
    return lowerATOMIC_LOAD_SUB(Op, DAG);
  case ISD::ATOMIC_LOAD_AND:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
  case ISD::ATOMIC_LOAD_OR:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
  case ISD::ATOMIC_LOAD_XOR:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
  case ISD::ATOMIC_LOAD_NAND:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
  case ISD::ATOMIC_LOAD_MIN:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
  case ISD::ATOMIC_LOAD_MAX:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
  case ISD::ATOMIC_LOAD_UMIN:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
  case ISD::ATOMIC_LOAD_UMAX:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
    return lowerATOMIC_CMP_SWAP(Op, DAG);
  case ISD::STACKSAVE:
    return lowerSTACKSAVE(Op, DAG);
  case ISD::STACKRESTORE:
    return lowerSTACKRESTORE(Op, DAG);
  case ISD::PREFETCH:
    return lowerPREFETCH(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return lowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH);
  case ISD::ZERO_EXTEND_VECTOR_INREG:
    return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH);
  case ISD::SHL:
    return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
  case ISD::SRL:
    return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
  case ISD::SRA:
    return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
  default:
    llvm_unreachable("Unexpected node to lower");
  }
}

// Lower operations with invalid operand or result types (currently used
// only for 128-bit integer types).

static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
  SDLoc DL(In);
  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
                           DAG.getIntPtrConstant(0, DL));
  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
                           DAG.getIntPtrConstant(1, DL));
  SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
                                    MVT::Untyped, Hi, Lo);
  return SDValue(Pair, 0);
}

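// Reassemble an i128 value from the two GR64 halves of an untyped 128-bit
// register pair (subreg_h64 holds the high doubleword, subreg_l64 the low).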
static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) {
  SDLoc DL(In);
  SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
                                          DL, MVT::i64, In);
  SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
                                          DL, MVT::i64, In);
  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
}

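// Lower nodes whose i128 operand or result type is not legal by rewriting
// them as GR128-based memory intrinsic nodes (128-bit atomic load, store
// and compare-and-swap).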
void
SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  case ISD::ATOMIC_LOAD: {
    SDLoc DL(N);
    SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
    MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
    SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
                                          DL, Tys, Ops, MVT::i128, MMO);
    Results.push_back(lowerGR128ToI128(DAG, Res));
    Results.push_back(Res.getValue(1));
    break;
  }
  case ISD::ATOMIC_STORE: {
    SDLoc DL(N);
    SDVTList Tys = DAG.getVTList(MVT::Other);
    SDValue Ops[] = { N->getOperand(0),
                      lowerI128ToGR128(DAG, N->getOperand(2)),
                      N->getOperand(1) };
    MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
    SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
                                          DL, Tys, Ops, MVT::i128, MMO);
    // We have to enforce sequential consistency by performing a
    // serialization operation after the store.
    if (cast<AtomicSDNode>(N)->getOrdering() ==
        AtomicOrdering::SequentiallyConsistent)
      Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
                                       MVT::Other, Res), 0);
    Results.push_back(Res);
    break;
  }
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
    SDLoc DL(N);
    SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                      lowerI128ToGR128(DAG, N->getOperand(2)),
                      lowerI128ToGR128(DAG, N->getOperand(3)) };
    MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
    SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
                                          DL, Tys, Ops, MVT::i128, MMO);
    SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
                                SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
    Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
    Results.push_back(lowerGR128ToI128(DAG, Res));
    Results.push_back(Success);
    Results.push_back(Res.getValue(2));
    break;
  }
  default:
    llvm_unreachable("Unexpected node to lower");
  }
}

void
SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
                                          SmallVectorImpl<SDValue> &Results,
                                          SelectionDAG &DAG) const {
  return LowerOperationWrapper(N, Results, DAG);
}

const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
  switch ((SystemZISD::NodeType)Opcode) {
    case SystemZISD::FIRST_NUMBER: break;
    OPCODE(PCREL_WRAPPER);
    OPCODE(PCREL_OFFSET);
    OPCODE(STRICT_FCMP);
    OPCODE(STRICT_FCMPS);
    OPCODE(SELECT_CCMASK);
    OPCODE(ADJDYNALLOC);
    OPCODE(SEARCH_STRING);
    OPCODE(TBEGIN_NOFLOAT);
    OPCODE(ROTATE_MASK);
    OPCODE(JOIN_DWORDS);
    OPCODE(PERMUTE_DWORDS);
    OPCODE(UNPACK_HIGH);
    OPCODE(UNPACKL_HIGH);
    OPCODE(UNPACKL_LOW);
    OPCODE(VSHL_BY_SCALAR);
    OPCODE(VSRL_BY_SCALAR);
    OPCODE(VSRA_BY_SCALAR);
    OPCODE(STRICT_VFCMPE);
    OPCODE(STRICT_VFCMPES);
    OPCODE(STRICT_VFCMPH);
    OPCODE(STRICT_VFCMPHS);
    OPCODE(STRICT_VFCMPHE);
    OPCODE(STRICT_VFCMPHES);
    OPCODE(STRICT_VEXTEND);
    OPCODE(STRICT_VROUND);
    OPCODE(ATOMIC_SWAPW);
    OPCODE(ATOMIC_LOADW_ADD);
    OPCODE(ATOMIC_LOADW_SUB);
    OPCODE(ATOMIC_LOADW_AND);
    OPCODE(ATOMIC_LOADW_OR);
    OPCODE(ATOMIC_LOADW_XOR);
    OPCODE(ATOMIC_LOADW_NAND);
    OPCODE(ATOMIC_LOADW_MIN);
    OPCODE(ATOMIC_LOADW_MAX);
    OPCODE(ATOMIC_LOADW_UMIN);
    OPCODE(ATOMIC_LOADW_UMAX);
    OPCODE(ATOMIC_CMP_SWAPW);
    OPCODE(ATOMIC_CMP_SWAP);
    OPCODE(ATOMIC_LOAD_128);
    OPCODE(ATOMIC_STORE_128);
    OPCODE(ATOMIC_CMP_SWAP_128);
  }
  return nullptr;
#undef OPCODE
}

// Return true if VT is a vector whose elements are a whole number of bytes
// in width. Also check for presence of vector support.
bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
  if (!Subtarget.hasVector())
    return false;

  return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
}

// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
// producing a result of type ResVT.  Op is a possibly bitcast version
// of the input vector and Index is the index (based on type VecVT) that
// should be extracted.  Return the new extraction if a simplification
// was possible or if Force is true.
SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
                                              EVT VecVT, SDValue Op,
                                              unsigned Index,
                                              DAGCombinerInfo &DCI,
                                              bool Force) const {
  SelectionDAG &DAG = DCI.DAG;

  // The number of bytes being extracted.
  unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();

  for (;;) {
    unsigned Opcode = Op.getOpcode();
    if (Opcode == ISD::BITCAST)
      // Look through bitcasts.
      Op = Op.getOperand(0);
    else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
             canTreatAsByteVector(Op.getValueType())) {
      // Get a VPERM-like permute mask and see whether the bytes covered
      // by the extracted element are a contiguous sequence from one
      // input operand.
      SmallVector<int, SystemZ::VectorBytes> Bytes;
      if (!getVPermMask(Op, Bytes))
        break;
      int First;
      if (!getShuffleInput(Bytes, Index * BytesPerElement,
                           BytesPerElement, First))
        break;
      if (First < 0)
        return DAG.getUNDEF(ResVT);
      // Make sure the contiguous sequence starts at a multiple of the
      // original element size.
      unsigned Byte = unsigned(First) % Bytes.size();
      if (Byte % BytesPerElement != 0)
        break;
      // We can get the extracted value directly from an input.
      Index = Byte / BytesPerElement;
      Op = Op.getOperand(unsigned(First) / Bytes.size());
      Force = true;
    } else if (Opcode == ISD::BUILD_VECTOR &&
               canTreatAsByteVector(Op.getValueType())) {
      // We can only optimize this case if the BUILD_VECTOR elements are
      // at least as wide as the extracted value.
      EVT OpVT = Op.getValueType();
      unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
      if (OpBytesPerElement < BytesPerElement)
        break;
      // Make sure that the least-significant bit of the extracted value
      // is the least significant bit of an input.
      unsigned End = (Index + 1) * BytesPerElement;
      if (End % OpBytesPerElement != 0)
        break;
      // We're extracting the low part of one operand of the BUILD_VECTOR.
      Op = Op.getOperand(End / OpBytesPerElement - 1);
      if (!Op.getValueType().isInteger()) {
        EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
        Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
        DCI.AddToWorklist(Op.getNode());
      }
      EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
      Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
      if (VT != ResVT) {
        DCI.AddToWorklist(Op.getNode());
        Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
      }
      return Op;
    } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
                Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
                Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
               canTreatAsByteVector(Op.getValueType()) &&
               canTreatAsByteVector(Op.getOperand(0).getValueType())) {
      // Make sure that only the unextended bits are significant.
      EVT ExtVT = Op.getValueType();
      EVT OpVT = Op.getOperand(0).getValueType();
      unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
      unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
      unsigned Byte = Index * BytesPerElement;
      unsigned SubByte = Byte % ExtBytesPerElement;
      unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
      if (SubByte < MinSubByte ||
          SubByte + BytesPerElement > ExtBytesPerElement)
        break;
      // Get the byte offset of the unextended element
      Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
      // ...then add the byte offset relative to that element.
      Byte += SubByte - MinSubByte;
      if (Byte % BytesPerElement != 0)
        break;
      Op = Op.getOperand(0);
      Index = Byte / BytesPerElement;
      Force = true;
    } else
      break;
  }
  if (Force) {
    if (Op.getValueType() != VecVT) {
      Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
      DCI.AddToWorklist(Op.getNode());
    }
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
                       DAG.getConstant(Index, DL, MVT::i32));
  }
  return SDValue();
}

// Optimize vector operations in scalar value Op on the basis that Op
// is truncated to TruncVT.
SDValue SystemZTargetLowering::combineTruncateExtract(
    const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
  // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
  // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
  // of type TruncVT.
  if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      TruncVT.getSizeInBits() % 8 == 0) {
    SDValue Vec = Op.getOperand(0);
    EVT VecVT = Vec.getValueType();
    if (canTreatAsByteVector(VecVT)) {
      if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
        unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
        unsigned TruncBytes = TruncVT.getStoreSize();
        if (BytesPerElement % TruncBytes == 0) {
          // Calculate the value of Y' in the above description.  We are
          // splitting the original elements into Scale equal-sized pieces
          // and for truncation purposes want the last (least-significant)
          // of these pieces for IndexN.  This is easiest to do by calculating
          // the start index of the following element and then subtracting 1.
          unsigned Scale = BytesPerElement / TruncBytes;
          unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;

          // Defer the creation of the bitcast from X to combineExtract,
          // which might be able to optimize the extraction.
          VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
                                   VecVT.getStoreSize() / TruncBytes);
          EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
          return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
        }
      }
    }
  }
  return SDValue();
}

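// DAG combine for ZERO_EXTEND: fold a zero extension of a SELECT_CCMASK
// with constant operands into a SELECT_CCMASK of the wider type.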
SDValue SystemZTargetLowering::combineZERO_EXTEND(
    SDNode *N, DAGCombinerInfo &DCI) const {
  // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
  SelectionDAG &DAG = DCI.DAG;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
    auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
    auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (TrueOp && FalseOp) {
      SDLoc DL(N0);
      SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
                        DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
                        N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
      SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
      // If N0 has multiple uses, change other uses as well.
      if (!N0.hasOneUse()) {
        SDValue TruncSelect =
          DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
        DCI.CombineTo(N0.getNode(), TruncSelect);
      }
      return NewSelect;
    }
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
    SDNode *N, DAGCombinerInfo &DCI) const {
  // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
  // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
  // into (select_cc LHS, RHS, -1, 0, COND)
  SelectionDAG &DAG = DCI.DAG;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
    N0 = N0.getOperand(0);
  if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
    SDLoc DL(N0);
    SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
                      DAG.getConstant(-1, DL, VT), DAG.getConstant(0, DL, VT),
                      N0.getOperand(2) };
    return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineSIGN_EXTEND(
    SDNode *N, DAGCombinerInfo &DCI) const {
  // Convert (sext (ashr (shl X, C1), C2)) to
  // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
  // cheap as narrower ones.
  SelectionDAG &DAG = DCI.DAG;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
    auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    SDValue Inner = N0.getOperand(0);
    if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
      if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
        unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
        unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
        unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
        EVT ShiftVT = N0.getOperand(1).getValueType();
        SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
                                  Inner.getOperand(0));
        SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
                                  DAG.getConstant(NewShlAmt, SDLoc(Inner),
                                                  ShiftVT));
        return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
                           DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
      }
    }
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineMERGE(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  unsigned Opcode = N->getOpcode();
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  if (Op0.getOpcode() == ISD::BITCAST)
    Op0 = Op0.getOperand(0);
  if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
    // (z_merge_* 0, 0) -> 0.  This is mostly useful for using VLLEZF
    // for v4f32.
    if (Op1 == N->getOperand(0))
      return Op1;
    // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
    EVT VT = Op1.getValueType();
    unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
    if (ElemBytes <= 4) {
      Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
                SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
      EVT InVT = VT.changeVectorElementTypeToInteger();
      EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
                                   SystemZ::VectorBytes / ElemBytes / 2);
      if (VT != InVT) {
        Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
        DCI.AddToWorklist(Op1.getNode());
      }
      SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
      DCI.AddToWorklist(Op.getNode());
      return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
    }
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineLOAD(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  EVT LdVT = N->getValueType(0);
  if (LdVT.isVector() || LdVT.isInteger())
    return SDValue();
  // Transform a scalar load that is REPLICATEd as well as having other
  // use(s) to the form where the other use(s) use the first element of the
  // REPLICATE instead of the load. Otherwise instruction selection will not
  // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
  // point loads.

  SDValue Replicate;
  SmallVector<SDNode*, 8> OtherUses;
  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
       UI != UE; ++UI) {
    if (UI->getOpcode() == SystemZISD::REPLICATE) {
      if (Replicate)
        return SDValue(); // Should never happen
      Replicate = SDValue(*UI, 0);
    }
    else if (UI.getUse().getResNo() == 0)
      OtherUses.push_back(*UI);
  }
  if (!Replicate || OtherUses.empty())
    return SDValue();

  SDLoc DL(N);
  SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
                                 Replicate, DAG.getConstant(0, DL, MVT::i32));
  // Update uses of the loaded Value while preserving old chains.
  for (SDNode *U : OtherUses) {
    SmallVector<SDValue, 8> Ops;
    for (SDValue Op : U->ops())
      Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
    DAG.UpdateNodeOperands(U, Ops);
  }
  return SDValue(N, 0);
}

bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
  if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
    return true;
  if (Subtarget.hasVectorEnhancements2())
    if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64)
      return true;
  return false;
}

static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
  if (!VT.isVector() || !VT.isSimple() ||
      VT.getSizeInBits() != 128 ||
      VT.getScalarSizeInBits() % 8 != 0)
    return false;

  unsigned NumElts = VT.getVectorNumElements();
  for (unsigned i = 0; i < NumElts; ++i) {
    if (M[i] < 0) continue; // ignore UNDEF indices
    if ((unsigned) M[i] != NumElts - 1 - i)
      return false;
  }

  return true;
}

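// DAG combine for STORE: narrow truncating stores of extracted vector
// elements, and fold byte-swapping or element-swapping stores into
// STRV/VSTER-style nodes.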
SDValue SystemZTargetLowering::combineSTORE(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  auto *SN = cast<StoreSDNode>(N);
  auto &Op1 = N->getOperand(1);
  EVT MemVT = SN->getMemoryVT();
  // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
  // for the extraction to be done on a vMiN value, so that we can use VSTE.
  // If X has wider elements then convert it to:
  // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
  if (MemVT.isInteger() && SN->isTruncatingStore()) {
    if (SDValue Value =
            combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
      DCI.AddToWorklist(Value.getNode());

      // Rewrite the store with the new form of stored value.
      return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
                               SN->getBasePtr(), SN->getMemoryVT(),
                               SN->getMemOperand());
    }
  }
  // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
  if (!SN->isTruncatingStore() &&
      Op1.getOpcode() == ISD::BSWAP &&
      Op1.getNode()->hasOneUse() &&
      canLoadStoreByteSwapped(Op1.getValueType())) {

    SDValue BSwapOp = Op1.getOperand(0);

    if (BSwapOp.getValueType() == MVT::i16)
      BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);

    SDValue Ops[] = {
      N->getOperand(0), BSwapOp, N->getOperand(2)
    };

    return
      DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
                              Ops, MemVT, SN->getMemOperand());
  }
  // Combine STORE (element-swap) into VSTER
  if (!SN->isTruncatingStore() &&
      Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
      Op1.getNode()->hasOneUse() &&
      Subtarget.hasVectorEnhancements2()) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
    ArrayRef<int> ShuffleMask = SVN->getMask();
    if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
      SDValue Ops[] = {
        N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
      };

      return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
                                     DAG.getVTList(MVT::Other),
                                     Ops, MemVT, SN->getMemOperand());
    }
  }

  return SDValue();
}

SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  // Combine element-swap (LOAD) into VLER
  if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
      N->getOperand(0).hasOneUse() &&
      Subtarget.hasVectorEnhancements2()) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
    ArrayRef<int> ShuffleMask = SVN->getMask();
    if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
      SDValue Load = N->getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(Load);

      // Create the element-swapping load.
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        LD->getBasePtr()   // Ptr
      };
      SDValue ESLoad =
        DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
                                DAG.getVTList(LD->getValueType(0), MVT::Other),
                                Ops, LD->getMemoryVT(), LD->getMemOperand());

      // First, combine the VECTOR_SHUFFLE away.  This makes the value produced
      // by the load dead.
      DCI.CombineTo(N, ESLoad);

      // Next, combine the load away, we give it a bogus result value but a real
      // chain result.  The result value is dead because the shuffle is dead.
      DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));

      // Return N so it doesn't get rechecked!
      return SDValue(N, 0);
    }
  }

  return SDValue();
}

SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  if (!Subtarget.hasVector())
    return SDValue();

  // Look through bitcasts that retain the number of vector elements.
  SDValue Op = N->getOperand(0);
  if (Op.getOpcode() == ISD::BITCAST &&
      Op.getValueType().isVector() &&
      Op.getOperand(0).getValueType().isVector() &&
      Op.getValueType().getVectorNumElements() ==
      Op.getOperand(0).getValueType().getVectorNumElements())
    Op = Op.getOperand(0);

  // Pull BSWAP out of a vector extraction.
  if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
    EVT VecVT = Op.getValueType();
    EVT EltVT = VecVT.getVectorElementType();
    Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
                     Op.getOperand(0), N->getOperand(1));
    DCI.AddToWorklist(Op.getNode());
    Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
    if (EltVT != N->getValueType(0)) {
      DCI.AddToWorklist(Op.getNode());
      Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
    }
    return Op;
  }

  // Try to simplify a vector extraction.
  if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
    SDValue Op0 = N->getOperand(0);
    EVT VecVT = Op0.getValueType();
    return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
                          IndexN->getZExtValue(), DCI, false);
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineJOIN_DWORDS(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  // (join_dwords X, X) == (replicate X)
  if (N->getOperand(0) == N->getOperand(1))
    return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
                       N->getOperand(0));
  return SDValue();
}

static SDValue MergeInputChains(SDNode *N1, SDNode *N2) {
  SDValue Chain1 = N1->getOperand(0);
  SDValue Chain2 = N2->getOperand(0);

  // Trivial case: both nodes take the same chain.
  if (Chain1 == Chain2)
    return Chain1;

  // FIXME - we could handle more complex cases via TokenFactor,
  // assuming we can verify that this would not create a cycle.
  return SDValue();
}

SDValue SystemZTargetLowering::combineFP_ROUND(
    SDNode *N, DAGCombinerInfo &DCI) const {

  if (!Subtarget.hasVector())
    return SDValue();

  // (fpround (extract_vector_elt X 0))
  // (fpround (extract_vector_elt X 1)) ->
  // (extract_vector_elt (VROUND X) 0)
  // (extract_vector_elt (VROUND X) 2)
  //
  // This is a special case since the target doesn't really support v2f32s.
  unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(OpNo);
  if (N->getValueType(0) == MVT::f32 &&
      Op0.hasOneUse() &&
      Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      Op0.getOperand(0).getValueType() == MVT::v2f64 &&
      Op0.getOperand(1).getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
    SDValue Vec = Op0.getOperand(0);
    for (auto *U : Vec->uses()) {
      if (U != Op0.getNode() &&
          U->hasOneUse() &&
          U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          U->getOperand(0) == Vec &&
          U->getOperand(1).getOpcode() == ISD::Constant &&
          cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
        SDValue OtherRound = SDValue(*U->use_begin(), 0);
        if (OtherRound.getOpcode() == N->getOpcode() &&
            OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
            OtherRound.getValueType() == MVT::f32) {
          SDValue VRound, Chain;
          if (N->isStrictFPOpcode()) {
            Chain = MergeInputChains(N, OtherRound.getNode());
            if (!Chain)
              continue;
            VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
                                 {MVT::v4f32, MVT::Other}, {Chain, Vec});
            Chain = VRound.getValue(1);
          } else
            VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
                                 MVT::v4f32, Vec);
          DCI.AddToWorklist(VRound.getNode());
          SDValue Extract1 =
            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
                        VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
          DCI.AddToWorklist(Extract1.getNode());
          DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
          if (N->isStrictFPOpcode())
            DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
          SDValue Extract0 =
            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
                        VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
          if (Chain)
            return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
                               N->getVTList(), Extract0, Chain);
          return Extract0;
        }
      }
    }
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineFP_EXTEND(
    SDNode *N, DAGCombinerInfo &DCI) const {

  if (!Subtarget.hasVector())
    return SDValue();

  // (fpextend (extract_vector_elt X 0))
  // (fpextend (extract_vector_elt X 2)) ->
  // (extract_vector_elt (VEXTEND X) 0)
  // (extract_vector_elt (VEXTEND X) 1)
  //
  // This is a special case since the target doesn't really support v2f32s.
  unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(OpNo);
  if (N->getValueType(0) == MVT::f64 &&
      Op0.hasOneUse() &&
      Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      Op0.getOperand(0).getValueType() == MVT::v4f32 &&
      Op0.getOperand(1).getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
    SDValue Vec = Op0.getOperand(0);
    for (auto *U : Vec->uses()) {
      if (U != Op0.getNode() &&
          U->hasOneUse() &&
          U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          U->getOperand(0) == Vec &&
          U->getOperand(1).getOpcode() == ISD::Constant &&
          cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 2) {
        SDValue OtherExtend = SDValue(*U->use_begin(), 0);
        if (OtherExtend.getOpcode() == N->getOpcode() &&
            OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
            OtherExtend.getValueType() == MVT::f64) {
          SDValue VExtend, Chain;
          if (N->isStrictFPOpcode()) {
            Chain = MergeInputChains(N, OtherExtend.getNode());
            if (!Chain)
              continue;
            VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
                                  {MVT::v2f64, MVT::Other}, {Chain, Vec});
            Chain = VExtend.getValue(1);
          } else
            VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
                                  MVT::v2f64, Vec);
          DCI.AddToWorklist(VExtend.getNode());
          SDValue Extract1 =
            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
                        VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
          DCI.AddToWorklist(Extract1.getNode());
          DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
          if (N->isStrictFPOpcode())
            DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
          SDValue Extract0 =
            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
                        VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
          if (Chain)
            return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
                               N->getVTList(), Extract0, Chain);
          return Extract0;
        }
      }
    }
  }
  return SDValue();
}

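// DAG combine for BSWAP: fold byte-swapped loads into LRV/VLBR nodes and
// push BSWAP through vector insertions and shuffles when at least one side
// then simplifies.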
SDValue SystemZTargetLowering::combineBSWAP(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
  if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
      N->getOperand(0).hasOneUse() &&
      canLoadStoreByteSwapped(N->getValueType(0))) {
      SDValue Load = N->getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(Load);

      // Create the byte-swapping load.
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        LD->getBasePtr()   // Ptr
      };
      EVT LoadVT = N->getValueType(0);
      if (LoadVT == MVT::i16)
        LoadVT = MVT::i32;
      SDValue BSLoad =
        DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
                                DAG.getVTList(LoadVT, MVT::Other),
                                Ops, LD->getMemoryVT(), LD->getMemOperand());

      // If this is an i16 load, insert the truncate.
      SDValue ResVal = BSLoad;
      if (N->getValueType(0) == MVT::i16)
        ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);

      // First, combine the bswap away.  This makes the value produced by the
      // load dead.
      DCI.CombineTo(N, ResVal);

      // Next, combine the load away, we give it a bogus result value but a real
      // chain result.  The result value is dead because the bswap is dead.
      DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));

      // Return N so it doesn't get rechecked!
      return SDValue(N, 0);
  }

  // Look through bitcasts that retain the number of vector elements.
  SDValue Op = N->getOperand(0);
  if (Op.getOpcode() == ISD::BITCAST &&
      Op.getValueType().isVector() &&
      Op.getOperand(0).getValueType().isVector() &&
      Op.getValueType().getVectorNumElements() ==
      Op.getOperand(0).getValueType().getVectorNumElements())
    Op = Op.getOperand(0);

  // Push BSWAP into a vector insertion if at least one side then simplifies.
  if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
    SDValue Vec = Op.getOperand(0);
    SDValue Elt = Op.getOperand(1);
    SDValue Idx = Op.getOperand(2);

    if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
        Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
        DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
        Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
        (canLoadStoreByteSwapped(N->getValueType(0)) &&
         ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
      EVT VecVT = N->getValueType(0);
      EVT EltVT = N->getValueType(0).getVectorElementType();
      if (VecVT != Vec.getValueType()) {
        Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
        DCI.AddToWorklist(Vec.getNode());
      }
      if (EltVT != Elt.getValueType()) {
        Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
        DCI.AddToWorklist(Elt.getNode());
      }
      Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
      DCI.AddToWorklist(Vec.getNode());
      Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
      DCI.AddToWorklist(Elt.getNode());
      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
                         Vec, Elt, Idx);
    }
  }

  // Push BSWAP into a vector shuffle if at least one side then simplifies.
  ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
  if (SV && Op.hasOneUse()) {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
        Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
        DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
        Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
      EVT VecVT = N->getValueType(0);
      if (VecVT != Op0.getValueType()) {
        Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
        DCI.AddToWorklist(Op0.getNode());
      }
      if (VecVT != Op1.getValueType()) {
        Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
        DCI.AddToWorklist(Op1.getNode());
      }
      Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
      DCI.AddToWorklist(Op0.getNode());
      Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
      DCI.AddToWorklist(Op1.getNode());
      return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
    }
  }

  return SDValue();
}

static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
  // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
  // set by the CCReg instruction using the CCValid / CCMask masks,
  // If the CCReg instruction is itself a ICMP testing the condition
  // code set by some other instruction, see whether we can directly
  // use that condition code.

  // Verify that we have an ICMP against some constant.
  if (CCValid != SystemZ::CCMASK_ICMP)
    return false;
  auto *ICmp = CCReg.getNode();
  if (ICmp->getOpcode() != SystemZISD::ICMP)
    return false;
  auto *CompareLHS = ICmp->getOperand(0).getNode();
  auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
  if (!CompareRHS)
    return false;

  // Optimize the case where CompareLHS is a SELECT_CCMASK.
  if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
    // Verify that we have an appropriate mask for a EQ or NE comparison.
    bool Invert = false;
    if (CCMask == SystemZ::CCMASK_CMP_NE)
      Invert = !Invert;
    else if (CCMask != SystemZ::CCMASK_CMP_EQ)
      return false;

    // Verify that the ICMP compares against one of select values.
    auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
    if (!TrueVal)
      return false;
    auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
    if (!FalseVal)
      return false;
    if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
      Invert = !Invert;
    else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
      return false;

    // Compute the effective CC mask for the new branch or select.
    auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
    auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
    if (!NewCCValid || !NewCCMask)
      return false;
    CCValid = NewCCValid->getZExtValue();
    CCMask = NewCCMask->getZExtValue();
    if (Invert)
      CCMask ^= CCValid;

    // Return the updated CCReg link.
    CCReg = CompareLHS->getOperand(4);
    return true;
  }

  // Optimize the case where CompareRHS is (SRA (SHL (IPM))).
  if (CompareLHS->getOpcode() == ISD::SRA) {
    auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
    if (!SRACount || SRACount->getZExtValue() != 30)
      return false;
    auto *SHL = CompareLHS->getOperand(0).getNode();
    if (SHL->getOpcode() != ISD::SHL)
      return false;
    auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
    if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
      return false;
    auto *IPM = SHL->getOperand(0).getNode();
    if (IPM->getOpcode() != SystemZISD::IPM)
      return false;

    // Avoid introducing CC spills (because SRA would clobber CC).
    if (!CompareLHS->hasOneUse())
      return false;
    // Verify that the ICMP compares against zero.
    if (CompareRHS->getZExtValue() != 0)
      return false;

    // Compute the effective CC mask for the new branch or select.
    switch (CCMask) {
    case SystemZ::CCMASK_CMP_EQ: break;
    case SystemZ::CCMASK_CMP_NE: break;
    case SystemZ::CCMASK_CMP_LT: CCMask = SystemZ::CCMASK_CMP_GT; break;
    case SystemZ::CCMASK_CMP_GT: CCMask = SystemZ::CCMASK_CMP_LT; break;
    case SystemZ::CCMASK_CMP_LE: CCMask = SystemZ::CCMASK_CMP_GE; break;
    case SystemZ::CCMASK_CMP_GE: CCMask = SystemZ::CCMASK_CMP_LE; break;
    default: return false;
    }
    CCValid = SystemZ::CCMASK_ANY;

    // Return the updated CCReg link.
    CCReg = IPM->getOperand(0);
    return true;
  }

  return false;
}

SDValue SystemZTargetLowering::combineBR_CCMASK(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
  auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
  auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
  if (!CCValid || !CCMask)
    return SDValue();

  int CCValidVal = CCValid->getZExtValue();
  int CCMaskVal = CCMask->getZExtValue();
  SDValue Chain = N->getOperand(0);
  SDValue CCReg = N->getOperand(4);

  if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
    return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
                       Chain,
                       DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
                       DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
                       N->getOperand(3), CCReg);
  return SDValue();
}

SDValue SystemZTargetLowering::combineSELECT_CCMASK(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
  auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
  auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
  if (!CCValid || !CCMask)
    return SDValue();

  int CCValidVal = CCValid->getZExtValue();
  int CCMaskVal = CCMask->getZExtValue();
  SDValue CCReg = N->getOperand(4);

  if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
    return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
                       N->getOperand(0), N->getOperand(1),
                       DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
                       DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
                       CCReg);
  return SDValue();
}

SDValue SystemZTargetLowering::combineGET_CCMASK(
    SDNode *N, DAGCombinerInfo &DCI) const {

  // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible.
  auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
  auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
  if (!CCValid || !CCMask)
    return SDValue();
  int CCValidVal = CCValid->getZExtValue();
  int CCMaskVal = CCMask->getZExtValue();

  SDValue Select = N->getOperand(0);
  if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
    return SDValue();

  auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
  auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
  if (!SelectCCValid || !SelectCCMask)
    return SDValue();
  int SelectCCValidVal = SelectCCValid->getZExtValue();
  int SelectCCMaskVal = SelectCCMask->getZExtValue();

  auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
  auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
  if (!TrueVal || !FalseVal)
    return SDValue();
  if (TrueVal->getZExtValue() != 0 && FalseVal->getZExtValue() == 0)
    ;
  else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() != 0)
    SelectCCMaskVal ^= SelectCCValidVal;
  else
    return SDValue();

  if (SelectCCValidVal & ~CCValidVal)
    return SDValue();
  if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
    return SDValue();

  return Select->getOperand(4);
}

SDValue SystemZTargetLowering::combineIntDIVREM(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  EVT VT = N->getValueType(0);
  // In the case where the divisor is a vector of constants, a cheaper
  // sequence of instructions can replace the divide. BuildSDIV is called to
  // do this during DAG combining, but it only succeeds when it can build a
  // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
  // since it is not Legal but Custom it can only happen before
  // legalization. Therefore we must scalarize this early before Combine
  // 1. For widened vectors, this is already the result of type legalization.
  if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
      DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
    return DAG.UnrollVectorOp(N);

  return SDValue();
}

SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
  if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
    return N->getOperand(0);
  return N;
}

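// Main DAG-combine entry point: dispatch to the node-specific combines above.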
SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
  switch(N->getOpcode()) {
  default: break;
  case ISD::ZERO_EXTEND:        return combineZERO_EXTEND(N, DCI);
  case ISD::SIGN_EXTEND:        return combineSIGN_EXTEND(N, DCI);
  case ISD::SIGN_EXTEND_INREG:  return combineSIGN_EXTEND_INREG(N, DCI);
  case SystemZISD::MERGE_HIGH:
  case SystemZISD::MERGE_LOW:   return combineMERGE(N, DCI);
  case ISD::LOAD:               return combineLOAD(N, DCI);
  case ISD::STORE:              return combineSTORE(N, DCI);
  case ISD::VECTOR_SHUFFLE:     return combineVECTOR_SHUFFLE(N, DCI);
  case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
  case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
  case ISD::STRICT_FP_ROUND:
  case ISD::FP_ROUND:           return combineFP_ROUND(N, DCI);
  case ISD::STRICT_FP_EXTEND:
  case ISD::FP_EXTEND:          return combineFP_EXTEND(N, DCI);
  case ISD::BSWAP:              return combineBSWAP(N, DCI);
  case SystemZISD::BR_CCMASK:   return combineBR_CCMASK(N, DCI);
  case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
  case SystemZISD::GET_CCMASK:  return combineGET_CCMASK(N, DCI);
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::SREM:
  case ISD::UREM:               return combineIntDIVREM(N, DCI);
  }

  return SDValue();
}

// Return the demanded elements for the OpNo source operand of Op. DemandedElts
// are for Op.
static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
                                    unsigned OpNo) {
  EVT VT = Op.getValueType();
  unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
  APInt SrcDemE;
  unsigned Opcode = Op.getOpcode();
  if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
    unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    switch (Id) {
    case Intrinsic::s390_vpksh:   // PACKS
    case Intrinsic::s390_vpksf:
    case Intrinsic::s390_vpksg:
    case Intrinsic::s390_vpkshs:  // PACKS_CC
    case Intrinsic::s390_vpksfs:
    case Intrinsic::s390_vpksgs:
    case Intrinsic::s390_vpklsh:  // PACKLS
    case Intrinsic::s390_vpklsf:
    case Intrinsic::s390_vpklsg:
    case Intrinsic::s390_vpklshs: // PACKLS_CC
    case Intrinsic::s390_vpklsfs:
    case Intrinsic::s390_vpklsgs:
      // VECTOR PACK truncates the elements of two source vectors into one.
      SrcDemE = DemandedElts;
      if (OpNo == 2)
        SrcDemE.lshrInPlace(NumElts / 2);
      SrcDemE = SrcDemE.trunc(NumElts / 2);
      break;
      // VECTOR UNPACK extends half the elements of the source vector.
    case Intrinsic::s390_vuphb:  // VECTOR UNPACK HIGH
    case Intrinsic::s390_vuphh:
    case Intrinsic::s390_vuphf:
    case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
    case Intrinsic::s390_vuplhh:
    case Intrinsic::s390_vuplhf:
      SrcDemE = APInt(NumElts * 2, 0);
      SrcDemE.insertBits(DemandedElts, 0);
      break;
    case Intrinsic::s390_vuplb:  // VECTOR UNPACK LOW
    case Intrinsic::s390_vuplhw:
    case Intrinsic::s390_vuplf:
    case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
    case Intrinsic::s390_vupllh:
    case Intrinsic::s390_vupllf:
      SrcDemE = APInt(NumElts * 2, 0);
      SrcDemE.insertBits(DemandedElts, NumElts);
      break;
    case Intrinsic::s390_vpdi: {
      // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
      SrcDemE = APInt(NumElts, 0);
      if (!DemandedElts[OpNo - 1])
        break;
      unsigned Mask = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
      // Demand input element 0 or 1, given by the mask bit value.
      SrcDemE.setBit((Mask & MaskBit) ? 1 : 0);
      break;
    }
    case Intrinsic::s390_vsldb: {
      // VECTOR SHIFT LEFT DOUBLE BY BYTE
      assert(VT == MVT::v16i8 && "Unexpected type.");
      unsigned FirstIdx = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
      unsigned NumSrc0Els = 16 - FirstIdx;
      SrcDemE = APInt(NumElts, 0);
      if (OpNo == 1) {
        APInt DemEls = DemandedElts.trunc(NumSrc0Els);
        SrcDemE.insertBits(DemEls, FirstIdx);
      } else {
        APInt DemEls = DemandedElts.lshr(NumSrc0Els);
        SrcDemE.insertBits(DemEls, 0);
      }
      break;
    }
    case Intrinsic::s390_vperm:
      SrcDemE = APInt(NumElts, 1);
      break;
    default:
      llvm_unreachable("Unhandled intrinsic.");
      break;
    }
  } else {
    switch (Opcode) {
    case SystemZISD::JOIN_DWORDS:
      // Scalar operand.
      SrcDemE = APInt(1, 1);
      break;
    case SystemZISD::SELECT_CCMASK:
      SrcDemE = DemandedElts;
      break;
    default:
      llvm_unreachable("Unhandled opcode.");
      break;
    }
  }
  return SrcDemE;
}

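// For operations whose result elements are each taken from one of the two
// source operands, a bit is known in the result only if it is known in both
// sources (restricted to the demanded elements of each source).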
static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
                                  const APInt &DemandedElts,
                                  const SelectionDAG &DAG, unsigned Depth,
                                  unsigned OpNo) {
  APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
  APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
  KnownBits LHSKnown =
      DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
  KnownBits RHSKnown =
      DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
  Known.Zero = LHSKnown.Zero & RHSKnown.Zero;
  Known.One = LHSKnown.One & RHSKnown.One;
}

void
SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                     KnownBits &Known,
                                                     const APInt &DemandedElts,
                                                     const SelectionDAG &DAG,
                                                     unsigned Depth) const {
  Known.resetAll();

  // Intrinsic CC result is returned in the two low bits.
  unsigned tmp0, tmp1; // not used
  if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) {
    Known.Zero.setBitsFrom(2);
    return;
  }
  EVT VT = Op.getValueType();
  if (Op.getResNo() != 0 || VT == MVT::Untyped)
    return;
  assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
          "KnownBits does not match VT in bitwidth");
  assert ((!VT.isVector() ||
           (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
          "DemandedElts does not match VT number of elements");
  unsigned BitWidth = Known.getBitWidth();
  unsigned Opcode = Op.getOpcode();
  if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
    bool IsLogical = false;
    unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    switch (Id) {
    case Intrinsic::s390_vpksh:   // PACKS
    case Intrinsic::s390_vpksf:
    case Intrinsic::s390_vpksg:
    case Intrinsic::s390_vpkshs:  // PACKS_CC
    case Intrinsic::s390_vpksfs:
    case Intrinsic::s390_vpksgs:
    case Intrinsic::s390_vpklsh:  // PACKLS
    case Intrinsic::s390_vpklsf:
    case Intrinsic::s390_vpklsg:
    case Intrinsic::s390_vpklshs: // PACKLS_CC
    case Intrinsic::s390_vpklsfs:
    case Intrinsic::s390_vpklsgs:
    case Intrinsic::s390_vpdi:
    case Intrinsic::s390_vsldb:
    case Intrinsic::s390_vperm:
      computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
      break;
    case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
    case Intrinsic::s390_vuplhh:
    case Intrinsic::s390_vuplhf:
    case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
    case Intrinsic::s390_vupllh:
    case Intrinsic::s390_vupllf:
      IsLogical = true;
      LLVM_FALLTHROUGH;
    case Intrinsic::s390_vuphb:  // VECTOR UNPACK HIGH
    case Intrinsic::s390_vuphh:
    case Intrinsic::s390_vuphf:
    case Intrinsic::s390_vuplb:  // VECTOR UNPACK LOW
    case Intrinsic::s390_vuplhw:
    case Intrinsic::s390_vuplf: {
      SDValue SrcOp = Op.getOperand(1);
      APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
      Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
      if (IsLogical) {
        Known = Known.zext(BitWidth, true);
      } else
        Known = Known.sext(BitWidth);
      break;
    }
    default:
      break;
    }
  } else {
    switch (Opcode) {
    case SystemZISD::JOIN_DWORDS:
    case SystemZISD::SELECT_CCMASK:
      computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
      break;
    case SystemZISD::REPLICATE: {
      SDValue SrcOp = Op.getOperand(0);
      Known = DAG.computeKnownBits(SrcOp, Depth + 1);
      if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
        Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
      break;
    }
    default:
      break;
    }
  }

  // Known has the width of the source operand(s). Adjust if needed to match
  // the passed bitwidth.
  if (Known.getBitWidth() != BitWidth)
    Known = Known.zextOrTrunc(BitWidth, false);
}

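// Compute a conservative number of sign bits for a node whose result elements
// are each drawn from one of two source operands: take the minimum over both
// sources, then compensate for the element narrowing done by VECTOR PACK.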
static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
                                        const SelectionDAG &DAG, unsigned Depth,
                                        unsigned OpNo) {
  APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
  unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
  if (LHS == 1) return 1; // Early out.
  APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
  unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
  if (RHS == 1) return 1; // Early out.
  unsigned Common = std::min(LHS, RHS);
  unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
  EVT VT = Op.getValueType();
  unsigned VTBits = VT.getScalarSizeInBits();
  if (SrcBitWidth > VTBits) { // PACK
    unsigned SrcExtraBits = SrcBitWidth - VTBits;
    if (Common > SrcExtraBits)
      return (Common - SrcExtraBits);
    return 1;
  }
  assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
  return Common;
}

unsigned
SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
  if (Op.getResNo() != 0)
    return 1;
  unsigned Opcode = Op.getOpcode();
  if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
    unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    switch (Id) {
    case Intrinsic::s390_vpksh:   // PACKS
    case Intrinsic::s390_vpksf:
    case Intrinsic::s390_vpksg:
    case Intrinsic::s390_vpkshs:  // PACKS_CC
    case Intrinsic::s390_vpksfs:
    case Intrinsic::s390_vpksgs:
    case Intrinsic::s390_vpklsh:  // PACKLS
    case Intrinsic::s390_vpklsf:
    case Intrinsic::s390_vpklsg:
    case Intrinsic::s390_vpklshs: // PACKLS_CC
    case Intrinsic::s390_vpklsfs:
    case Intrinsic::s390_vpklsgs:
    case Intrinsic::s390_vpdi:
    case Intrinsic::s390_vsldb:
    case Intrinsic::s390_vperm:
      return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
    case Intrinsic::s390_vuphb:  // VECTOR UNPACK HIGH
    case Intrinsic::s390_vuphh:
    case Intrinsic::s390_vuphf:
    case Intrinsic::s390_vuplb:  // VECTOR UNPACK LOW
    case Intrinsic::s390_vuplhw:
    case Intrinsic::s390_vuplf: {
      SDValue PackedOp = Op.getOperand(1);
      APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
      unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
      EVT VT = Op.getValueType();
      unsigned VTBits = VT.getScalarSizeInBits();
      Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
      return Tmp;
    }
    default:
      break;
    }
  } else {
    switch (Opcode) {
    case SystemZISD::SELECT_CCMASK:
      return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
    default:
      break;
    }
  }

  return 1;
}

//===----------------------------------------------------------------------===//
// Custom insertion
//===----------------------------------------------------------------------===//

// Create a new basic block after MBB.
static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) {
  MachineFunction &MF = *MBB->getParent();
  MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
  MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
  return NewMBB;
}

// Split MBB after MI and return the new block (the one that contains
// instructions after MI).
static MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI,
                                          MachineBasicBlock *MBB) {
  MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
  NewMBB->splice(NewMBB->begin(), MBB,
                 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
  return NewMBB;
}

// Split MBB before MI and return the new block (the one that contains MI).
static MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
                                           MachineBasicBlock *MBB) {
  MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
  NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
  NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
  return NewMBB;
}

// Force base value Base into a register before MI. Return the register.
static Register forceReg(MachineInstr &MI, MachineOperand &Base,
                         const SystemZInstrInfo *TII) {
  if (Base.isReg())
    return Base.getReg();

  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction &MF = *MBB->getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
  BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
      .add(Base)
      .addImm(0)
      .addReg(0);
  return Reg;
}

// The CC operand of MI might be missing a kill marker because there
// were multiple uses of CC, and ISel didn't know which to mark.
// Figure out whether MI should have had a kill marker.
static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
  // Scan forward through BB for a use/def of CC.
  MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
  for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
    const MachineInstr& mi = *miI;
    if (mi.readsRegister(SystemZ::CC))
      return false;
    if (mi.definesRegister(SystemZ::CC))
      break; // Should have kill-flag - update below.
  }

  // If we hit the end of the block, check whether CC is live into a
  // successor.
  if (miI == MBB->end()) {
    for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI)
      if ((*SI)->isLiveIn(SystemZ::CC))
        return false;
  }

  return true;
}

// Return true if it is OK for this Select pseudo-opcode to be cascaded
// together with other Select pseudo-opcodes into a single basic-block with
// a conditional jump around it.
static bool isSelectPseudo(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case SystemZ::Select32:
  case SystemZ::Select64:
  case SystemZ::SelectF32:
  case SystemZ::SelectF64:
  case SystemZ::SelectF128:
  case SystemZ::SelectVR32:
  case SystemZ::SelectVR64:
  case SystemZ::SelectVR128:
    return true;

  default:
    return false;
  }
}
6788 // Helper function, which inserts PHI functions into SinkMBB:
6789 // %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
6790 // where %FalseValue(i) and %TrueValue(i) are taken from Selects.
6791 static void createPHIsForSelects(SmallVector
<MachineInstr
*, 8> &Selects
,
6792 MachineBasicBlock
*TrueMBB
,
6793 MachineBasicBlock
*FalseMBB
,
6794 MachineBasicBlock
*SinkMBB
) {
6795 MachineFunction
*MF
= TrueMBB
->getParent();
6796 const TargetInstrInfo
*TII
= MF
->getSubtarget().getInstrInfo();
6798 MachineInstr
*FirstMI
= Selects
.front();
6799 unsigned CCValid
= FirstMI
->getOperand(3).getImm();
6800 unsigned CCMask
= FirstMI
->getOperand(4).getImm();
6802 MachineBasicBlock::iterator SinkInsertionPoint
= SinkMBB
->begin();
6804 // As we are creating the PHIs, we have to be careful if there is more than
6805 // one. Later Selects may reference the results of earlier Selects, but later
6806 // PHIs have to reference the individual true/false inputs from earlier PHIs.
6807 // That also means that PHI construction must work forward from earlier to
  // later, and that the code must maintain a mapping from earlier PHIs'
  // destination registers to the registers that went into those PHIs.
6810 DenseMap
<unsigned, std::pair
<unsigned, unsigned>> RegRewriteTable
;
6812 for (auto MI
: Selects
) {
6813 Register DestReg
= MI
->getOperand(0).getReg();
6814 Register TrueReg
= MI
->getOperand(1).getReg();
6815 Register FalseReg
= MI
->getOperand(2).getReg();
6817 // If this Select we are generating is the opposite condition from
6818 // the jump we generated, then we have to swap the operands for the
6819 // PHI that is going to be generated.
6820 if (MI
->getOperand(4).getImm() == (CCValid
^ CCMask
))
6821 std::swap(TrueReg
, FalseReg
);
6823 if (RegRewriteTable
.find(TrueReg
) != RegRewriteTable
.end())
6824 TrueReg
= RegRewriteTable
[TrueReg
].first
;
6826 if (RegRewriteTable
.find(FalseReg
) != RegRewriteTable
.end())
6827 FalseReg
= RegRewriteTable
[FalseReg
].second
;
6829 DebugLoc DL
= MI
->getDebugLoc();
6830 BuildMI(*SinkMBB
, SinkInsertionPoint
, DL
, TII
->get(SystemZ::PHI
), DestReg
)
6831 .addReg(TrueReg
).addMBB(TrueMBB
)
6832 .addReg(FalseReg
).addMBB(FalseMBB
);
6834 // Add this PHI to the rewrite table.
6835 RegRewriteTable
[DestReg
] = std::make_pair(TrueReg
, FalseReg
);
6838 MF
->getProperties().reset(MachineFunctionProperties::Property::NoPHIs
);
6841 // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
6843 SystemZTargetLowering::emitSelect(MachineInstr
&MI
,
6844 MachineBasicBlock
*MBB
) const {
6845 assert(isSelectPseudo(MI
) && "Bad call to emitSelect()");
6846 const SystemZInstrInfo
*TII
=
6847 static_cast<const SystemZInstrInfo
*>(Subtarget
.getInstrInfo());
6849 unsigned CCValid
= MI
.getOperand(3).getImm();
6850 unsigned CCMask
= MI
.getOperand(4).getImm();
6852 // If we have a sequence of Select* pseudo instructions using the
6853 // same condition code value, we want to expand all of them into
6854 // a single pair of basic blocks using the same condition.
6855 SmallVector
<MachineInstr
*, 8> Selects
;
6856 SmallVector
<MachineInstr
*, 8> DbgValues
;
6857 Selects
.push_back(&MI
);
6859 for (MachineBasicBlock::iterator NextMIIt
=
6860 std::next(MachineBasicBlock::iterator(MI
));
6861 NextMIIt
!= MBB
->end(); ++NextMIIt
) {
6862 if (NextMIIt
->definesRegister(SystemZ::CC
))
6864 if (isSelectPseudo(*NextMIIt
)) {
6865 assert(NextMIIt
->getOperand(3).getImm() == CCValid
&&
6866 "Bad CCValid operands since CC was not redefined.");
6867 if (NextMIIt
->getOperand(4).getImm() == CCMask
||
6868 NextMIIt
->getOperand(4).getImm() == (CCValid
^ CCMask
)) {
6869 Selects
.push_back(&*NextMIIt
);
6875 for (auto SelMI
: Selects
)
6876 if (NextMIIt
->readsVirtualRegister(SelMI
->getOperand(0).getReg())) {
6880 if (NextMIIt
->isDebugInstr()) {
6882 assert(NextMIIt
->isDebugValue() && "Unhandled debug opcode.");
6883 DbgValues
.push_back(&*NextMIIt
);
6886 else if (User
|| ++Count
> 20)
6890 MachineInstr
*LastMI
= Selects
.back();
6892 (LastMI
->killsRegister(SystemZ::CC
) || checkCCKill(*LastMI
, MBB
));
6893 MachineBasicBlock
*StartMBB
= MBB
;
6894 MachineBasicBlock
*JoinMBB
= splitBlockAfter(LastMI
, MBB
);
6895 MachineBasicBlock
*FalseMBB
= emitBlockAfter(StartMBB
);
6897 // Unless CC was killed in the last Select instruction, mark it as
6898 // live-in to both FalseMBB and JoinMBB.
6900 FalseMBB
->addLiveIn(SystemZ::CC
);
6901 JoinMBB
->addLiveIn(SystemZ::CC
);
6905 // BRC CCMask, JoinMBB
6906 // # fallthrough to FalseMBB
6908 BuildMI(MBB
, MI
.getDebugLoc(), TII
->get(SystemZ::BRC
))
6909 .addImm(CCValid
).addImm(CCMask
).addMBB(JoinMBB
);
6910 MBB
->addSuccessor(JoinMBB
);
6911 MBB
->addSuccessor(FalseMBB
);
6914 // # fallthrough to JoinMBB
6916 MBB
->addSuccessor(JoinMBB
);
6919 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
6922 createPHIsForSelects(Selects
, StartMBB
, FalseMBB
, MBB
);
6923 for (auto SelMI
: Selects
)
6924 SelMI
->eraseFromParent();
6926 MachineBasicBlock::iterator InsertPos
= MBB
->getFirstNonPHI();
6927 for (auto DbgMI
: DbgValues
)
6928 MBB
->splice(InsertPos
, StartMBB
, DbgMI
);
6933 // Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
6934 // StoreOpcode is the store to use and Invert says whether the store should
6935 // happen when the condition is false rather than true. If a STORE ON
6936 // CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
6937 MachineBasicBlock
*SystemZTargetLowering::emitCondStore(MachineInstr
&MI
,
6938 MachineBasicBlock
*MBB
,
6939 unsigned StoreOpcode
,
6940 unsigned STOCOpcode
,
6941 bool Invert
) const {
6942 const SystemZInstrInfo
*TII
=
6943 static_cast<const SystemZInstrInfo
*>(Subtarget
.getInstrInfo());
6945 Register SrcReg
= MI
.getOperand(0).getReg();
6946 MachineOperand Base
= MI
.getOperand(1);
6947 int64_t Disp
= MI
.getOperand(2).getImm();
6948 Register IndexReg
= MI
.getOperand(3).getReg();
6949 unsigned CCValid
= MI
.getOperand(4).getImm();
6950 unsigned CCMask
= MI
.getOperand(5).getImm();
6951 DebugLoc DL
= MI
.getDebugLoc();
6953 StoreOpcode
= TII
->getOpcodeForOffset(StoreOpcode
, Disp
);
6955 // Use STOCOpcode if possible. We could use different store patterns in
6956 // order to avoid matching the index register, but the performance trade-offs
6957 // might be more complicated in that case.
6958 if (STOCOpcode
&& !IndexReg
&& Subtarget
.hasLoadStoreOnCond()) {
6962 // ISel pattern matching also adds a load memory operand of the same
6963 // address, so take special care to find the storing memory operand.
6964 MachineMemOperand
*MMO
= nullptr;
6965 for (auto *I
: MI
.memoperands())
6971 BuildMI(*MBB
, MI
, DL
, TII
->get(STOCOpcode
))
6977 .addMemOperand(MMO
);
6979 MI
.eraseFromParent();
6983 // Get the condition needed to branch around the store.
6987 MachineBasicBlock
*StartMBB
= MBB
;
6988 MachineBasicBlock
*JoinMBB
= splitBlockBefore(MI
, MBB
);
6989 MachineBasicBlock
*FalseMBB
= emitBlockAfter(StartMBB
);
6991 // Unless CC was killed in the CondStore instruction, mark it as
6992 // live-in to both FalseMBB and JoinMBB.
6993 if (!MI
.killsRegister(SystemZ::CC
) && !checkCCKill(MI
, JoinMBB
)) {
6994 FalseMBB
->addLiveIn(SystemZ::CC
);
6995 JoinMBB
->addLiveIn(SystemZ::CC
);
6999 // BRC CCMask, JoinMBB
7000 // # fallthrough to FalseMBB
7002 BuildMI(MBB
, DL
, TII
->get(SystemZ::BRC
))
7003 .addImm(CCValid
).addImm(CCMask
).addMBB(JoinMBB
);
7004 MBB
->addSuccessor(JoinMBB
);
7005 MBB
->addSuccessor(FalseMBB
);
7008 // store %SrcReg, %Disp(%Index,%Base)
7009 // # fallthrough to JoinMBB
7011 BuildMI(MBB
, DL
, TII
->get(StoreOpcode
))
7016 MBB
->addSuccessor(JoinMBB
);
7018 MI
.eraseFromParent();
7022 // Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_*
7023 // or ATOMIC_SWAP{,W} instruction MI. BinOpcode is the instruction that
7024 // performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}.
7025 // BitSize is the width of the field in bits, or 0 if this is a partword
7026 // ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize
7027 // is one of the operands. Invert says whether the field should be
7028 // inverted after performing BinOpcode (e.g. for NAND).
7029 MachineBasicBlock
*SystemZTargetLowering::emitAtomicLoadBinary(
7030 MachineInstr
&MI
, MachineBasicBlock
*MBB
, unsigned BinOpcode
,
7031 unsigned BitSize
, bool Invert
) const {
7032 MachineFunction
&MF
= *MBB
->getParent();
7033 const SystemZInstrInfo
*TII
=
7034 static_cast<const SystemZInstrInfo
*>(Subtarget
.getInstrInfo());
7035 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
7036 bool IsSubWord
= (BitSize
< 32);
7038 // Extract the operands. Base can be a register or a frame index.
7039 // Src2 can be a register or immediate.
7040 Register Dest
= MI
.getOperand(0).getReg();
7041 MachineOperand Base
= earlyUseOperand(MI
.getOperand(1));
7042 int64_t Disp
= MI
.getOperand(2).getImm();
7043 MachineOperand Src2
= earlyUseOperand(MI
.getOperand(3));
7044 Register BitShift
= IsSubWord
? MI
.getOperand(4).getReg() : Register();
7045 Register NegBitShift
= IsSubWord
? MI
.getOperand(5).getReg() : Register();
7046 DebugLoc DL
= MI
.getDebugLoc();
7048 BitSize
= MI
.getOperand(6).getImm();
7050 // Subword operations use 32-bit registers.
7051 const TargetRegisterClass
*RC
= (BitSize
<= 32 ?
7052 &SystemZ::GR32BitRegClass
:
7053 &SystemZ::GR64BitRegClass
);
7054 unsigned LOpcode
= BitSize
<= 32 ? SystemZ::L
: SystemZ::LG
;
7055 unsigned CSOpcode
= BitSize
<= 32 ? SystemZ::CS
: SystemZ::CSG
;
7057 // Get the right opcodes for the displacement.
7058 LOpcode
= TII
->getOpcodeForOffset(LOpcode
, Disp
);
7059 CSOpcode
= TII
->getOpcodeForOffset(CSOpcode
, Disp
);
7060 assert(LOpcode
&& CSOpcode
&& "Displacement out of range");
7062 // Create virtual registers for temporary results.
7063 Register OrigVal
= MRI
.createVirtualRegister(RC
);
7064 Register OldVal
= MRI
.createVirtualRegister(RC
);
7065 Register NewVal
= (BinOpcode
|| IsSubWord
?
7066 MRI
.createVirtualRegister(RC
) : Src2
.getReg());
7067 Register RotatedOldVal
= (IsSubWord
? MRI
.createVirtualRegister(RC
) : OldVal
);
7068 Register RotatedNewVal
= (IsSubWord
? MRI
.createVirtualRegister(RC
) : NewVal
);
7070 // Insert a basic block for the main loop.
7071 MachineBasicBlock
*StartMBB
= MBB
;
7072 MachineBasicBlock
*DoneMBB
= splitBlockBefore(MI
, MBB
);
7073 MachineBasicBlock
*LoopMBB
= emitBlockAfter(StartMBB
);
7077 // %OrigVal = L Disp(%Base)
  //   # fall through to LoopMBB
7080 BuildMI(MBB
, DL
, TII
->get(LOpcode
), OrigVal
).add(Base
).addImm(Disp
).addReg(0);
7081 MBB
->addSuccessor(LoopMBB
);
7084 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
7085 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
7086 // %RotatedNewVal = OP %RotatedOldVal, %Src2
7087 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
7088 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
  //   # fall through to DoneMBB
7092 BuildMI(MBB
, DL
, TII
->get(SystemZ::PHI
), OldVal
)
7093 .addReg(OrigVal
).addMBB(StartMBB
)
7094 .addReg(Dest
).addMBB(LoopMBB
);
7096 BuildMI(MBB
, DL
, TII
->get(SystemZ::RLL
), RotatedOldVal
)
7097 .addReg(OldVal
).addReg(BitShift
).addImm(0);
7099 // Perform the operation normally and then invert every bit of the field.
7100 Register Tmp
= MRI
.createVirtualRegister(RC
);
7101 BuildMI(MBB
, DL
, TII
->get(BinOpcode
), Tmp
).addReg(RotatedOldVal
).add(Src2
);
7103 // XILF with the upper BitSize bits set.
7104 BuildMI(MBB
, DL
, TII
->get(SystemZ::XILF
), RotatedNewVal
)
7105 .addReg(Tmp
).addImm(-1U << (32 - BitSize
));
7107 // Use LCGR and add -1 to the result, which is more compact than
7108 // an XILF, XILH pair.
7109 Register Tmp2
= MRI
.createVirtualRegister(RC
);
7110 BuildMI(MBB
, DL
, TII
->get(SystemZ::LCGR
), Tmp2
).addReg(Tmp
);
7111 BuildMI(MBB
, DL
, TII
->get(SystemZ::AGHI
), RotatedNewVal
)
7112 .addReg(Tmp2
).addImm(-1);
7114 } else if (BinOpcode
)
    // A simple binary operation.
7116 BuildMI(MBB
, DL
, TII
->get(BinOpcode
), RotatedNewVal
)
7117 .addReg(RotatedOldVal
)
7120 // Use RISBG to rotate Src2 into position and use it to replace the
7121 // field in RotatedOldVal.
7122 BuildMI(MBB
, DL
, TII
->get(SystemZ::RISBG32
), RotatedNewVal
)
7123 .addReg(RotatedOldVal
).addReg(Src2
.getReg())
7124 .addImm(32).addImm(31 + BitSize
).addImm(32 - BitSize
);
7126 BuildMI(MBB
, DL
, TII
->get(SystemZ::RLL
), NewVal
)
7127 .addReg(RotatedNewVal
).addReg(NegBitShift
).addImm(0);
7128 BuildMI(MBB
, DL
, TII
->get(CSOpcode
), Dest
)
7133 BuildMI(MBB
, DL
, TII
->get(SystemZ::BRC
))
7134 .addImm(SystemZ::CCMASK_CS
).addImm(SystemZ::CCMASK_CS_NE
).addMBB(LoopMBB
);
7135 MBB
->addSuccessor(LoopMBB
);
7136 MBB
->addSuccessor(DoneMBB
);
7138 MI
.eraseFromParent();
7142 // Implement EmitInstrWithCustomInserter for pseudo
7143 // ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
7144 // instruction that should be used to compare the current field with the
7145 // minimum or maximum value. KeepOldMask is the BRC condition-code mask
7146 // for when the current field should be kept. BitSize is the width of
7147 // the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction.
7148 MachineBasicBlock
*SystemZTargetLowering::emitAtomicLoadMinMax(
7149 MachineInstr
&MI
, MachineBasicBlock
*MBB
, unsigned CompareOpcode
,
7150 unsigned KeepOldMask
, unsigned BitSize
) const {
7151 MachineFunction
&MF
= *MBB
->getParent();
7152 const SystemZInstrInfo
*TII
=
7153 static_cast<const SystemZInstrInfo
*>(Subtarget
.getInstrInfo());
7154 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
7155 bool IsSubWord
= (BitSize
< 32);
7157 // Extract the operands. Base can be a register or a frame index.
7158 Register Dest
= MI
.getOperand(0).getReg();
7159 MachineOperand Base
= earlyUseOperand(MI
.getOperand(1));
7160 int64_t Disp
= MI
.getOperand(2).getImm();
7161 Register Src2
= MI
.getOperand(3).getReg();
7162 Register BitShift
= (IsSubWord
? MI
.getOperand(4).getReg() : Register());
7163 Register NegBitShift
= (IsSubWord
? MI
.getOperand(5).getReg() : Register());
7164 DebugLoc DL
= MI
.getDebugLoc();
7166 BitSize
= MI
.getOperand(6).getImm();
7168 // Subword operations use 32-bit registers.
7169 const TargetRegisterClass
*RC
= (BitSize
<= 32 ?
7170 &SystemZ::GR32BitRegClass
:
7171 &SystemZ::GR64BitRegClass
);
7172 unsigned LOpcode
= BitSize
<= 32 ? SystemZ::L
: SystemZ::LG
;
7173 unsigned CSOpcode
= BitSize
<= 32 ? SystemZ::CS
: SystemZ::CSG
;
7175 // Get the right opcodes for the displacement.
7176 LOpcode
= TII
->getOpcodeForOffset(LOpcode
, Disp
);
7177 CSOpcode
= TII
->getOpcodeForOffset(CSOpcode
, Disp
);
7178 assert(LOpcode
&& CSOpcode
&& "Displacement out of range");
7180 // Create virtual registers for temporary results.
7181 Register OrigVal
= MRI
.createVirtualRegister(RC
);
7182 Register OldVal
= MRI
.createVirtualRegister(RC
);
7183 Register NewVal
= MRI
.createVirtualRegister(RC
);
7184 Register RotatedOldVal
= (IsSubWord
? MRI
.createVirtualRegister(RC
) : OldVal
);
7185 Register RotatedAltVal
= (IsSubWord
? MRI
.createVirtualRegister(RC
) : Src2
);
7186 Register RotatedNewVal
= (IsSubWord
? MRI
.createVirtualRegister(RC
) : NewVal
);
7188 // Insert 3 basic blocks for the loop.
7189 MachineBasicBlock
*StartMBB
= MBB
;
7190 MachineBasicBlock
*DoneMBB
= splitBlockBefore(MI
, MBB
);
7191 MachineBasicBlock
*LoopMBB
= emitBlockAfter(StartMBB
);
7192 MachineBasicBlock
*UseAltMBB
= emitBlockAfter(LoopMBB
);
7193 MachineBasicBlock
*UpdateMBB
= emitBlockAfter(UseAltMBB
);
7197 // %OrigVal = L Disp(%Base)
  //   # fall through to LoopMBB
7200 BuildMI(MBB
, DL
, TII
->get(LOpcode
), OrigVal
).add(Base
).addImm(Disp
).addReg(0);
7201 MBB
->addSuccessor(LoopMBB
);
7204 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
7205 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
7206 // CompareOpcode %RotatedOldVal, %Src2
7207 // BRC KeepOldMask, UpdateMBB
7209 BuildMI(MBB
, DL
, TII
->get(SystemZ::PHI
), OldVal
)
7210 .addReg(OrigVal
).addMBB(StartMBB
)
7211 .addReg(Dest
).addMBB(UpdateMBB
);
7213 BuildMI(MBB
, DL
, TII
->get(SystemZ::RLL
), RotatedOldVal
)
7214 .addReg(OldVal
).addReg(BitShift
).addImm(0);
7215 BuildMI(MBB
, DL
, TII
->get(CompareOpcode
))
7216 .addReg(RotatedOldVal
).addReg(Src2
);
7217 BuildMI(MBB
, DL
, TII
->get(SystemZ::BRC
))
7218 .addImm(SystemZ::CCMASK_ICMP
).addImm(KeepOldMask
).addMBB(UpdateMBB
);
7219 MBB
->addSuccessor(UpdateMBB
);
7220 MBB
->addSuccessor(UseAltMBB
);
7223 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
  //   # fall through to UpdateMBB
7227 BuildMI(MBB
, DL
, TII
->get(SystemZ::RISBG32
), RotatedAltVal
)
7228 .addReg(RotatedOldVal
).addReg(Src2
)
7229 .addImm(32).addImm(31 + BitSize
).addImm(0);
7230 MBB
->addSuccessor(UpdateMBB
);
7233 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
7234 // [ %RotatedAltVal, UseAltMBB ]
7235 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
7236 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
  //   # fall through to DoneMBB
7240 BuildMI(MBB
, DL
, TII
->get(SystemZ::PHI
), RotatedNewVal
)
7241 .addReg(RotatedOldVal
).addMBB(LoopMBB
)
7242 .addReg(RotatedAltVal
).addMBB(UseAltMBB
);
7244 BuildMI(MBB
, DL
, TII
->get(SystemZ::RLL
), NewVal
)
7245 .addReg(RotatedNewVal
).addReg(NegBitShift
).addImm(0);
7246 BuildMI(MBB
, DL
, TII
->get(CSOpcode
), Dest
)
7251 BuildMI(MBB
, DL
, TII
->get(SystemZ::BRC
))
7252 .addImm(SystemZ::CCMASK_CS
).addImm(SystemZ::CCMASK_CS_NE
).addMBB(LoopMBB
);
7253 MBB
->addSuccessor(LoopMBB
);
7254 MBB
->addSuccessor(DoneMBB
);
7256 MI
.eraseFromParent();
7260 // Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW
7263 SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr
&MI
,
7264 MachineBasicBlock
*MBB
) const {
7266 MachineFunction
&MF
= *MBB
->getParent();
7267 const SystemZInstrInfo
*TII
=
7268 static_cast<const SystemZInstrInfo
*>(Subtarget
.getInstrInfo());
7269 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
7271 // Extract the operands. Base can be a register or a frame index.
7272 Register Dest
= MI
.getOperand(0).getReg();
7273 MachineOperand Base
= earlyUseOperand(MI
.getOperand(1));
7274 int64_t Disp
= MI
.getOperand(2).getImm();
7275 Register OrigCmpVal
= MI
.getOperand(3).getReg();
7276 Register OrigSwapVal
= MI
.getOperand(4).getReg();
7277 Register BitShift
= MI
.getOperand(5).getReg();
7278 Register NegBitShift
= MI
.getOperand(6).getReg();
7279 int64_t BitSize
= MI
.getOperand(7).getImm();
7280 DebugLoc DL
= MI
.getDebugLoc();
7282 const TargetRegisterClass
*RC
= &SystemZ::GR32BitRegClass
;
7284 // Get the right opcodes for the displacement.
7285 unsigned LOpcode
= TII
->getOpcodeForOffset(SystemZ::L
, Disp
);
7286 unsigned CSOpcode
= TII
->getOpcodeForOffset(SystemZ::CS
, Disp
);
7287 assert(LOpcode
&& CSOpcode
&& "Displacement out of range");
7289 // Create virtual registers for temporary results.
7290 Register OrigOldVal
= MRI
.createVirtualRegister(RC
);
7291 Register OldVal
= MRI
.createVirtualRegister(RC
);
7292 Register CmpVal
= MRI
.createVirtualRegister(RC
);
7293 Register SwapVal
= MRI
.createVirtualRegister(RC
);
7294 Register StoreVal
= MRI
.createVirtualRegister(RC
);
7295 Register RetryOldVal
= MRI
.createVirtualRegister(RC
);
7296 Register RetryCmpVal
= MRI
.createVirtualRegister(RC
);
7297 Register RetrySwapVal
= MRI
.createVirtualRegister(RC
);
7299 // Insert 2 basic blocks for the loop.
7300 MachineBasicBlock
*StartMBB
= MBB
;
7301 MachineBasicBlock
*DoneMBB
= splitBlockBefore(MI
, MBB
);
7302 MachineBasicBlock
*LoopMBB
= emitBlockAfter(StartMBB
);
7303 MachineBasicBlock
*SetMBB
= emitBlockAfter(LoopMBB
);
7307 // %OrigOldVal = L Disp(%Base)
  //   # fall through to LoopMBB
7310 BuildMI(MBB
, DL
, TII
->get(LOpcode
), OrigOldVal
)
7314 MBB
->addSuccessor(LoopMBB
);
7317 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
7318 // %CmpVal = phi [ %OrigCmpVal, EntryBB ], [ %RetryCmpVal, SetMBB ]
7319 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
7320 // %Dest = RLL %OldVal, BitSize(%BitShift)
7321 // ^^ The low BitSize bits contain the field
7323 // %RetryCmpVal = RISBG32 %CmpVal, %Dest, 32, 63-BitSize, 0
7324 // ^^ Replace the upper 32-BitSize bits of the
7325 // comparison value with those that we loaded,
7326 // so that we can use a full word comparison.
7327 // CR %Dest, %RetryCmpVal
7329 // # Fall through to SetMBB
7331 BuildMI(MBB
, DL
, TII
->get(SystemZ::PHI
), OldVal
)
7332 .addReg(OrigOldVal
).addMBB(StartMBB
)
7333 .addReg(RetryOldVal
).addMBB(SetMBB
);
7334 BuildMI(MBB
, DL
, TII
->get(SystemZ::PHI
), CmpVal
)
7335 .addReg(OrigCmpVal
).addMBB(StartMBB
)
7336 .addReg(RetryCmpVal
).addMBB(SetMBB
);
7337 BuildMI(MBB
, DL
, TII
->get(SystemZ::PHI
), SwapVal
)
7338 .addReg(OrigSwapVal
).addMBB(StartMBB
)
7339 .addReg(RetrySwapVal
).addMBB(SetMBB
);
7340 BuildMI(MBB
, DL
, TII
->get(SystemZ::RLL
), Dest
)
7341 .addReg(OldVal
).addReg(BitShift
).addImm(BitSize
);
7342 BuildMI(MBB
, DL
, TII
->get(SystemZ::RISBG32
), RetryCmpVal
)
7343 .addReg(CmpVal
).addReg(Dest
).addImm(32).addImm(63 - BitSize
).addImm(0);
7344 BuildMI(MBB
, DL
, TII
->get(SystemZ::CR
))
7345 .addReg(Dest
).addReg(RetryCmpVal
);
7346 BuildMI(MBB
, DL
, TII
->get(SystemZ::BRC
))
7347 .addImm(SystemZ::CCMASK_ICMP
)
7348 .addImm(SystemZ::CCMASK_CMP_NE
).addMBB(DoneMBB
);
7349 MBB
->addSuccessor(DoneMBB
);
7350 MBB
->addSuccessor(SetMBB
);
7353 // %RetrySwapVal = RISBG32 %SwapVal, %Dest, 32, 63-BitSize, 0
7354 // ^^ Replace the upper 32-BitSize bits of the new
7355 // value with those that we loaded.
7356 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
7357 // ^^ Rotate the new field to its proper position.
7358 // %RetryOldVal = CS %Dest, %StoreVal, Disp(%Base)
  //   # fall through to DoneMBB
7362 BuildMI(MBB
, DL
, TII
->get(SystemZ::RISBG32
), RetrySwapVal
)
7363 .addReg(SwapVal
).addReg(Dest
).addImm(32).addImm(63 - BitSize
).addImm(0);
7364 BuildMI(MBB
, DL
, TII
->get(SystemZ::RLL
), StoreVal
)
7365 .addReg(RetrySwapVal
).addReg(NegBitShift
).addImm(-BitSize
);
7366 BuildMI(MBB
, DL
, TII
->get(CSOpcode
), RetryOldVal
)
7371 BuildMI(MBB
, DL
, TII
->get(SystemZ::BRC
))
7372 .addImm(SystemZ::CCMASK_CS
).addImm(SystemZ::CCMASK_CS_NE
).addMBB(LoopMBB
);
7373 MBB
->addSuccessor(LoopMBB
);
7374 MBB
->addSuccessor(DoneMBB
);
7376 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
7377 // to the block after the loop. At this point, CC may have been defined
7378 // either by the CR in LoopMBB or by the CS in SetMBB.
7379 if (!MI
.registerDefIsDead(SystemZ::CC
))
7380 DoneMBB
->addLiveIn(SystemZ::CC
);
7382 MI
.eraseFromParent();
// Emit a move from two GR64s to a GR128.
MachineBasicBlock *
SystemZTargetLowering::emitPair128(MachineInstr &MI,
                                   MachineBasicBlock *MBB) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI.getDebugLoc();

  Register Dest = MI.getOperand(0).getReg();
  Register Hi = MI.getOperand(1).getReg();
  Register Lo = MI.getOperand(2).getReg();
  Register Tmp1 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
  Register Tmp2 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);

  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Tmp1);
  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Tmp2)
    .addReg(Tmp1).addReg(Hi).addImm(SystemZ::subreg_h64);
  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
    .addReg(Tmp2).addReg(Lo).addImm(SystemZ::subreg_l64);

  MI.eraseFromParent();
  return MBB;
}
7412 // Emit an extension from a GR64 to a GR128. ClearEven is true
7413 // if the high register of the GR128 value must be cleared or false if
7414 // it's "don't care".
7415 MachineBasicBlock
*SystemZTargetLowering::emitExt128(MachineInstr
&MI
,
7416 MachineBasicBlock
*MBB
,
7417 bool ClearEven
) const {
7418 MachineFunction
&MF
= *MBB
->getParent();
7419 const SystemZInstrInfo
*TII
=
7420 static_cast<const SystemZInstrInfo
*>(Subtarget
.getInstrInfo());
7421 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
7422 DebugLoc DL
= MI
.getDebugLoc();
7424 Register Dest
= MI
.getOperand(0).getReg();
7425 Register Src
= MI
.getOperand(1).getReg();
7426 Register In128
= MRI
.createVirtualRegister(&SystemZ::GR128BitRegClass
);
7428 BuildMI(*MBB
, MI
, DL
, TII
->get(TargetOpcode::IMPLICIT_DEF
), In128
);
7430 Register NewIn128
= MRI
.createVirtualRegister(&SystemZ::GR128BitRegClass
);
7431 Register Zero64
= MRI
.createVirtualRegister(&SystemZ::GR64BitRegClass
);
7433 BuildMI(*MBB
, MI
, DL
, TII
->get(SystemZ::LLILL
), Zero64
)
7435 BuildMI(*MBB
, MI
, DL
, TII
->get(TargetOpcode::INSERT_SUBREG
), NewIn128
)
7436 .addReg(In128
).addReg(Zero64
).addImm(SystemZ::subreg_h64
);
7439 BuildMI(*MBB
, MI
, DL
, TII
->get(TargetOpcode::INSERT_SUBREG
), Dest
)
7440 .addReg(In128
).addReg(Src
).addImm(SystemZ::subreg_l64
);
7442 MI
.eraseFromParent();
7446 MachineBasicBlock
*SystemZTargetLowering::emitMemMemWrapper(
7447 MachineInstr
&MI
, MachineBasicBlock
*MBB
, unsigned Opcode
) const {
7448 MachineFunction
&MF
= *MBB
->getParent();
7449 const SystemZInstrInfo
*TII
=
7450 static_cast<const SystemZInstrInfo
*>(Subtarget
.getInstrInfo());
7451 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
7452 DebugLoc DL
= MI
.getDebugLoc();
7454 MachineOperand DestBase
= earlyUseOperand(MI
.getOperand(0));
7455 uint64_t DestDisp
= MI
.getOperand(1).getImm();
7456 MachineOperand SrcBase
= earlyUseOperand(MI
.getOperand(2));
7457 uint64_t SrcDisp
= MI
.getOperand(3).getImm();
7458 uint64_t Length
= MI
.getOperand(4).getImm();
7460 // When generating more than one CLC, all but the last will need to
7461 // branch to the end when a difference is found.
7462 MachineBasicBlock
*EndMBB
= (Length
> 256 && Opcode
== SystemZ::CLC
?
7463 splitBlockAfter(MI
, MBB
) : nullptr);
7465 // Check for the loop form, in which operand 5 is the trip count.
7466 if (MI
.getNumExplicitOperands() > 5) {
7467 bool HaveSingleBase
= DestBase
.isIdenticalTo(SrcBase
);
7469 Register StartCountReg
= MI
.getOperand(5).getReg();
7470 Register StartSrcReg
= forceReg(MI
, SrcBase
, TII
);
7471 Register StartDestReg
= (HaveSingleBase
? StartSrcReg
:
7472 forceReg(MI
, DestBase
, TII
));
7474 const TargetRegisterClass
*RC
= &SystemZ::ADDR64BitRegClass
;
7475 Register ThisSrcReg
= MRI
.createVirtualRegister(RC
);
7476 Register ThisDestReg
= (HaveSingleBase
? ThisSrcReg
:
7477 MRI
.createVirtualRegister(RC
));
7478 Register NextSrcReg
= MRI
.createVirtualRegister(RC
);
7479 Register NextDestReg
= (HaveSingleBase
? NextSrcReg
:
7480 MRI
.createVirtualRegister(RC
));
7482 RC
= &SystemZ::GR64BitRegClass
;
7483 Register ThisCountReg
= MRI
.createVirtualRegister(RC
);
7484 Register NextCountReg
= MRI
.createVirtualRegister(RC
);
7486 MachineBasicBlock
*StartMBB
= MBB
;
7487 MachineBasicBlock
*DoneMBB
= splitBlockBefore(MI
, MBB
);
7488 MachineBasicBlock
*LoopMBB
= emitBlockAfter(StartMBB
);
7489 MachineBasicBlock
*NextMBB
= (EndMBB
? emitBlockAfter(LoopMBB
) : LoopMBB
);
  //   # fall through to LoopMBB
7493 MBB
->addSuccessor(LoopMBB
);
7496 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
7497 // [ %NextDestReg, NextMBB ]
7498 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
7499 // [ %NextSrcReg, NextMBB ]
7500 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
7501 // [ %NextCountReg, NextMBB ]
7502 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
7503 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
7506 // The prefetch is used only for MVC. The JLH is used only for CLC.
7509 BuildMI(MBB
, DL
, TII
->get(SystemZ::PHI
), ThisDestReg
)
7510 .addReg(StartDestReg
).addMBB(StartMBB
)
7511 .addReg(NextDestReg
).addMBB(NextMBB
);
7512 if (!HaveSingleBase
)
7513 BuildMI(MBB
, DL
, TII
->get(SystemZ::PHI
), ThisSrcReg
)
7514 .addReg(StartSrcReg
).addMBB(StartMBB
)
7515 .addReg(NextSrcReg
).addMBB(NextMBB
);
7516 BuildMI(MBB
, DL
, TII
->get(SystemZ::PHI
), ThisCountReg
)
7517 .addReg(StartCountReg
).addMBB(StartMBB
)
7518 .addReg(NextCountReg
).addMBB(NextMBB
);
7519 if (Opcode
== SystemZ::MVC
)
7520 BuildMI(MBB
, DL
, TII
->get(SystemZ::PFD
))
7521 .addImm(SystemZ::PFD_WRITE
)
7522 .addReg(ThisDestReg
).addImm(DestDisp
+ 768).addReg(0);
7523 BuildMI(MBB
, DL
, TII
->get(Opcode
))
7524 .addReg(ThisDestReg
).addImm(DestDisp
).addImm(256)
7525 .addReg(ThisSrcReg
).addImm(SrcDisp
);
7527 BuildMI(MBB
, DL
, TII
->get(SystemZ::BRC
))
7528 .addImm(SystemZ::CCMASK_ICMP
).addImm(SystemZ::CCMASK_CMP_NE
)
7530 MBB
->addSuccessor(EndMBB
);
7531 MBB
->addSuccessor(NextMBB
);
7535 // %NextDestReg = LA 256(%ThisDestReg)
7536 // %NextSrcReg = LA 256(%ThisSrcReg)
7537 // %NextCountReg = AGHI %ThisCountReg, -1
7538 // CGHI %NextCountReg, 0
  //   # fall through to DoneMBB
7542 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
7545 BuildMI(MBB
, DL
, TII
->get(SystemZ::LA
), NextDestReg
)
7546 .addReg(ThisDestReg
).addImm(256).addReg(0);
7547 if (!HaveSingleBase
)
7548 BuildMI(MBB
, DL
, TII
->get(SystemZ::LA
), NextSrcReg
)
7549 .addReg(ThisSrcReg
).addImm(256).addReg(0);
7550 BuildMI(MBB
, DL
, TII
->get(SystemZ::AGHI
), NextCountReg
)
7551 .addReg(ThisCountReg
).addImm(-1);
7552 BuildMI(MBB
, DL
, TII
->get(SystemZ::CGHI
))
7553 .addReg(NextCountReg
).addImm(0);
7554 BuildMI(MBB
, DL
, TII
->get(SystemZ::BRC
))
7555 .addImm(SystemZ::CCMASK_ICMP
).addImm(SystemZ::CCMASK_CMP_NE
)
7557 MBB
->addSuccessor(LoopMBB
);
7558 MBB
->addSuccessor(DoneMBB
);
7560 DestBase
= MachineOperand::CreateReg(NextDestReg
, false);
7561 SrcBase
= MachineOperand::CreateReg(NextSrcReg
, false);
7563 if (EndMBB
&& !Length
)
7564 // If the loop handled the whole CLC range, DoneMBB will be empty with
7565 // CC live-through into EndMBB, so add it as live-in.
7566 DoneMBB
->addLiveIn(SystemZ::CC
);
7569 // Handle any remaining bytes with straight-line code.
7570 while (Length
> 0) {
7571 uint64_t ThisLength
= std::min(Length
, uint64_t(256));
7572 // The previous iteration might have created out-of-range displacements.
7573 // Apply them using LAY if so.
7574 if (!isUInt
<12>(DestDisp
)) {
7575 Register Reg
= MRI
.createVirtualRegister(&SystemZ::ADDR64BitRegClass
);
7576 BuildMI(*MBB
, MI
, MI
.getDebugLoc(), TII
->get(SystemZ::LAY
), Reg
)
7580 DestBase
= MachineOperand::CreateReg(Reg
, false);
7583 if (!isUInt
<12>(SrcDisp
)) {
7584 Register Reg
= MRI
.createVirtualRegister(&SystemZ::ADDR64BitRegClass
);
7585 BuildMI(*MBB
, MI
, MI
.getDebugLoc(), TII
->get(SystemZ::LAY
), Reg
)
7589 SrcBase
= MachineOperand::CreateReg(Reg
, false);
7592 BuildMI(*MBB
, MI
, DL
, TII
->get(Opcode
))
7598 .setMemRefs(MI
.memoperands());
7599 DestDisp
+= ThisLength
;
7600 SrcDisp
+= ThisLength
;
7601 Length
-= ThisLength
;
7602 // If there's another CLC to go, branch to the end if a difference
7604 if (EndMBB
&& Length
> 0) {
7605 MachineBasicBlock
*NextMBB
= splitBlockBefore(MI
, MBB
);
7606 BuildMI(MBB
, DL
, TII
->get(SystemZ::BRC
))
7607 .addImm(SystemZ::CCMASK_ICMP
).addImm(SystemZ::CCMASK_CMP_NE
)
7609 MBB
->addSuccessor(EndMBB
);
7610 MBB
->addSuccessor(NextMBB
);
7615 MBB
->addSuccessor(EndMBB
);
7617 MBB
->addLiveIn(SystemZ::CC
);
7620 MI
.eraseFromParent();
7624 // Decompose string pseudo-instruction MI into a loop that continually performs
7625 // Opcode until CC != 3.
7626 MachineBasicBlock
*SystemZTargetLowering::emitStringWrapper(
7627 MachineInstr
&MI
, MachineBasicBlock
*MBB
, unsigned Opcode
) const {
7628 MachineFunction
&MF
= *MBB
->getParent();
7629 const SystemZInstrInfo
*TII
=
7630 static_cast<const SystemZInstrInfo
*>(Subtarget
.getInstrInfo());
7631 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
7632 DebugLoc DL
= MI
.getDebugLoc();
7634 uint64_t End1Reg
= MI
.getOperand(0).getReg();
7635 uint64_t Start1Reg
= MI
.getOperand(1).getReg();
7636 uint64_t Start2Reg
= MI
.getOperand(2).getReg();
7637 uint64_t CharReg
= MI
.getOperand(3).getReg();
7639 const TargetRegisterClass
*RC
= &SystemZ::GR64BitRegClass
;
7640 uint64_t This1Reg
= MRI
.createVirtualRegister(RC
);
7641 uint64_t This2Reg
= MRI
.createVirtualRegister(RC
);
7642 uint64_t End2Reg
= MRI
.createVirtualRegister(RC
);
7644 MachineBasicBlock
*StartMBB
= MBB
;
7645 MachineBasicBlock
*DoneMBB
= splitBlockBefore(MI
, MBB
);
7646 MachineBasicBlock
*LoopMBB
= emitBlockAfter(StartMBB
);
  //   # fall through to LoopMBB
7650 MBB
->addSuccessor(LoopMBB
);
7653 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
7654 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
7656 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
  //   # fall through to DoneMBB
7660 // The load of R0L can be hoisted by post-RA LICM.
7663 BuildMI(MBB
, DL
, TII
->get(SystemZ::PHI
), This1Reg
)
7664 .addReg(Start1Reg
).addMBB(StartMBB
)
7665 .addReg(End1Reg
).addMBB(LoopMBB
);
7666 BuildMI(MBB
, DL
, TII
->get(SystemZ::PHI
), This2Reg
)
7667 .addReg(Start2Reg
).addMBB(StartMBB
)
7668 .addReg(End2Reg
).addMBB(LoopMBB
);
7669 BuildMI(MBB
, DL
, TII
->get(TargetOpcode::COPY
), SystemZ::R0L
).addReg(CharReg
);
7670 BuildMI(MBB
, DL
, TII
->get(Opcode
))
7671 .addReg(End1Reg
, RegState::Define
).addReg(End2Reg
, RegState::Define
)
7672 .addReg(This1Reg
).addReg(This2Reg
);
7673 BuildMI(MBB
, DL
, TII
->get(SystemZ::BRC
))
7674 .addImm(SystemZ::CCMASK_ANY
).addImm(SystemZ::CCMASK_3
).addMBB(LoopMBB
);
7675 MBB
->addSuccessor(LoopMBB
);
7676 MBB
->addSuccessor(DoneMBB
);
7678 DoneMBB
->addLiveIn(SystemZ::CC
);
7680 MI
.eraseFromParent();
7684 // Update TBEGIN instruction with final opcode and register clobbers.
7685 MachineBasicBlock
*SystemZTargetLowering::emitTransactionBegin(
7686 MachineInstr
&MI
, MachineBasicBlock
*MBB
, unsigned Opcode
,
7687 bool NoFloat
) const {
7688 MachineFunction
&MF
= *MBB
->getParent();
7689 const TargetFrameLowering
*TFI
= Subtarget
.getFrameLowering();
7690 const SystemZInstrInfo
*TII
= Subtarget
.getInstrInfo();
7693 MI
.setDesc(TII
->get(Opcode
));
7695 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
7696 // Make sure to add the corresponding GRSM bits if they are missing.
7697 uint64_t Control
= MI
.getOperand(2).getImm();
7698 static const unsigned GPRControlBit
[16] = {
7699 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
7700 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
7702 Control
|= GPRControlBit
[15];
7704 Control
|= GPRControlBit
[11];
7705 MI
.getOperand(2).setImm(Control
);
7707 // Add GPR clobbers.
7708 for (int I
= 0; I
< 16; I
++) {
7709 if ((Control
& GPRControlBit
[I
]) == 0) {
7710 unsigned Reg
= SystemZMC::GR64Regs
[I
];
7711 MI
.addOperand(MachineOperand::CreateReg(Reg
, true, true));
7715 // Add FPR/VR clobbers.
7716 if (!NoFloat
&& (Control
& 4) != 0) {
7717 if (Subtarget
.hasVector()) {
7718 for (int I
= 0; I
< 32; I
++) {
7719 unsigned Reg
= SystemZMC::VR128Regs
[I
];
7720 MI
.addOperand(MachineOperand::CreateReg(Reg
, true, true));
7723 for (int I
= 0; I
< 16; I
++) {
7724 unsigned Reg
= SystemZMC::FP64Regs
[I
];
7725 MI
.addOperand(MachineOperand::CreateReg(Reg
, true, true));
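
// Replace a load-and-test compare-with-zero pseudo with the real
// load-and-test instruction, which models the register def as well as
// setting CC.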
MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
  MachineFunction &MF = *MBB->getParent();
  MachineRegisterInfo *MRI = &MF.getRegInfo();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  DebugLoc DL = MI.getDebugLoc();

  Register SrcReg = MI.getOperand(0).getReg();

  // Create new virtual register of the same class as source.
  const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
  Register DstReg = MRI->createVirtualRegister(RC);

  // Replace pseudo with a normal load-and-test that models the def as
  // well.
  BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
    .addReg(SrcReg)
    .setMIFlags(MI.getFlags());
  MI.eraseFromParent();

  return MBB;
}
7757 MachineBasicBlock
*SystemZTargetLowering::EmitInstrWithCustomInserter(
7758 MachineInstr
&MI
, MachineBasicBlock
*MBB
) const {
7759 switch (MI
.getOpcode()) {
7760 case SystemZ::Select32
:
7761 case SystemZ::Select64
:
7762 case SystemZ::SelectF32
:
7763 case SystemZ::SelectF64
:
7764 case SystemZ::SelectF128
:
7765 case SystemZ::SelectVR32
:
7766 case SystemZ::SelectVR64
:
7767 case SystemZ::SelectVR128
:
7768 return emitSelect(MI
, MBB
);
7770 case SystemZ::CondStore8Mux
:
7771 return emitCondStore(MI
, MBB
, SystemZ::STCMux
, 0, false);
7772 case SystemZ::CondStore8MuxInv
:
7773 return emitCondStore(MI
, MBB
, SystemZ::STCMux
, 0, true);
7774 case SystemZ::CondStore16Mux
:
7775 return emitCondStore(MI
, MBB
, SystemZ::STHMux
, 0, false);
7776 case SystemZ::CondStore16MuxInv
:
7777 return emitCondStore(MI
, MBB
, SystemZ::STHMux
, 0, true);
7778 case SystemZ::CondStore32Mux
:
7779 return emitCondStore(MI
, MBB
, SystemZ::STMux
, SystemZ::STOCMux
, false);
7780 case SystemZ::CondStore32MuxInv
:
7781 return emitCondStore(MI
, MBB
, SystemZ::STMux
, SystemZ::STOCMux
, true);
7782 case SystemZ::CondStore8
:
7783 return emitCondStore(MI
, MBB
, SystemZ::STC
, 0, false);
7784 case SystemZ::CondStore8Inv
:
7785 return emitCondStore(MI
, MBB
, SystemZ::STC
, 0, true);
7786 case SystemZ::CondStore16
:
7787 return emitCondStore(MI
, MBB
, SystemZ::STH
, 0, false);
7788 case SystemZ::CondStore16Inv
:
7789 return emitCondStore(MI
, MBB
, SystemZ::STH
, 0, true);
7790 case SystemZ::CondStore32
:
7791 return emitCondStore(MI
, MBB
, SystemZ::ST
, SystemZ::STOC
, false);
7792 case SystemZ::CondStore32Inv
:
7793 return emitCondStore(MI
, MBB
, SystemZ::ST
, SystemZ::STOC
, true);
7794 case SystemZ::CondStore64
:
7795 return emitCondStore(MI
, MBB
, SystemZ::STG
, SystemZ::STOCG
, false);
7796 case SystemZ::CondStore64Inv
:
7797 return emitCondStore(MI
, MBB
, SystemZ::STG
, SystemZ::STOCG
, true);
7798 case SystemZ::CondStoreF32
:
7799 return emitCondStore(MI
, MBB
, SystemZ::STE
, 0, false);
7800 case SystemZ::CondStoreF32Inv
:
7801 return emitCondStore(MI
, MBB
, SystemZ::STE
, 0, true);
7802 case SystemZ::CondStoreF64
:
7803 return emitCondStore(MI
, MBB
, SystemZ::STD
, 0, false);
7804 case SystemZ::CondStoreF64Inv
:
7805 return emitCondStore(MI
, MBB
, SystemZ::STD
, 0, true);
7807 case SystemZ::PAIR128
:
7808 return emitPair128(MI
, MBB
);
7809 case SystemZ::AEXT128
:
7810 return emitExt128(MI
, MBB
, false);
7811 case SystemZ::ZEXT128
:
7812 return emitExt128(MI
, MBB
, true);
7814 case SystemZ::ATOMIC_SWAPW
:
7815 return emitAtomicLoadBinary(MI
, MBB
, 0, 0);
7816 case SystemZ::ATOMIC_SWAP_32
:
7817 return emitAtomicLoadBinary(MI
, MBB
, 0, 32);
7818 case SystemZ::ATOMIC_SWAP_64
:
7819 return emitAtomicLoadBinary(MI
, MBB
, 0, 64);
7821 case SystemZ::ATOMIC_LOADW_AR
:
7822 return emitAtomicLoadBinary(MI
, MBB
, SystemZ::AR
, 0);
7823 case SystemZ::ATOMIC_LOADW_AFI
:
7824 return emitAtomicLoadBinary(MI
, MBB
, SystemZ::AFI
, 0);
7825 case SystemZ::ATOMIC_LOAD_AR
:
7826 return emitAtomicLoadBinary(MI
, MBB
, SystemZ::AR
, 32);
7827 case SystemZ::ATOMIC_LOAD_AHI
:
7828 return emitAtomicLoadBinary(MI
, MBB
, SystemZ::AHI
, 32);
7829 case SystemZ::ATOMIC_LOAD_AFI
:
7830 return emitAtomicLoadBinary(MI
, MBB
, SystemZ::AFI
, 32);
7831 case SystemZ::ATOMIC_LOAD_AGR
:
7832 return emitAtomicLoadBinary(MI
, MBB
, SystemZ::AGR
, 64);
7833 case SystemZ::ATOMIC_LOAD_AGHI
:
7834 return emitAtomicLoadBinary(MI
, MBB
, SystemZ::AGHI
, 64);
7835 case SystemZ::ATOMIC_LOAD_AGFI
:
7836 return emitAtomicLoadBinary(MI
, MBB
, SystemZ::AGFI
, 64);
  case SystemZ::ATOMIC_LOADW_SR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 0);
  case SystemZ::ATOMIC_LOAD_SR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 32);
  case SystemZ::ATOMIC_LOAD_SGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::SGR, 64);
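  // Atomic AND, with immediate forms for the individual halfwords and words
  // of a 64-bit register.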
  case SystemZ::ATOMIC_LOADW_NR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0);
  case SystemZ::ATOMIC_LOADW_NILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0);
  case SystemZ::ATOMIC_LOAD_NR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32);
  case SystemZ::ATOMIC_LOAD_NILL:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32);
  case SystemZ::ATOMIC_LOAD_NILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32);
  case SystemZ::ATOMIC_LOAD_NILF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32);
  case SystemZ::ATOMIC_LOAD_NGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64);
  case SystemZ::ATOMIC_LOAD_NILL64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64);
  case SystemZ::ATOMIC_LOAD_NILH64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64);
  case SystemZ::ATOMIC_LOAD_NIHL64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64);
  case SystemZ::ATOMIC_LOAD_NIHH64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64);
  case SystemZ::ATOMIC_LOAD_NILF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64);
  case SystemZ::ATOMIC_LOAD_NIHF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64);
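  // Atomic OR.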
  case SystemZ::ATOMIC_LOADW_OR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0);
  case SystemZ::ATOMIC_LOADW_OILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 0);
  case SystemZ::ATOMIC_LOAD_OR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32);
  case SystemZ::ATOMIC_LOAD_OILL:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 32);
  case SystemZ::ATOMIC_LOAD_OILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 32);
  case SystemZ::ATOMIC_LOAD_OILF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 32);
  case SystemZ::ATOMIC_LOAD_OGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64);
  case SystemZ::ATOMIC_LOAD_OILL64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL64, 64);
  case SystemZ::ATOMIC_LOAD_OILH64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH64, 64);
  case SystemZ::ATOMIC_LOAD_OIHL64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL64, 64);
  case SystemZ::ATOMIC_LOAD_OIHH64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH64, 64);
  case SystemZ::ATOMIC_LOAD_OILF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF64, 64);
  case SystemZ::ATOMIC_LOAD_OIHF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF64, 64);
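  // Atomic XOR.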
  case SystemZ::ATOMIC_LOADW_XR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0);
  case SystemZ::ATOMIC_LOADW_XILF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 0);
  case SystemZ::ATOMIC_LOAD_XR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32);
  case SystemZ::ATOMIC_LOAD_XILF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 32);
  case SystemZ::ATOMIC_LOAD_XGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64);
  case SystemZ::ATOMIC_LOAD_XILF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF64, 64);
  case SystemZ::ATOMIC_LOAD_XIHF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF64, 64);
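  // Atomic NAND: the same AND opcodes as above, with the trailing 'true'
  // requesting that the result be inverted before it is stored back.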
  case SystemZ::ATOMIC_LOADW_NRi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true);
  case SystemZ::ATOMIC_LOADW_NILHi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0, true);
  case SystemZ::ATOMIC_LOAD_NRi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true);
  case SystemZ::ATOMIC_LOAD_NILLi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32, true);
  case SystemZ::ATOMIC_LOAD_NILHi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32, true);
  case SystemZ::ATOMIC_LOAD_NILFi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32, true);
  case SystemZ::ATOMIC_LOAD_NGRi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true);
  case SystemZ::ATOMIC_LOAD_NILL64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64, true);
  case SystemZ::ATOMIC_LOAD_NILH64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64, true);
  case SystemZ::ATOMIC_LOAD_NIHL64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64, true);
  case SystemZ::ATOMIC_LOAD_NIHH64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64, true);
  case SystemZ::ATOMIC_LOAD_NILF64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64, true);
  case SystemZ::ATOMIC_LOAD_NIHF64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64, true);
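  // Atomic signed and unsigned min/max: compare the old and new values
  // (CR/CGR signed, CLR/CLGR unsigned) and keep the old value when the
  // comparison satisfies the given CC mask.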
  case SystemZ::ATOMIC_LOADW_MIN:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
                                SystemZ::CCMASK_CMP_LE, 0);
  case SystemZ::ATOMIC_LOAD_MIN_32:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
                                SystemZ::CCMASK_CMP_LE, 32);
  case SystemZ::ATOMIC_LOAD_MIN_64:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
                                SystemZ::CCMASK_CMP_LE, 64);

  case SystemZ::ATOMIC_LOADW_MAX:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
                                SystemZ::CCMASK_CMP_GE, 0);
  case SystemZ::ATOMIC_LOAD_MAX_32:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
                                SystemZ::CCMASK_CMP_GE, 32);
  case SystemZ::ATOMIC_LOAD_MAX_64:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
                                SystemZ::CCMASK_CMP_GE, 64);

  case SystemZ::ATOMIC_LOADW_UMIN:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
                                SystemZ::CCMASK_CMP_LE, 0);
  case SystemZ::ATOMIC_LOAD_UMIN_32:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
                                SystemZ::CCMASK_CMP_LE, 32);
  case SystemZ::ATOMIC_LOAD_UMIN_64:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
                                SystemZ::CCMASK_CMP_LE, 64);

  case SystemZ::ATOMIC_LOADW_UMAX:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
                                SystemZ::CCMASK_CMP_GE, 0);
  case SystemZ::ATOMIC_LOAD_UMAX_32:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
                                SystemZ::CCMASK_CMP_GE, 32);
  case SystemZ::ATOMIC_LOAD_UMAX_64:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
                                SystemZ::CCMASK_CMP_GE, 64);
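  // Sub-word atomic compare-and-swap.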
  case SystemZ::ATOMIC_CMP_SWAPW:
    return emitAtomicCmpSwapW(MI, MBB);
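  // Memory-to-memory operations (move, AND, OR, XOR, compare), emitted
  // either as a short inline sequence or as a loop for longer lengths.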
  case SystemZ::MVCSequence:
  case SystemZ::MVCLoop:
    return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
  case SystemZ::NCSequence:
  case SystemZ::NCLoop:
    return emitMemMemWrapper(MI, MBB, SystemZ::NC);
  case SystemZ::OCSequence:
  case SystemZ::OCLoop:
    return emitMemMemWrapper(MI, MBB, SystemZ::OC);
  case SystemZ::XCSequence:
  case SystemZ::XCLoop:
    return emitMemMemWrapper(MI, MBB, SystemZ::XC);
  case SystemZ::CLCSequence:
  case SystemZ::CLCLoop:
    return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
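  // String instructions (compare, move, search), which must be restarted in
  // a loop whenever CC indicates an interrupted operation.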
  case SystemZ::CLSTLoop:
    return emitStringWrapper(MI, MBB, SystemZ::CLST);
  case SystemZ::MVSTLoop:
    return emitStringWrapper(MI, MBB, SystemZ::MVST);
  case SystemZ::SRSTLoop:
    return emitStringWrapper(MI, MBB, SystemZ::SRST);
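  // Transaction begin; the final flag says whether the floating-point
  // registers can be left out of the clobber list.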
  case SystemZ::TBEGIN:
    return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
  case SystemZ::TBEGIN_nofloat:
    return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
  case SystemZ::TBEGINC:
    return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
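  // Compare-with-zero pseudos, expanded to load-and-test instructions.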
  case SystemZ::LTEBRCompare_VecPseudo:
    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
  case SystemZ::LTDBRCompare_VecPseudo:
    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
  case SystemZ::LTXBRCompare_VecPseudo:
    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
    return emitPatchPoint(MI, MBB);
  default:
    llvm_unreachable("Unexpected instr type to insert");
  }
}
// This is only used by the isel schedulers, and is needed only to prevent
// the compiler from crashing when list-ilp is used.
const TargetRegisterClass *
SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
  if (VT == MVT::Untyped)
    return &SystemZ::ADDR128BitRegClass;
  return TargetLowering::getRepRegClassFor(VT);