1 //===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the SystemZTargetLowering class.
11 //===----------------------------------------------------------------------===//
13 #include "SystemZISelLowering.h"
14 #include "SystemZCallingConv.h"
15 #include "SystemZConstantPoolValue.h"
16 #include "SystemZMachineFunctionInfo.h"
17 #include "SystemZTargetMachine.h"
18 #include "llvm/CodeGen/CallingConvLower.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
22 #include "llvm/IR/Intrinsics.h"
23 #include "llvm/IR/IntrinsicInst.h"
24 #include "llvm/Support/CommandLine.h"
25 #include "llvm/Support/KnownBits.h"
30 #define DEBUG_TYPE "systemz-lower"
33 // Represents information about a comparison.
35 Comparison(SDValue Op0In
, SDValue Op1In
)
36 : Op0(Op0In
), Op1(Op1In
), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
38 // The operands to the comparison.
41 // The opcode that should be used to compare Op0 and Op1.
44 // A SystemZICMP value. Only used for integer comparisons.
47 // The mask of CC values that Opcode can produce.
50 // The mask of CC values for which the original condition is true.
53 } // end anonymous namespace
55 // Classify VT as either 32 or 64 bit.
56 static bool is32Bit(EVT VT
) {
57 switch (VT
.getSimpleVT().SimpleTy
) {
63 llvm_unreachable("Unsupported type");
67 // Return a version of MachineOperand that can be safely used before the
69 static MachineOperand
earlyUseOperand(MachineOperand Op
) {
75 SystemZTargetLowering::SystemZTargetLowering(const TargetMachine
&TM
,
76 const SystemZSubtarget
&STI
)
77 : TargetLowering(TM
), Subtarget(STI
) {
78 MVT PtrVT
= MVT::getIntegerVT(8 * TM
.getPointerSize(0));
80 // Set up the register classes.
81 if (Subtarget
.hasHighWord())
82 addRegisterClass(MVT::i32
, &SystemZ::GRX32BitRegClass
);
84 addRegisterClass(MVT::i32
, &SystemZ::GR32BitRegClass
);
85 addRegisterClass(MVT::i64
, &SystemZ::GR64BitRegClass
);
86 if (Subtarget
.hasVector()) {
87 addRegisterClass(MVT::f32
, &SystemZ::VR32BitRegClass
);
88 addRegisterClass(MVT::f64
, &SystemZ::VR64BitRegClass
);
90 addRegisterClass(MVT::f32
, &SystemZ::FP32BitRegClass
);
91 addRegisterClass(MVT::f64
, &SystemZ::FP64BitRegClass
);
93 if (Subtarget
.hasVectorEnhancements1())
94 addRegisterClass(MVT::f128
, &SystemZ::VR128BitRegClass
);
96 addRegisterClass(MVT::f128
, &SystemZ::FP128BitRegClass
);
98 if (Subtarget
.hasVector()) {
99 addRegisterClass(MVT::v16i8
, &SystemZ::VR128BitRegClass
);
100 addRegisterClass(MVT::v8i16
, &SystemZ::VR128BitRegClass
);
101 addRegisterClass(MVT::v4i32
, &SystemZ::VR128BitRegClass
);
102 addRegisterClass(MVT::v2i64
, &SystemZ::VR128BitRegClass
);
103 addRegisterClass(MVT::v4f32
, &SystemZ::VR128BitRegClass
);
104 addRegisterClass(MVT::v2f64
, &SystemZ::VR128BitRegClass
);
107 // Compute derived properties from the register classes
108 computeRegisterProperties(Subtarget
.getRegisterInfo());
110 // Set up special registers.
111 setStackPointerRegisterToSaveRestore(SystemZ::R15D
);
113 // TODO: It may be better to default to latency-oriented scheduling, however
114 // LLVM's current latency-oriented scheduler can't handle physreg definitions
115 // such as SystemZ has with CC, so set this to the register-pressure
116 // scheduler, because it can.
117 setSchedulingPreference(Sched::RegPressure
);
119 setBooleanContents(ZeroOrOneBooleanContent
);
120 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent
);
122 // Instructions are strings of 2-byte aligned 2-byte values.
123 setMinFunctionAlignment(Align(2));
124 // For performance reasons we prefer 16-byte alignment.
125 setPrefFunctionAlignment(Align(16));
127 // Handle operations that are handled in a similar way for all types.
128 for (unsigned I
= MVT::FIRST_INTEGER_VALUETYPE
;
129 I
<= MVT::LAST_FP_VALUETYPE
;
131 MVT VT
= MVT::SimpleValueType(I
);
132 if (isTypeLegal(VT
)) {
133 // Lower SET_CC into an IPM-based sequence.
134 setOperationAction(ISD::SETCC
, VT
, Custom
);
136 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
137 setOperationAction(ISD::SELECT
, VT
, Expand
);
139 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
140 setOperationAction(ISD::SELECT_CC
, VT
, Custom
);
141 setOperationAction(ISD::BR_CC
, VT
, Custom
);
145 // Expand jump table branches as address arithmetic followed by an
147 setOperationAction(ISD::BR_JT
, MVT::Other
, Expand
);
149 // Expand BRCOND into a BR_CC (see above).
150 setOperationAction(ISD::BRCOND
, MVT::Other
, Expand
);
152 // Handle integer types.
153 for (unsigned I
= MVT::FIRST_INTEGER_VALUETYPE
;
154 I
<= MVT::LAST_INTEGER_VALUETYPE
;
156 MVT VT
= MVT::SimpleValueType(I
);
157 if (isTypeLegal(VT
)) {
158 // Expand individual DIV and REMs into DIVREMs.
159 setOperationAction(ISD::SDIV
, VT
, Expand
);
160 setOperationAction(ISD::UDIV
, VT
, Expand
);
161 setOperationAction(ISD::SREM
, VT
, Expand
);
162 setOperationAction(ISD::UREM
, VT
, Expand
);
163 setOperationAction(ISD::SDIVREM
, VT
, Custom
);
164 setOperationAction(ISD::UDIVREM
, VT
, Custom
);
166 // Support addition/subtraction with overflow.
167 setOperationAction(ISD::SADDO
, VT
, Custom
);
168 setOperationAction(ISD::SSUBO
, VT
, Custom
);
170 // Support addition/subtraction with carry.
171 setOperationAction(ISD::UADDO
, VT
, Custom
);
172 setOperationAction(ISD::USUBO
, VT
, Custom
);
174 // Support carry in as value rather than glue.
175 setOperationAction(ISD::ADDCARRY
, VT
, Custom
);
176 setOperationAction(ISD::SUBCARRY
, VT
, Custom
);
178 // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
179 // stores, putting a serialization instruction after the stores.
180 setOperationAction(ISD::ATOMIC_LOAD
, VT
, Custom
);
181 setOperationAction(ISD::ATOMIC_STORE
, VT
, Custom
);
183 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
184 // available, or if the operand is constant.
185 setOperationAction(ISD::ATOMIC_LOAD_SUB
, VT
, Custom
);
187 // Use POPCNT on z196 and above.
188 if (Subtarget
.hasPopulationCount())
189 setOperationAction(ISD::CTPOP
, VT
, Custom
);
191 setOperationAction(ISD::CTPOP
, VT
, Expand
);
193 // No special instructions for these.
194 setOperationAction(ISD::CTTZ
, VT
, Expand
);
195 setOperationAction(ISD::ROTR
, VT
, Expand
);
197 // Use *MUL_LOHI where possible instead of MULH*.
198 setOperationAction(ISD::MULHS
, VT
, Expand
);
199 setOperationAction(ISD::MULHU
, VT
, Expand
);
200 setOperationAction(ISD::SMUL_LOHI
, VT
, Custom
);
201 setOperationAction(ISD::UMUL_LOHI
, VT
, Custom
);
203 // Only z196 and above have native support for conversions to unsigned.
204 // On z10, promoting to i64 doesn't generate an inexact condition for
205 // values that are outside the i32 range but in the i64 range, so use
206 // the default expansion.
207 if (!Subtarget
.hasFPExtension())
208 setOperationAction(ISD::FP_TO_UINT
, VT
, Expand
);
210 // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
211 // default to Expand, so need to be modified to Legal where appropriate.
212 setOperationAction(ISD::STRICT_FP_TO_SINT
, VT
, Legal
);
213 if (Subtarget
.hasFPExtension())
214 setOperationAction(ISD::STRICT_FP_TO_UINT
, VT
, Legal
);
218 // Type legalization will convert 8- and 16-bit atomic operations into
219 // forms that operate on i32s (but still keeping the original memory VT).
220 // Lower them into full i32 operations.
221 setOperationAction(ISD::ATOMIC_SWAP
, MVT::i32
, Custom
);
222 setOperationAction(ISD::ATOMIC_LOAD_ADD
, MVT::i32
, Custom
);
223 setOperationAction(ISD::ATOMIC_LOAD_SUB
, MVT::i32
, Custom
);
224 setOperationAction(ISD::ATOMIC_LOAD_AND
, MVT::i32
, Custom
);
225 setOperationAction(ISD::ATOMIC_LOAD_OR
, MVT::i32
, Custom
);
226 setOperationAction(ISD::ATOMIC_LOAD_XOR
, MVT::i32
, Custom
);
227 setOperationAction(ISD::ATOMIC_LOAD_NAND
, MVT::i32
, Custom
);
228 setOperationAction(ISD::ATOMIC_LOAD_MIN
, MVT::i32
, Custom
);
229 setOperationAction(ISD::ATOMIC_LOAD_MAX
, MVT::i32
, Custom
);
230 setOperationAction(ISD::ATOMIC_LOAD_UMIN
, MVT::i32
, Custom
);
231 setOperationAction(ISD::ATOMIC_LOAD_UMAX
, MVT::i32
, Custom
);
233 // Even though i128 is not a legal type, we still need to custom lower
234 // the atomic operations in order to exploit SystemZ instructions.
235 setOperationAction(ISD::ATOMIC_LOAD
, MVT::i128
, Custom
);
236 setOperationAction(ISD::ATOMIC_STORE
, MVT::i128
, Custom
);
238 // We can use the CC result of compare-and-swap to implement
239 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
240 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS
, MVT::i32
, Custom
);
241 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS
, MVT::i64
, Custom
);
242 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS
, MVT::i128
, Custom
);
244 setOperationAction(ISD::ATOMIC_FENCE
, MVT::Other
, Custom
);
246 // Traps are legal, as we will convert them to "j .+2".
247 setOperationAction(ISD::TRAP
, MVT::Other
, Legal
);
249 // z10 has instructions for signed but not unsigned FP conversion.
250 // Handle unsigned 32-bit types as signed 64-bit types.
251 if (!Subtarget
.hasFPExtension()) {
252 setOperationAction(ISD::UINT_TO_FP
, MVT::i32
, Promote
);
253 setOperationAction(ISD::UINT_TO_FP
, MVT::i64
, Expand
);
256 // We have native support for a 64-bit CTLZ, via FLOGR.
257 setOperationAction(ISD::CTLZ
, MVT::i32
, Promote
);
258 setOperationAction(ISD::CTLZ_ZERO_UNDEF
, MVT::i32
, Promote
);
259 setOperationAction(ISD::CTLZ
, MVT::i64
, Legal
);
261 // On z15 we have native support for a 64-bit CTPOP.
262 if (Subtarget
.hasMiscellaneousExtensions3()) {
263 setOperationAction(ISD::CTPOP
, MVT::i32
, Promote
);
264 setOperationAction(ISD::CTPOP
, MVT::i64
, Legal
);
267 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
268 setOperationAction(ISD::OR
, MVT::i64
, Custom
);
270 // FIXME: Can we support these natively?
271 setOperationAction(ISD::SRL_PARTS
, MVT::i64
, Expand
);
272 setOperationAction(ISD::SHL_PARTS
, MVT::i64
, Expand
);
273 setOperationAction(ISD::SRA_PARTS
, MVT::i64
, Expand
);
275 // We have native instructions for i8, i16 and i32 extensions, but not i1.
276 setOperationAction(ISD::SIGN_EXTEND_INREG
, MVT::i1
, Expand
);
277 for (MVT VT
: MVT::integer_valuetypes()) {
278 setLoadExtAction(ISD::SEXTLOAD
, VT
, MVT::i1
, Promote
);
279 setLoadExtAction(ISD::ZEXTLOAD
, VT
, MVT::i1
, Promote
);
280 setLoadExtAction(ISD::EXTLOAD
, VT
, MVT::i1
, Promote
);
283 // Handle the various types of symbolic address.
284 setOperationAction(ISD::ConstantPool
, PtrVT
, Custom
);
285 setOperationAction(ISD::GlobalAddress
, PtrVT
, Custom
);
286 setOperationAction(ISD::GlobalTLSAddress
, PtrVT
, Custom
);
287 setOperationAction(ISD::BlockAddress
, PtrVT
, Custom
);
288 setOperationAction(ISD::JumpTable
, PtrVT
, Custom
);
290 // We need to handle dynamic allocations specially because of the
291 // 160-byte area at the bottom of the stack.
292 setOperationAction(ISD::DYNAMIC_STACKALLOC
, PtrVT
, Custom
);
293 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET
, PtrVT
, Custom
);
295 // Use custom expanders so that we can force the function to use
297 setOperationAction(ISD::STACKSAVE
, MVT::Other
, Custom
);
298 setOperationAction(ISD::STACKRESTORE
, MVT::Other
, Custom
);
300 // Handle prefetches with PFD or PFDRL.
301 setOperationAction(ISD::PREFETCH
, MVT::Other
, Custom
);
303 for (MVT VT
: MVT::fixedlen_vector_valuetypes()) {
304 // Assume by default that all vector operations need to be expanded.
305 for (unsigned Opcode
= 0; Opcode
< ISD::BUILTIN_OP_END
; ++Opcode
)
306 if (getOperationAction(Opcode
, VT
) == Legal
)
307 setOperationAction(Opcode
, VT
, Expand
);
309 // Likewise all truncating stores and extending loads.
310 for (MVT InnerVT
: MVT::fixedlen_vector_valuetypes()) {
311 setTruncStoreAction(VT
, InnerVT
, Expand
);
312 setLoadExtAction(ISD::SEXTLOAD
, VT
, InnerVT
, Expand
);
313 setLoadExtAction(ISD::ZEXTLOAD
, VT
, InnerVT
, Expand
);
314 setLoadExtAction(ISD::EXTLOAD
, VT
, InnerVT
, Expand
);
317 if (isTypeLegal(VT
)) {
318 // These operations are legal for anything that can be stored in a
319 // vector register, even if there is no native support for the format
320 // as such. In particular, we can do these for v4f32 even though there
321 // are no specific instructions for that format.
322 setOperationAction(ISD::LOAD
, VT
, Legal
);
323 setOperationAction(ISD::STORE
, VT
, Legal
);
324 setOperationAction(ISD::VSELECT
, VT
, Legal
);
325 setOperationAction(ISD::BITCAST
, VT
, Legal
);
326 setOperationAction(ISD::UNDEF
, VT
, Legal
);
328 // Likewise, except that we need to replace the nodes with something
330 setOperationAction(ISD::BUILD_VECTOR
, VT
, Custom
);
331 setOperationAction(ISD::VECTOR_SHUFFLE
, VT
, Custom
);
335 // Handle integer vector types.
336 for (MVT VT
: MVT::integer_fixedlen_vector_valuetypes()) {
337 if (isTypeLegal(VT
)) {
338 // These operations have direct equivalents.
339 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, VT
, Legal
);
340 setOperationAction(ISD::INSERT_VECTOR_ELT
, VT
, Legal
);
341 setOperationAction(ISD::ADD
, VT
, Legal
);
342 setOperationAction(ISD::SUB
, VT
, Legal
);
343 if (VT
!= MVT::v2i64
)
344 setOperationAction(ISD::MUL
, VT
, Legal
);
345 setOperationAction(ISD::AND
, VT
, Legal
);
346 setOperationAction(ISD::OR
, VT
, Legal
);
347 setOperationAction(ISD::XOR
, VT
, Legal
);
348 if (Subtarget
.hasVectorEnhancements1())
349 setOperationAction(ISD::CTPOP
, VT
, Legal
);
351 setOperationAction(ISD::CTPOP
, VT
, Custom
);
352 setOperationAction(ISD::CTTZ
, VT
, Legal
);
353 setOperationAction(ISD::CTLZ
, VT
, Legal
);
355 // Convert a GPR scalar to a vector by inserting it into element 0.
356 setOperationAction(ISD::SCALAR_TO_VECTOR
, VT
, Custom
);
358 // Use a series of unpacks for extensions.
359 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG
, VT
, Custom
);
360 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG
, VT
, Custom
);
362 // Detect shifts by a scalar amount and convert them into
364 setOperationAction(ISD::SHL
, VT
, Custom
);
365 setOperationAction(ISD::SRA
, VT
, Custom
);
366 setOperationAction(ISD::SRL
, VT
, Custom
);
368 // At present ROTL isn't matched by DAGCombiner. ROTR should be
369 // converted into ROTL.
370 setOperationAction(ISD::ROTL
, VT
, Expand
);
371 setOperationAction(ISD::ROTR
, VT
, Expand
);
373 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
374 // and inverting the result as necessary.
375 setOperationAction(ISD::SETCC
, VT
, Custom
);
379 if (Subtarget
.hasVector()) {
380 // There should be no need to check for float types other than v2f64
381 // since <2 x f32> isn't a legal type.
382 setOperationAction(ISD::FP_TO_SINT
, MVT::v2i64
, Legal
);
383 setOperationAction(ISD::FP_TO_SINT
, MVT::v2f64
, Legal
);
384 setOperationAction(ISD::FP_TO_UINT
, MVT::v2i64
, Legal
);
385 setOperationAction(ISD::FP_TO_UINT
, MVT::v2f64
, Legal
);
386 setOperationAction(ISD::SINT_TO_FP
, MVT::v2i64
, Legal
);
387 setOperationAction(ISD::SINT_TO_FP
, MVT::v2f64
, Legal
);
388 setOperationAction(ISD::UINT_TO_FP
, MVT::v2i64
, Legal
);
389 setOperationAction(ISD::UINT_TO_FP
, MVT::v2f64
, Legal
);
391 setOperationAction(ISD::STRICT_FP_TO_SINT
, MVT::v2i64
, Legal
);
392 setOperationAction(ISD::STRICT_FP_TO_SINT
, MVT::v2f64
, Legal
);
393 setOperationAction(ISD::STRICT_FP_TO_UINT
, MVT::v2i64
, Legal
);
394 setOperationAction(ISD::STRICT_FP_TO_UINT
, MVT::v2f64
, Legal
);
397 if (Subtarget
.hasVectorEnhancements2()) {
398 setOperationAction(ISD::FP_TO_SINT
, MVT::v4i32
, Legal
);
399 setOperationAction(ISD::FP_TO_SINT
, MVT::v4f32
, Legal
);
400 setOperationAction(ISD::FP_TO_UINT
, MVT::v4i32
, Legal
);
401 setOperationAction(ISD::FP_TO_UINT
, MVT::v4f32
, Legal
);
402 setOperationAction(ISD::SINT_TO_FP
, MVT::v4i32
, Legal
);
403 setOperationAction(ISD::SINT_TO_FP
, MVT::v4f32
, Legal
);
404 setOperationAction(ISD::UINT_TO_FP
, MVT::v4i32
, Legal
);
405 setOperationAction(ISD::UINT_TO_FP
, MVT::v4f32
, Legal
);
407 setOperationAction(ISD::STRICT_FP_TO_SINT
, MVT::v4i32
, Legal
);
408 setOperationAction(ISD::STRICT_FP_TO_SINT
, MVT::v4f32
, Legal
);
409 setOperationAction(ISD::STRICT_FP_TO_UINT
, MVT::v4i32
, Legal
);
410 setOperationAction(ISD::STRICT_FP_TO_UINT
, MVT::v4f32
, Legal
);
413 // Handle floating-point types.
414 for (unsigned I
= MVT::FIRST_FP_VALUETYPE
;
415 I
<= MVT::LAST_FP_VALUETYPE
;
417 MVT VT
= MVT::SimpleValueType(I
);
418 if (isTypeLegal(VT
)) {
419 // We can use FI for FRINT.
420 setOperationAction(ISD::FRINT
, VT
, Legal
);
422 // We can use the extended form of FI for other rounding operations.
423 if (Subtarget
.hasFPExtension()) {
424 setOperationAction(ISD::FNEARBYINT
, VT
, Legal
);
425 setOperationAction(ISD::FFLOOR
, VT
, Legal
);
426 setOperationAction(ISD::FCEIL
, VT
, Legal
);
427 setOperationAction(ISD::FTRUNC
, VT
, Legal
);
428 setOperationAction(ISD::FROUND
, VT
, Legal
);
431 // No special instructions for these.
432 setOperationAction(ISD::FSIN
, VT
, Expand
);
433 setOperationAction(ISD::FCOS
, VT
, Expand
);
434 setOperationAction(ISD::FSINCOS
, VT
, Expand
);
435 setOperationAction(ISD::FREM
, VT
, Expand
);
436 setOperationAction(ISD::FPOW
, VT
, Expand
);
438 // Handle constrained floating-point operations.
439 setOperationAction(ISD::STRICT_FADD
, VT
, Legal
);
440 setOperationAction(ISD::STRICT_FSUB
, VT
, Legal
);
441 setOperationAction(ISD::STRICT_FMUL
, VT
, Legal
);
442 setOperationAction(ISD::STRICT_FDIV
, VT
, Legal
);
443 setOperationAction(ISD::STRICT_FMA
, VT
, Legal
);
444 setOperationAction(ISD::STRICT_FSQRT
, VT
, Legal
);
445 setOperationAction(ISD::STRICT_FRINT
, VT
, Legal
);
446 setOperationAction(ISD::STRICT_FP_ROUND
, VT
, Legal
);
447 setOperationAction(ISD::STRICT_FP_EXTEND
, VT
, Legal
);
448 if (Subtarget
.hasFPExtension()) {
449 setOperationAction(ISD::STRICT_FNEARBYINT
, VT
, Legal
);
450 setOperationAction(ISD::STRICT_FFLOOR
, VT
, Legal
);
451 setOperationAction(ISD::STRICT_FCEIL
, VT
, Legal
);
452 setOperationAction(ISD::STRICT_FROUND
, VT
, Legal
);
453 setOperationAction(ISD::STRICT_FTRUNC
, VT
, Legal
);
458 // Handle floating-point vector types.
459 if (Subtarget
.hasVector()) {
460 // Scalar-to-vector conversion is just a subreg.
461 setOperationAction(ISD::SCALAR_TO_VECTOR
, MVT::v4f32
, Legal
);
462 setOperationAction(ISD::SCALAR_TO_VECTOR
, MVT::v2f64
, Legal
);
464 // Some insertions and extractions can be done directly but others
465 // need to go via integers.
466 setOperationAction(ISD::INSERT_VECTOR_ELT
, MVT::v4f32
, Custom
);
467 setOperationAction(ISD::INSERT_VECTOR_ELT
, MVT::v2f64
, Custom
);
468 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, MVT::v4f32
, Custom
);
469 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, MVT::v2f64
, Custom
);
471 // These operations have direct equivalents.
472 setOperationAction(ISD::FADD
, MVT::v2f64
, Legal
);
473 setOperationAction(ISD::FNEG
, MVT::v2f64
, Legal
);
474 setOperationAction(ISD::FSUB
, MVT::v2f64
, Legal
);
475 setOperationAction(ISD::FMUL
, MVT::v2f64
, Legal
);
476 setOperationAction(ISD::FMA
, MVT::v2f64
, Legal
);
477 setOperationAction(ISD::FDIV
, MVT::v2f64
, Legal
);
478 setOperationAction(ISD::FABS
, MVT::v2f64
, Legal
);
479 setOperationAction(ISD::FSQRT
, MVT::v2f64
, Legal
);
480 setOperationAction(ISD::FRINT
, MVT::v2f64
, Legal
);
481 setOperationAction(ISD::FNEARBYINT
, MVT::v2f64
, Legal
);
482 setOperationAction(ISD::FFLOOR
, MVT::v2f64
, Legal
);
483 setOperationAction(ISD::FCEIL
, MVT::v2f64
, Legal
);
484 setOperationAction(ISD::FTRUNC
, MVT::v2f64
, Legal
);
485 setOperationAction(ISD::FROUND
, MVT::v2f64
, Legal
);
487 // Handle constrained floating-point operations.
488 setOperationAction(ISD::STRICT_FADD
, MVT::v2f64
, Legal
);
489 setOperationAction(ISD::STRICT_FSUB
, MVT::v2f64
, Legal
);
490 setOperationAction(ISD::STRICT_FMUL
, MVT::v2f64
, Legal
);
491 setOperationAction(ISD::STRICT_FMA
, MVT::v2f64
, Legal
);
492 setOperationAction(ISD::STRICT_FDIV
, MVT::v2f64
, Legal
);
493 setOperationAction(ISD::STRICT_FSQRT
, MVT::v2f64
, Legal
);
494 setOperationAction(ISD::STRICT_FRINT
, MVT::v2f64
, Legal
);
495 setOperationAction(ISD::STRICT_FNEARBYINT
, MVT::v2f64
, Legal
);
496 setOperationAction(ISD::STRICT_FFLOOR
, MVT::v2f64
, Legal
);
497 setOperationAction(ISD::STRICT_FCEIL
, MVT::v2f64
, Legal
);
498 setOperationAction(ISD::STRICT_FTRUNC
, MVT::v2f64
, Legal
);
499 setOperationAction(ISD::STRICT_FROUND
, MVT::v2f64
, Legal
);
502 // The vector enhancements facility 1 has instructions for these.
503 if (Subtarget
.hasVectorEnhancements1()) {
504 setOperationAction(ISD::FADD
, MVT::v4f32
, Legal
);
505 setOperationAction(ISD::FNEG
, MVT::v4f32
, Legal
);
506 setOperationAction(ISD::FSUB
, MVT::v4f32
, Legal
);
507 setOperationAction(ISD::FMUL
, MVT::v4f32
, Legal
);
508 setOperationAction(ISD::FMA
, MVT::v4f32
, Legal
);
509 setOperationAction(ISD::FDIV
, MVT::v4f32
, Legal
);
510 setOperationAction(ISD::FABS
, MVT::v4f32
, Legal
);
511 setOperationAction(ISD::FSQRT
, MVT::v4f32
, Legal
);
512 setOperationAction(ISD::FRINT
, MVT::v4f32
, Legal
);
513 setOperationAction(ISD::FNEARBYINT
, MVT::v4f32
, Legal
);
514 setOperationAction(ISD::FFLOOR
, MVT::v4f32
, Legal
);
515 setOperationAction(ISD::FCEIL
, MVT::v4f32
, Legal
);
516 setOperationAction(ISD::FTRUNC
, MVT::v4f32
, Legal
);
517 setOperationAction(ISD::FROUND
, MVT::v4f32
, Legal
);
519 setOperationAction(ISD::FMAXNUM
, MVT::f64
, Legal
);
520 setOperationAction(ISD::FMAXIMUM
, MVT::f64
, Legal
);
521 setOperationAction(ISD::FMINNUM
, MVT::f64
, Legal
);
522 setOperationAction(ISD::FMINIMUM
, MVT::f64
, Legal
);
524 setOperationAction(ISD::FMAXNUM
, MVT::v2f64
, Legal
);
525 setOperationAction(ISD::FMAXIMUM
, MVT::v2f64
, Legal
);
526 setOperationAction(ISD::FMINNUM
, MVT::v2f64
, Legal
);
527 setOperationAction(ISD::FMINIMUM
, MVT::v2f64
, Legal
);
529 setOperationAction(ISD::FMAXNUM
, MVT::f32
, Legal
);
530 setOperationAction(ISD::FMAXIMUM
, MVT::f32
, Legal
);
531 setOperationAction(ISD::FMINNUM
, MVT::f32
, Legal
);
532 setOperationAction(ISD::FMINIMUM
, MVT::f32
, Legal
);
534 setOperationAction(ISD::FMAXNUM
, MVT::v4f32
, Legal
);
535 setOperationAction(ISD::FMAXIMUM
, MVT::v4f32
, Legal
);
536 setOperationAction(ISD::FMINNUM
, MVT::v4f32
, Legal
);
537 setOperationAction(ISD::FMINIMUM
, MVT::v4f32
, Legal
);
539 setOperationAction(ISD::FMAXNUM
, MVT::f128
, Legal
);
540 setOperationAction(ISD::FMAXIMUM
, MVT::f128
, Legal
);
541 setOperationAction(ISD::FMINNUM
, MVT::f128
, Legal
);
542 setOperationAction(ISD::FMINIMUM
, MVT::f128
, Legal
);
544 // Handle constrained floating-point operations.
545 setOperationAction(ISD::STRICT_FADD
, MVT::v4f32
, Legal
);
546 setOperationAction(ISD::STRICT_FSUB
, MVT::v4f32
, Legal
);
547 setOperationAction(ISD::STRICT_FMUL
, MVT::v4f32
, Legal
);
548 setOperationAction(ISD::STRICT_FMA
, MVT::v4f32
, Legal
);
549 setOperationAction(ISD::STRICT_FDIV
, MVT::v4f32
, Legal
);
550 setOperationAction(ISD::STRICT_FSQRT
, MVT::v4f32
, Legal
);
551 setOperationAction(ISD::STRICT_FRINT
, MVT::v4f32
, Legal
);
552 setOperationAction(ISD::STRICT_FNEARBYINT
, MVT::v4f32
, Legal
);
553 setOperationAction(ISD::STRICT_FFLOOR
, MVT::v4f32
, Legal
);
554 setOperationAction(ISD::STRICT_FCEIL
, MVT::v4f32
, Legal
);
555 setOperationAction(ISD::STRICT_FROUND
, MVT::v4f32
, Legal
);
556 setOperationAction(ISD::STRICT_FTRUNC
, MVT::v4f32
, Legal
);
557 for (auto VT
: { MVT::f32
, MVT::f64
, MVT::f128
,
558 MVT::v4f32
, MVT::v2f64
}) {
559 setOperationAction(ISD::STRICT_FMAXNUM
, VT
, Legal
);
560 setOperationAction(ISD::STRICT_FMINNUM
, VT
, Legal
);
564 // We have fused multiply-addition for f32 and f64 but not f128.
565 setOperationAction(ISD::FMA
, MVT::f32
, Legal
);
566 setOperationAction(ISD::FMA
, MVT::f64
, Legal
);
567 if (Subtarget
.hasVectorEnhancements1())
568 setOperationAction(ISD::FMA
, MVT::f128
, Legal
);
570 setOperationAction(ISD::FMA
, MVT::f128
, Expand
);
572 // We don't have a copysign instruction on vector registers.
573 if (Subtarget
.hasVectorEnhancements1())
574 setOperationAction(ISD::FCOPYSIGN
, MVT::f128
, Expand
);
576 // Needed so that we don't try to implement f128 constant loads using
577 // a load-and-extend of a f80 constant (in cases where the constant
578 // would fit in an f80).
579 for (MVT VT
: MVT::fp_valuetypes())
580 setLoadExtAction(ISD::EXTLOAD
, VT
, MVT::f80
, Expand
);
582 // We don't have extending load instruction on vector registers.
583 if (Subtarget
.hasVectorEnhancements1()) {
584 setLoadExtAction(ISD::EXTLOAD
, MVT::f128
, MVT::f32
, Expand
);
585 setLoadExtAction(ISD::EXTLOAD
, MVT::f128
, MVT::f64
, Expand
);
588 // Floating-point truncation and stores need to be done separately.
589 setTruncStoreAction(MVT::f64
, MVT::f32
, Expand
);
590 setTruncStoreAction(MVT::f128
, MVT::f32
, Expand
);
591 setTruncStoreAction(MVT::f128
, MVT::f64
, Expand
);
593 // We have 64-bit FPR<->GPR moves, but need special handling for
595 if (!Subtarget
.hasVector()) {
596 setOperationAction(ISD::BITCAST
, MVT::i32
, Custom
);
597 setOperationAction(ISD::BITCAST
, MVT::f32
, Custom
);
600 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
601 // structure, but VAEND is a no-op.
602 setOperationAction(ISD::VASTART
, MVT::Other
, Custom
);
603 setOperationAction(ISD::VACOPY
, MVT::Other
, Custom
);
604 setOperationAction(ISD::VAEND
, MVT::Other
, Expand
);
606 // Codes for which we want to perform some z-specific combinations.
607 setTargetDAGCombine(ISD::ZERO_EXTEND
);
608 setTargetDAGCombine(ISD::SIGN_EXTEND
);
609 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG
);
610 setTargetDAGCombine(ISD::LOAD
);
611 setTargetDAGCombine(ISD::STORE
);
612 setTargetDAGCombine(ISD::VECTOR_SHUFFLE
);
613 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT
);
614 setTargetDAGCombine(ISD::FP_ROUND
);
615 setTargetDAGCombine(ISD::FP_EXTEND
);
616 setTargetDAGCombine(ISD::BSWAP
);
617 setTargetDAGCombine(ISD::SDIV
);
618 setTargetDAGCombine(ISD::UDIV
);
619 setTargetDAGCombine(ISD::SREM
);
620 setTargetDAGCombine(ISD::UREM
);
622 // Handle intrinsics.
623 setOperationAction(ISD::INTRINSIC_W_CHAIN
, MVT::Other
, Custom
);
624 setOperationAction(ISD::INTRINSIC_WO_CHAIN
, MVT::Other
, Custom
);
626 // We want to use MVC in preference to even a single load/store pair.
627 MaxStoresPerMemcpy
= 0;
628 MaxStoresPerMemcpyOptSize
= 0;
630 // The main memset sequence is a byte store followed by an MVC.
631 // Two STC or MV..I stores win over that, but the kind of fused stores
632 // generated by target-independent code don't when the byte value is
633 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
634 // than "STC;MVC". Handle the choice in target-specific code instead.
635 MaxStoresPerMemset
= 0;
636 MaxStoresPerMemsetOptSize
= 0;
639 EVT
SystemZTargetLowering::getSetCCResultType(const DataLayout
&DL
,
640 LLVMContext
&, EVT VT
) const {
643 return VT
.changeVectorElementTypeToInteger();
646 bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT
) const {
647 VT
= VT
.getScalarType();
652 switch (VT
.getSimpleVT().SimpleTy
) {
657 return Subtarget
.hasVectorEnhancements1();
665 // Return true if the constant can be generated with a vector instruction,
666 // such as VGM, VGMB or VREPI.
667 bool SystemZVectorConstantInfo::isVectorConstantLegal(
668 const SystemZSubtarget
&Subtarget
) {
669 const SystemZInstrInfo
*TII
=
670 static_cast<const SystemZInstrInfo
*>(Subtarget
.getInstrInfo());
671 if (!Subtarget
.hasVector() ||
672 (isFP128
&& !Subtarget
.hasVectorEnhancements1()))
675 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
676 // preferred way of creating all-zero and all-one vectors so give it
677 // priority over other methods below.
680 for (; I
< SystemZ::VectorBytes
; ++I
) {
681 uint64_t Byte
= IntBits
.lshr(I
* 8).trunc(8).getZExtValue();
687 if (I
== SystemZ::VectorBytes
) {
688 Opcode
= SystemZISD::BYTE_MASK
;
689 OpVals
.push_back(Mask
);
690 VecVT
= MVT::getVectorVT(MVT::getIntegerVT(8), 16);
694 if (SplatBitSize
> 64)
697 auto tryValue
= [&](uint64_t Value
) -> bool {
698 // Try VECTOR REPLICATE IMMEDIATE
699 int64_t SignedValue
= SignExtend64(Value
, SplatBitSize
);
700 if (isInt
<16>(SignedValue
)) {
701 OpVals
.push_back(((unsigned) SignedValue
));
702 Opcode
= SystemZISD::REPLICATE
;
703 VecVT
= MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize
),
704 SystemZ::VectorBits
/ SplatBitSize
);
707 // Try VECTOR GENERATE MASK
709 if (TII
->isRxSBGMask(Value
, SplatBitSize
, Start
, End
)) {
710 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
711 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
712 // an SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
713 OpVals
.push_back(Start
- (64 - SplatBitSize
));
714 OpVals
.push_back(End
- (64 - SplatBitSize
));
715 Opcode
= SystemZISD::ROTATE_MASK
;
716 VecVT
= MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize
),
717 SystemZ::VectorBits
/ SplatBitSize
);
723 // First try assuming that any undefined bits above the highest set bit
724 // and below the lowest set bit are 1s. This increases the likelihood of
725 // being able to use a sign-extended element value in VECTOR REPLICATE
726 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
727 uint64_t SplatBitsZ
= SplatBits
.getZExtValue();
728 uint64_t SplatUndefZ
= SplatUndef
.getZExtValue();
730 (SplatUndefZ
& ((uint64_t(1) << findFirstSet(SplatBitsZ
)) - 1));
732 (SplatUndefZ
& ~((uint64_t(1) << findLastSet(SplatBitsZ
)) - 1));
733 if (tryValue(SplatBitsZ
| Upper
| Lower
))
736 // Now try assuming that any undefined bits between the first and
737 // last defined set bits are set. This increases the chances of
738 // using a non-wraparound mask.
739 uint64_t Middle
= SplatUndefZ
& ~Upper
& ~Lower
;
740 return tryValue(SplatBitsZ
| Middle
);
743 SystemZVectorConstantInfo::SystemZVectorConstantInfo(APFloat FPImm
) {
744 IntBits
= FPImm
.bitcastToAPInt().zextOrSelf(128);
745 isFP128
= (&FPImm
.getSemantics() == &APFloat::IEEEquad());
747 // Find the smallest splat.
748 SplatBits
= FPImm
.bitcastToAPInt();
749 unsigned Width
= SplatBits
.getBitWidth();
751 unsigned HalfSize
= Width
/ 2;
752 APInt HighValue
= SplatBits
.lshr(HalfSize
).trunc(HalfSize
);
753 APInt LowValue
= SplatBits
.trunc(HalfSize
);
755 // If the two halves do not match, stop here.
756 if (HighValue
!= LowValue
|| 8 > HalfSize
)
759 SplatBits
= HighValue
;
763 SplatBitSize
= Width
;
766 SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode
*BVN
) {
767 assert(BVN
->isConstant() && "Expected a constant BUILD_VECTOR");
770 // Get IntBits by finding the 128 bit splat.
771 BVN
->isConstantSplat(IntBits
, SplatUndef
, SplatBitSize
, HasAnyUndefs
, 128,
774 // Get SplatBits by finding the 8 bit or greater splat.
775 BVN
->isConstantSplat(SplatBits
, SplatUndef
, SplatBitSize
, HasAnyUndefs
, 8,
779 bool SystemZTargetLowering::isFPImmLegal(const APFloat
&Imm
, EVT VT
,
780 bool ForCodeSize
) const {
781 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
782 if (Imm
.isZero() || Imm
.isNegZero())
785 return SystemZVectorConstantInfo(Imm
).isVectorConstantLegal(Subtarget
);
788 bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm
) const {
789 // We can use CGFI or CLGFI.
790 return isInt
<32>(Imm
) || isUInt
<32>(Imm
);
793 bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm
) const {
794 // We can use ALGFI or SLGFI.
795 return isUInt
<32>(Imm
) || isUInt
<32>(-Imm
);
798 bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
799 EVT VT
, unsigned, unsigned, MachineMemOperand::Flags
, bool *Fast
) const {
800 // Unaligned accesses should never be slower than the expanded version.
801 // We check specifically for aligned accesses in the few cases where
802 // they are required.
808 // Information about the addressing mode for a memory access.
809 struct AddressingMode
{
810 // True if a long displacement is supported.
811 bool LongDisplacement
;
813 // True if use of index register is supported.
816 AddressingMode(bool LongDispl
, bool IdxReg
) :
817 LongDisplacement(LongDispl
), IndexReg(IdxReg
) {}
820 // Return the desired addressing mode for a Load which has only one use (in
821 // the same block) which is a Store.
822 static AddressingMode
getLoadStoreAddrMode(bool HasVector
,
824 // With vector support a Load->Store combination may be combined to either
825 // an MVC or vector operations and it seems to work best to allow the
826 // vector addressing mode.
828 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
830 // Otherwise only the MVC case is special.
831 bool MVC
= Ty
->isIntegerTy(8);
832 return AddressingMode(!MVC
/*LongDispl*/, !MVC
/*IdxReg*/);
835 // Return the addressing mode which seems most desirable given an LLVM
836 // Instruction pointer.
837 static AddressingMode
838 supportedAddressingMode(Instruction
*I
, bool HasVector
) {
839 if (IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(I
)) {
840 switch (II
->getIntrinsicID()) {
842 case Intrinsic::memset
:
843 case Intrinsic::memmove
:
844 case Intrinsic::memcpy
:
845 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
849 if (isa
<LoadInst
>(I
) && I
->hasOneUse()) {
850 auto *SingleUser
= cast
<Instruction
>(*I
->user_begin());
851 if (SingleUser
->getParent() == I
->getParent()) {
852 if (isa
<ICmpInst
>(SingleUser
)) {
853 if (auto *C
= dyn_cast
<ConstantInt
>(SingleUser
->getOperand(1)))
854 if (C
->getBitWidth() <= 64 &&
855 (isInt
<16>(C
->getSExtValue()) || isUInt
<16>(C
->getZExtValue())))
856 // Comparison of memory with 16 bit signed / unsigned immediate
857 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
858 } else if (isa
<StoreInst
>(SingleUser
))
860 return getLoadStoreAddrMode(HasVector
, I
->getType());
862 } else if (auto *StoreI
= dyn_cast
<StoreInst
>(I
)) {
863 if (auto *LoadI
= dyn_cast
<LoadInst
>(StoreI
->getValueOperand()))
864 if (LoadI
->hasOneUse() && LoadI
->getParent() == I
->getParent())
866 return getLoadStoreAddrMode(HasVector
, LoadI
->getType());
869 if (HasVector
&& (isa
<LoadInst
>(I
) || isa
<StoreInst
>(I
))) {
871 // * Use LDE instead of LE/LEY for z13 to avoid partial register
872 // dependencies (LDE only supports small offsets).
873 // * Utilize the vector registers to hold floating point
874 // values (vector load / store instructions only support small
877 Type
*MemAccessTy
= (isa
<LoadInst
>(I
) ? I
->getType() :
878 I
->getOperand(0)->getType());
879 bool IsFPAccess
= MemAccessTy
->isFloatingPointTy();
880 bool IsVectorAccess
= MemAccessTy
->isVectorTy();
882 // A store of an extracted vector element will be combined into a VSTE type
884 if (!IsVectorAccess
&& isa
<StoreInst
>(I
)) {
885 Value
*DataOp
= I
->getOperand(0);
886 if (isa
<ExtractElementInst
>(DataOp
))
887 IsVectorAccess
= true;
890 // A load which gets inserted into a vector element will be combined into a
891 // VLE type instruction.
892 if (!IsVectorAccess
&& isa
<LoadInst
>(I
) && I
->hasOneUse()) {
893 User
*LoadUser
= *I
->user_begin();
894 if (isa
<InsertElementInst
>(LoadUser
))
895 IsVectorAccess
= true;
898 if (IsFPAccess
|| IsVectorAccess
)
899 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
902 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
905 bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout
&DL
,
906 const AddrMode
&AM
, Type
*Ty
, unsigned AS
, Instruction
*I
) const {
907 // Punt on globals for now, although they can be used in limited
908 // RELATIVE LONG cases.
912 // Require a 20-bit signed offset.
913 if (!isInt
<20>(AM
.BaseOffs
))
916 AddressingMode
SupportedAM(true, true);
918 SupportedAM
= supportedAddressingMode(I
, Subtarget
.hasVector());
920 if (!SupportedAM
.LongDisplacement
&& !isUInt
<12>(AM
.BaseOffs
))
923 if (!SupportedAM
.IndexReg
)
924 // No indexing allowed.
925 return AM
.Scale
== 0;
927 // Indexing is OK but no scale factor can be applied.
928 return AM
.Scale
== 0 || AM
.Scale
== 1;
931 bool SystemZTargetLowering::isTruncateFree(Type
*FromType
, Type
*ToType
) const {
932 if (!FromType
->isIntegerTy() || !ToType
->isIntegerTy())
934 unsigned FromBits
= FromType
->getPrimitiveSizeInBits();
935 unsigned ToBits
= ToType
->getPrimitiveSizeInBits();
936 return FromBits
> ToBits
;
939 bool SystemZTargetLowering::isTruncateFree(EVT FromVT
, EVT ToVT
) const {
940 if (!FromVT
.isInteger() || !ToVT
.isInteger())
942 unsigned FromBits
= FromVT
.getSizeInBits();
943 unsigned ToBits
= ToVT
.getSizeInBits();
944 return FromBits
> ToBits
;
947 //===----------------------------------------------------------------------===//
948 // Inline asm support
949 //===----------------------------------------------------------------------===//
951 TargetLowering::ConstraintType
952 SystemZTargetLowering::getConstraintType(StringRef Constraint
) const {
953 if (Constraint
.size() == 1) {
954 switch (Constraint
[0]) {
955 case 'a': // Address register
956 case 'd': // Data register (equivalent to 'r')
957 case 'f': // Floating-point register
958 case 'h': // High-part register
959 case 'r': // General-purpose register
960 case 'v': // Vector register
961 return C_RegisterClass
;
963 case 'Q': // Memory with base and unsigned 12-bit displacement
964 case 'R': // Likewise, plus an index
965 case 'S': // Memory with base and signed 20-bit displacement
966 case 'T': // Likewise, plus an index
967 case 'm': // Equivalent to 'T'.
970 case 'I': // Unsigned 8-bit constant
971 case 'J': // Unsigned 12-bit constant
972 case 'K': // Signed 16-bit constant
973 case 'L': // Signed 20-bit displacement (on all targets we support)
974 case 'M': // 0x7fffffff
981 return TargetLowering::getConstraintType(Constraint
);
984 TargetLowering::ConstraintWeight
SystemZTargetLowering::
985 getSingleConstraintMatchWeight(AsmOperandInfo
&info
,
986 const char *constraint
) const {
987 ConstraintWeight weight
= CW_Invalid
;
988 Value
*CallOperandVal
= info
.CallOperandVal
;
989 // If we don't have a value, we can't do a match,
990 // but allow it at the lowest weight.
993 Type
*type
= CallOperandVal
->getType();
994 // Look at the constraint type.
995 switch (*constraint
) {
997 weight
= TargetLowering::getSingleConstraintMatchWeight(info
, constraint
);
1000 case 'a': // Address register
1001 case 'd': // Data register (equivalent to 'r')
1002 case 'h': // High-part register
1003 case 'r': // General-purpose register
1004 if (CallOperandVal
->getType()->isIntegerTy())
1005 weight
= CW_Register
;
1008 case 'f': // Floating-point register
1009 if (type
->isFloatingPointTy())
1010 weight
= CW_Register
;
1013 case 'v': // Vector register
1014 if ((type
->isVectorTy() || type
->isFloatingPointTy()) &&
1015 Subtarget
.hasVector())
1016 weight
= CW_Register
;
1019 case 'I': // Unsigned 8-bit constant
1020 if (auto *C
= dyn_cast
<ConstantInt
>(CallOperandVal
))
1021 if (isUInt
<8>(C
->getZExtValue()))
1022 weight
= CW_Constant
;
1025 case 'J': // Unsigned 12-bit constant
1026 if (auto *C
= dyn_cast
<ConstantInt
>(CallOperandVal
))
1027 if (isUInt
<12>(C
->getZExtValue()))
1028 weight
= CW_Constant
;
1031 case 'K': // Signed 16-bit constant
1032 if (auto *C
= dyn_cast
<ConstantInt
>(CallOperandVal
))
1033 if (isInt
<16>(C
->getSExtValue()))
1034 weight
= CW_Constant
;
1037 case 'L': // Signed 20-bit displacement (on all targets we support)
1038 if (auto *C
= dyn_cast
<ConstantInt
>(CallOperandVal
))
1039 if (isInt
<20>(C
->getSExtValue()))
1040 weight
= CW_Constant
;
1043 case 'M': // 0x7fffffff
1044 if (auto *C
= dyn_cast
<ConstantInt
>(CallOperandVal
))
1045 if (C
->getZExtValue() == 0x7fffffff)
1046 weight
= CW_Constant
;
1052 // Parse a "{tNNN}" register constraint for which the register type "t"
1053 // has already been verified. MC is the class associated with "t" and
1054 // Map maps 0-based register numbers to LLVM register numbers.
1055 static std::pair
<unsigned, const TargetRegisterClass
*>
1056 parseRegisterNumber(StringRef Constraint
, const TargetRegisterClass
*RC
,
1057 const unsigned *Map
, unsigned Size
) {
1058 assert(*(Constraint
.end()-1) == '}' && "Missing '}'");
1059 if (isdigit(Constraint
[2])) {
1062 Constraint
.slice(2, Constraint
.size() - 1).getAsInteger(10, Index
);
1063 if (!Failed
&& Index
< Size
&& Map
[Index
])
1064 return std::make_pair(Map
[Index
], RC
);
1066 return std::make_pair(0U, nullptr);
1069 std::pair
<unsigned, const TargetRegisterClass
*>
1070 SystemZTargetLowering::getRegForInlineAsmConstraint(
1071 const TargetRegisterInfo
*TRI
, StringRef Constraint
, MVT VT
) const {
1072 if (Constraint
.size() == 1) {
1073 // GCC Constraint Letters
1074 switch (Constraint
[0]) {
1076 case 'd': // Data register (equivalent to 'r')
1077 case 'r': // General-purpose register
1079 return std::make_pair(0U, &SystemZ::GR64BitRegClass
);
1080 else if (VT
== MVT::i128
)
1081 return std::make_pair(0U, &SystemZ::GR128BitRegClass
);
1082 return std::make_pair(0U, &SystemZ::GR32BitRegClass
);
1084 case 'a': // Address register
1086 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass
);
1087 else if (VT
== MVT::i128
)
1088 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass
);
1089 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass
);
1091 case 'h': // High-part register (an LLVM extension)
1092 return std::make_pair(0U, &SystemZ::GRH32BitRegClass
);
1094 case 'f': // Floating-point register
1096 return std::make_pair(0U, &SystemZ::FP64BitRegClass
);
1097 else if (VT
== MVT::f128
)
1098 return std::make_pair(0U, &SystemZ::FP128BitRegClass
);
1099 return std::make_pair(0U, &SystemZ::FP32BitRegClass
);
1101 case 'v': // Vector register
1102 if (Subtarget
.hasVector()) {
1104 return std::make_pair(0U, &SystemZ::VR32BitRegClass
);
1106 return std::make_pair(0U, &SystemZ::VR64BitRegClass
);
1107 return std::make_pair(0U, &SystemZ::VR128BitRegClass
);
1112 if (Constraint
.size() > 0 && Constraint
[0] == '{') {
1113 // We need to override the default register parsing for GPRs and FPRs
1114 // because the interpretation depends on VT. The internal names of
1115 // the registers are also different from the external names
1116 // (F0D and F0S instead of F0, etc.).
1117 if (Constraint
[1] == 'r') {
1119 return parseRegisterNumber(Constraint
, &SystemZ::GR32BitRegClass
,
1120 SystemZMC::GR32Regs
, 16);
1121 if (VT
== MVT::i128
)
1122 return parseRegisterNumber(Constraint
, &SystemZ::GR128BitRegClass
,
1123 SystemZMC::GR128Regs
, 16);
1124 return parseRegisterNumber(Constraint
, &SystemZ::GR64BitRegClass
,
1125 SystemZMC::GR64Regs
, 16);
1127 if (Constraint
[1] == 'f') {
1129 return parseRegisterNumber(Constraint
, &SystemZ::FP32BitRegClass
,
1130 SystemZMC::FP32Regs
, 16);
1131 if (VT
== MVT::f128
)
1132 return parseRegisterNumber(Constraint
, &SystemZ::FP128BitRegClass
,
1133 SystemZMC::FP128Regs
, 16);
1134 return parseRegisterNumber(Constraint
, &SystemZ::FP64BitRegClass
,
1135 SystemZMC::FP64Regs
, 16);
1137 if (Constraint
[1] == 'v') {
1139 return parseRegisterNumber(Constraint
, &SystemZ::VR32BitRegClass
,
1140 SystemZMC::VR32Regs
, 32);
1142 return parseRegisterNumber(Constraint
, &SystemZ::VR64BitRegClass
,
1143 SystemZMC::VR64Regs
, 32);
1144 return parseRegisterNumber(Constraint
, &SystemZ::VR128BitRegClass
,
1145 SystemZMC::VR128Regs
, 32);
1148 return TargetLowering::getRegForInlineAsmConstraint(TRI
, Constraint
, VT
);
1151 void SystemZTargetLowering::
1152 LowerAsmOperandForConstraint(SDValue Op
, std::string
&Constraint
,
1153 std::vector
<SDValue
> &Ops
,
1154 SelectionDAG
&DAG
) const {
1155 // Only support length 1 constraints for now.
1156 if (Constraint
.length() == 1) {
1157 switch (Constraint
[0]) {
1158 case 'I': // Unsigned 8-bit constant
1159 if (auto *C
= dyn_cast
<ConstantSDNode
>(Op
))
1160 if (isUInt
<8>(C
->getZExtValue()))
1161 Ops
.push_back(DAG
.getTargetConstant(C
->getZExtValue(), SDLoc(Op
),
1162 Op
.getValueType()));
1165 case 'J': // Unsigned 12-bit constant
1166 if (auto *C
= dyn_cast
<ConstantSDNode
>(Op
))
1167 if (isUInt
<12>(C
->getZExtValue()))
1168 Ops
.push_back(DAG
.getTargetConstant(C
->getZExtValue(), SDLoc(Op
),
1169 Op
.getValueType()));
1172 case 'K': // Signed 16-bit constant
1173 if (auto *C
= dyn_cast
<ConstantSDNode
>(Op
))
1174 if (isInt
<16>(C
->getSExtValue()))
1175 Ops
.push_back(DAG
.getTargetConstant(C
->getSExtValue(), SDLoc(Op
),
1176 Op
.getValueType()));
1179 case 'L': // Signed 20-bit displacement (on all targets we support)
1180 if (auto *C
= dyn_cast
<ConstantSDNode
>(Op
))
1181 if (isInt
<20>(C
->getSExtValue()))
1182 Ops
.push_back(DAG
.getTargetConstant(C
->getSExtValue(), SDLoc(Op
),
1183 Op
.getValueType()));
1186 case 'M': // 0x7fffffff
1187 if (auto *C
= dyn_cast
<ConstantSDNode
>(Op
))
1188 if (C
->getZExtValue() == 0x7fffffff)
1189 Ops
.push_back(DAG
.getTargetConstant(C
->getZExtValue(), SDLoc(Op
),
1190 Op
.getValueType()));
1194 TargetLowering::LowerAsmOperandForConstraint(Op
, Constraint
, Ops
, DAG
);
1197 //===----------------------------------------------------------------------===//
1198 // Calling conventions
1199 //===----------------------------------------------------------------------===//
1201 #include "SystemZGenCallingConv.inc"
1203 const MCPhysReg
*SystemZTargetLowering::getScratchRegisters(
1204 CallingConv::ID
) const {
1205 static const MCPhysReg ScratchRegs
[] = { SystemZ::R0D
, SystemZ::R1D
,
1210 bool SystemZTargetLowering::allowTruncateForTailCall(Type
*FromType
,
1211 Type
*ToType
) const {
1212 return isTruncateFree(FromType
, ToType
);
1215 bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst
*CI
) const {
1216 return CI
->isTailCall();
1219 // We do not yet support 128-bit single-element vector types. If the user
1220 // attempts to use such types as function argument or return type, prefer
1221 // to error out instead of emitting code violating the ABI.
1222 static void VerifyVectorType(MVT VT
, EVT ArgVT
) {
1223 if (ArgVT
.isVector() && !VT
.isVector())
1224 report_fatal_error("Unsupported vector argument or return type");
1227 static void VerifyVectorTypes(const SmallVectorImpl
<ISD::InputArg
> &Ins
) {
1228 for (unsigned i
= 0; i
< Ins
.size(); ++i
)
1229 VerifyVectorType(Ins
[i
].VT
, Ins
[i
].ArgVT
);
1232 static void VerifyVectorTypes(const SmallVectorImpl
<ISD::OutputArg
> &Outs
) {
1233 for (unsigned i
= 0; i
< Outs
.size(); ++i
)
1234 VerifyVectorType(Outs
[i
].VT
, Outs
[i
].ArgVT
);
1237 // Value is a value that has been passed to us in the location described by VA
1238 // (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1239 // any loads onto Chain.
1240 static SDValue
convertLocVTToValVT(SelectionDAG
&DAG
, const SDLoc
&DL
,
1241 CCValAssign
&VA
, SDValue Chain
,
1243 // If the argument has been promoted from a smaller type, insert an
1244 // assertion to capture this.
1245 if (VA
.getLocInfo() == CCValAssign::SExt
)
1246 Value
= DAG
.getNode(ISD::AssertSext
, DL
, VA
.getLocVT(), Value
,
1247 DAG
.getValueType(VA
.getValVT()));
1248 else if (VA
.getLocInfo() == CCValAssign::ZExt
)
1249 Value
= DAG
.getNode(ISD::AssertZext
, DL
, VA
.getLocVT(), Value
,
1250 DAG
.getValueType(VA
.getValVT()));
1252 if (VA
.isExtInLoc())
1253 Value
= DAG
.getNode(ISD::TRUNCATE
, DL
, VA
.getValVT(), Value
);
1254 else if (VA
.getLocInfo() == CCValAssign::BCvt
) {
1255 // If this is a short vector argument loaded from the stack,
1256 // extend from i64 to full vector size and then bitcast.
1257 assert(VA
.getLocVT() == MVT::i64
);
1258 assert(VA
.getValVT().isVector());
1259 Value
= DAG
.getBuildVector(MVT::v2i64
, DL
, {Value
, DAG
.getUNDEF(MVT::i64
)});
1260 Value
= DAG
.getNode(ISD::BITCAST
, DL
, VA
.getValVT(), Value
);
1262 assert(VA
.getLocInfo() == CCValAssign::Full
&& "Unsupported getLocInfo");
1266 // Value is a value of type VA.getValVT() that we need to copy into
1267 // the location described by VA. Return a copy of Value converted to
1268 // VA.getValVT(). The caller is responsible for handling indirect values.
1269 static SDValue
convertValVTToLocVT(SelectionDAG
&DAG
, const SDLoc
&DL
,
1270 CCValAssign
&VA
, SDValue Value
) {
1271 switch (VA
.getLocInfo()) {
1272 case CCValAssign::SExt
:
1273 return DAG
.getNode(ISD::SIGN_EXTEND
, DL
, VA
.getLocVT(), Value
);
1274 case CCValAssign::ZExt
:
1275 return DAG
.getNode(ISD::ZERO_EXTEND
, DL
, VA
.getLocVT(), Value
);
1276 case CCValAssign::AExt
:
1277 return DAG
.getNode(ISD::ANY_EXTEND
, DL
, VA
.getLocVT(), Value
);
1278 case CCValAssign::BCvt
:
1279 // If this is a short vector argument to be stored to the stack,
1280 // bitcast to v2i64 and then extract first element.
1281 assert(VA
.getLocVT() == MVT::i64
);
1282 assert(VA
.getValVT().isVector());
1283 Value
= DAG
.getNode(ISD::BITCAST
, DL
, MVT::v2i64
, Value
);
1284 return DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, DL
, VA
.getLocVT(), Value
,
1285 DAG
.getConstant(0, DL
, MVT::i32
));
1286 case CCValAssign::Full
:
1289 llvm_unreachable("Unhandled getLocInfo()");
1293 SDValue
SystemZTargetLowering::LowerFormalArguments(
1294 SDValue Chain
, CallingConv::ID CallConv
, bool IsVarArg
,
1295 const SmallVectorImpl
<ISD::InputArg
> &Ins
, const SDLoc
&DL
,
1296 SelectionDAG
&DAG
, SmallVectorImpl
<SDValue
> &InVals
) const {
1297 MachineFunction
&MF
= DAG
.getMachineFunction();
1298 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
1299 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
1300 SystemZMachineFunctionInfo
*FuncInfo
=
1301 MF
.getInfo
<SystemZMachineFunctionInfo
>();
1303 static_cast<const SystemZFrameLowering
*>(Subtarget
.getFrameLowering());
1304 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
1306 // Detect unsupported vector argument types.
1307 if (Subtarget
.hasVector())
1308 VerifyVectorTypes(Ins
);
1310 // Assign locations to all of the incoming arguments.
1311 SmallVector
<CCValAssign
, 16> ArgLocs
;
1312 SystemZCCState
CCInfo(CallConv
, IsVarArg
, MF
, ArgLocs
, *DAG
.getContext());
1313 CCInfo
.AnalyzeFormalArguments(Ins
, CC_SystemZ
);
1315 unsigned NumFixedGPRs
= 0;
1316 unsigned NumFixedFPRs
= 0;
1317 for (unsigned I
= 0, E
= ArgLocs
.size(); I
!= E
; ++I
) {
1319 CCValAssign
&VA
= ArgLocs
[I
];
1320 EVT LocVT
= VA
.getLocVT();
1321 if (VA
.isRegLoc()) {
1322 // Arguments passed in registers
1323 const TargetRegisterClass
*RC
;
1324 switch (LocVT
.getSimpleVT().SimpleTy
) {
1326 // Integers smaller than i64 should be promoted to i64.
1327 llvm_unreachable("Unexpected argument type");
1330 RC
= &SystemZ::GR32BitRegClass
;
1334 RC
= &SystemZ::GR64BitRegClass
;
1338 RC
= &SystemZ::FP32BitRegClass
;
1342 RC
= &SystemZ::FP64BitRegClass
;
1350 RC
= &SystemZ::VR128BitRegClass
;
1354 Register VReg
= MRI
.createVirtualRegister(RC
);
1355 MRI
.addLiveIn(VA
.getLocReg(), VReg
);
1356 ArgValue
= DAG
.getCopyFromReg(Chain
, DL
, VReg
, LocVT
);
1358 assert(VA
.isMemLoc() && "Argument not register or memory");
1360 // Create the frame index object for this incoming parameter.
1361 int FI
= MFI
.CreateFixedObject(LocVT
.getSizeInBits() / 8,
1362 VA
.getLocMemOffset(), true);
1364 // Create the SelectionDAG nodes corresponding to a load
1365 // from this parameter. Unpromoted ints and floats are
1366 // passed as right-justified 8-byte values.
1367 SDValue FIN
= DAG
.getFrameIndex(FI
, PtrVT
);
1368 if (VA
.getLocVT() == MVT::i32
|| VA
.getLocVT() == MVT::f32
)
1369 FIN
= DAG
.getNode(ISD::ADD
, DL
, PtrVT
, FIN
,
1370 DAG
.getIntPtrConstant(4, DL
));
1371 ArgValue
= DAG
.getLoad(LocVT
, DL
, Chain
, FIN
,
1372 MachinePointerInfo::getFixedStack(MF
, FI
));
1375 // Convert the value of the argument register into the value that's
1377 if (VA
.getLocInfo() == CCValAssign::Indirect
) {
1378 InVals
.push_back(DAG
.getLoad(VA
.getValVT(), DL
, Chain
, ArgValue
,
1379 MachinePointerInfo()));
1380 // If the original argument was split (e.g. i128), we need
1381 // to load all parts of it here (using the same address).
1382 unsigned ArgIndex
= Ins
[I
].OrigArgIndex
;
1383 assert (Ins
[I
].PartOffset
== 0);
1384 while (I
+ 1 != E
&& Ins
[I
+ 1].OrigArgIndex
== ArgIndex
) {
1385 CCValAssign
&PartVA
= ArgLocs
[I
+ 1];
1386 unsigned PartOffset
= Ins
[I
+ 1].PartOffset
;
1387 SDValue Address
= DAG
.getNode(ISD::ADD
, DL
, PtrVT
, ArgValue
,
1388 DAG
.getIntPtrConstant(PartOffset
, DL
));
1389 InVals
.push_back(DAG
.getLoad(PartVA
.getValVT(), DL
, Chain
, Address
,
1390 MachinePointerInfo()));
1394 InVals
.push_back(convertLocVTToValVT(DAG
, DL
, VA
, Chain
, ArgValue
));
1398 // Save the number of non-varargs registers for later use by va_start, etc.
1399 FuncInfo
->setVarArgsFirstGPR(NumFixedGPRs
);
1400 FuncInfo
->setVarArgsFirstFPR(NumFixedFPRs
);
1402 // Likewise the address (in the form of a frame index) of where the
1403 // first stack vararg would be. The 1-byte size here is arbitrary.
1404 int64_t StackSize
= CCInfo
.getNextStackOffset();
1405 FuncInfo
->setVarArgsFrameIndex(MFI
.CreateFixedObject(1, StackSize
, true));
1407 // ...and a similar frame index for the caller-allocated save area
1408 // that will be used to store the incoming registers.
1409 int64_t RegSaveOffset
= TFL
->getOffsetOfLocalArea();
1410 unsigned RegSaveIndex
= MFI
.CreateFixedObject(1, RegSaveOffset
, true);
1411 FuncInfo
->setRegSaveFrameIndex(RegSaveIndex
);
1413 // Store the FPR varargs in the reserved frame slots. (We store the
1414 // GPRs as part of the prologue.)
1415 if (NumFixedFPRs
< SystemZ::NumArgFPRs
) {
1416 SDValue MemOps
[SystemZ::NumArgFPRs
];
1417 for (unsigned I
= NumFixedFPRs
; I
< SystemZ::NumArgFPRs
; ++I
) {
1418 unsigned Offset
= TFL
->getRegSpillOffset(SystemZ::ArgFPRs
[I
]);
1419 int FI
= MFI
.CreateFixedObject(8, RegSaveOffset
+ Offset
, true);
1420 SDValue FIN
= DAG
.getFrameIndex(FI
, getPointerTy(DAG
.getDataLayout()));
1421 unsigned VReg
= MF
.addLiveIn(SystemZ::ArgFPRs
[I
],
1422 &SystemZ::FP64BitRegClass
);
1423 SDValue ArgValue
= DAG
.getCopyFromReg(Chain
, DL
, VReg
, MVT::f64
);
1424 MemOps
[I
] = DAG
.getStore(ArgValue
.getValue(1), DL
, ArgValue
, FIN
,
1425 MachinePointerInfo::getFixedStack(MF
, FI
));
1427 // Join the stores, which are independent of one another.
1428 Chain
= DAG
.getNode(ISD::TokenFactor
, DL
, MVT::Other
,
1429 makeArrayRef(&MemOps
[NumFixedFPRs
],
1430 SystemZ::NumArgFPRs
-NumFixedFPRs
));
1437 static bool canUseSiblingCall(const CCState
&ArgCCInfo
,
1438 SmallVectorImpl
<CCValAssign
> &ArgLocs
,
1439 SmallVectorImpl
<ISD::OutputArg
> &Outs
) {
1440 // Punt if there are any indirect or stack arguments, or if the call
1441 // needs the callee-saved argument register R6, or if the call uses
1442 // the callee-saved register arguments SwiftSelf and SwiftError.
1443 for (unsigned I
= 0, E
= ArgLocs
.size(); I
!= E
; ++I
) {
1444 CCValAssign
&VA
= ArgLocs
[I
];
1445 if (VA
.getLocInfo() == CCValAssign::Indirect
)
1449 Register Reg
= VA
.getLocReg();
1450 if (Reg
== SystemZ::R6H
|| Reg
== SystemZ::R6L
|| Reg
== SystemZ::R6D
)
1452 if (Outs
[I
].Flags
.isSwiftSelf() || Outs
[I
].Flags
.isSwiftError())
1459 SystemZTargetLowering::LowerCall(CallLoweringInfo
&CLI
,
1460 SmallVectorImpl
<SDValue
> &InVals
) const {
1461 SelectionDAG
&DAG
= CLI
.DAG
;
1463 SmallVectorImpl
<ISD::OutputArg
> &Outs
= CLI
.Outs
;
1464 SmallVectorImpl
<SDValue
> &OutVals
= CLI
.OutVals
;
1465 SmallVectorImpl
<ISD::InputArg
> &Ins
= CLI
.Ins
;
1466 SDValue Chain
= CLI
.Chain
;
1467 SDValue Callee
= CLI
.Callee
;
1468 bool &IsTailCall
= CLI
.IsTailCall
;
1469 CallingConv::ID CallConv
= CLI
.CallConv
;
1470 bool IsVarArg
= CLI
.IsVarArg
;
1471 MachineFunction
&MF
= DAG
.getMachineFunction();
1472 EVT PtrVT
= getPointerTy(MF
.getDataLayout());
1474 // Detect unsupported vector argument and return types.
1475 if (Subtarget
.hasVector()) {
1476 VerifyVectorTypes(Outs
);
1477 VerifyVectorTypes(Ins
);
1480 // Analyze the operands of the call, assigning locations to each operand.
1481 SmallVector
<CCValAssign
, 16> ArgLocs
;
1482 SystemZCCState
ArgCCInfo(CallConv
, IsVarArg
, MF
, ArgLocs
, *DAG
.getContext());
1483 ArgCCInfo
.AnalyzeCallOperands(Outs
, CC_SystemZ
);
1485 // We don't support GuaranteedTailCallOpt, only automatically-detected
1487 if (IsTailCall
&& !canUseSiblingCall(ArgCCInfo
, ArgLocs
, Outs
))
1490 // Get a count of how many bytes are to be pushed on the stack.
1491 unsigned NumBytes
= ArgCCInfo
.getNextStackOffset();
1493 // Mark the start of the call.
1495 Chain
= DAG
.getCALLSEQ_START(Chain
, NumBytes
, 0, DL
);
1497 // Copy argument values to their designated locations.
1498 SmallVector
<std::pair
<unsigned, SDValue
>, 9> RegsToPass
;
1499 SmallVector
<SDValue
, 8> MemOpChains
;
1501 for (unsigned I
= 0, E
= ArgLocs
.size(); I
!= E
; ++I
) {
1502 CCValAssign
&VA
= ArgLocs
[I
];
1503 SDValue ArgValue
= OutVals
[I
];
1505 if (VA
.getLocInfo() == CCValAssign::Indirect
) {
1506 // Store the argument in a stack slot and pass its address.
1507 SDValue SpillSlot
= DAG
.CreateStackTemporary(Outs
[I
].ArgVT
);
1508 int FI
= cast
<FrameIndexSDNode
>(SpillSlot
)->getIndex();
1509 MemOpChains
.push_back(
1510 DAG
.getStore(Chain
, DL
, ArgValue
, SpillSlot
,
1511 MachinePointerInfo::getFixedStack(MF
, FI
)));
1512 // If the original argument was split (e.g. i128), we need
1513 // to store all parts of it here (and pass just one address).
1514 unsigned ArgIndex
= Outs
[I
].OrigArgIndex
;
1515 assert (Outs
[I
].PartOffset
== 0);
1516 while (I
+ 1 != E
&& Outs
[I
+ 1].OrigArgIndex
== ArgIndex
) {
1517 SDValue PartValue
= OutVals
[I
+ 1];
1518 unsigned PartOffset
= Outs
[I
+ 1].PartOffset
;
1519 SDValue Address
= DAG
.getNode(ISD::ADD
, DL
, PtrVT
, SpillSlot
,
1520 DAG
.getIntPtrConstant(PartOffset
, DL
));
1521 MemOpChains
.push_back(
1522 DAG
.getStore(Chain
, DL
, PartValue
, Address
,
1523 MachinePointerInfo::getFixedStack(MF
, FI
)));
1526 ArgValue
= SpillSlot
;
1528 ArgValue
= convertValVTToLocVT(DAG
, DL
, VA
, ArgValue
);
1531 // Queue up the argument copies and emit them at the end.
1532 RegsToPass
.push_back(std::make_pair(VA
.getLocReg(), ArgValue
));
1534 assert(VA
.isMemLoc() && "Argument not register or memory");
1536 // Work out the address of the stack slot. Unpromoted ints and
1537 // floats are passed as right-justified 8-byte values.
1538 if (!StackPtr
.getNode())
1539 StackPtr
= DAG
.getCopyFromReg(Chain
, DL
, SystemZ::R15D
, PtrVT
);
1540 unsigned Offset
= SystemZMC::CallFrameSize
+ VA
.getLocMemOffset();
1541 if (VA
.getLocVT() == MVT::i32
|| VA
.getLocVT() == MVT::f32
)
1543 SDValue Address
= DAG
.getNode(ISD::ADD
, DL
, PtrVT
, StackPtr
,
1544 DAG
.getIntPtrConstant(Offset
, DL
));
1547 MemOpChains
.push_back(
1548 DAG
.getStore(Chain
, DL
, ArgValue
, Address
, MachinePointerInfo()));
1552 // Join the stores, which are independent of one another.
1553 if (!MemOpChains
.empty())
1554 Chain
= DAG
.getNode(ISD::TokenFactor
, DL
, MVT::Other
, MemOpChains
);
1556 // Accept direct calls by converting symbolic call addresses to the
1557 // associated Target* opcodes. Force %r1 to be used for indirect
1560 if (auto *G
= dyn_cast
<GlobalAddressSDNode
>(Callee
)) {
1561 Callee
= DAG
.getTargetGlobalAddress(G
->getGlobal(), DL
, PtrVT
);
1562 Callee
= DAG
.getNode(SystemZISD::PCREL_WRAPPER
, DL
, PtrVT
, Callee
);
1563 } else if (auto *E
= dyn_cast
<ExternalSymbolSDNode
>(Callee
)) {
1564 Callee
= DAG
.getTargetExternalSymbol(E
->getSymbol(), PtrVT
);
1565 Callee
= DAG
.getNode(SystemZISD::PCREL_WRAPPER
, DL
, PtrVT
, Callee
);
1566 } else if (IsTailCall
) {
1567 Chain
= DAG
.getCopyToReg(Chain
, DL
, SystemZ::R1D
, Callee
, Glue
);
1568 Glue
= Chain
.getValue(1);
1569 Callee
= DAG
.getRegister(SystemZ::R1D
, Callee
.getValueType());
1572 // Build a sequence of copy-to-reg nodes, chained and glued together.
1573 for (unsigned I
= 0, E
= RegsToPass
.size(); I
!= E
; ++I
) {
1574 Chain
= DAG
.getCopyToReg(Chain
, DL
, RegsToPass
[I
].first
,
1575 RegsToPass
[I
].second
, Glue
);
1576 Glue
= Chain
.getValue(1);
1579 // The first call operand is the chain and the second is the target address.
1580 SmallVector
<SDValue
, 8> Ops
;
1581 Ops
.push_back(Chain
);
1582 Ops
.push_back(Callee
);
1584 // Add argument registers to the end of the list so that they are
1585 // known live into the call.
1586 for (unsigned I
= 0, E
= RegsToPass
.size(); I
!= E
; ++I
)
1587 Ops
.push_back(DAG
.getRegister(RegsToPass
[I
].first
,
1588 RegsToPass
[I
].second
.getValueType()));
1590 // Add a register mask operand representing the call-preserved registers.
1591 const TargetRegisterInfo
*TRI
= Subtarget
.getRegisterInfo();
1592 const uint32_t *Mask
= TRI
->getCallPreservedMask(MF
, CallConv
);
1593 assert(Mask
&& "Missing call preserved mask for calling convention");
1594 Ops
.push_back(DAG
.getRegisterMask(Mask
));
1596 // Glue the call to the argument copies, if any.
1598 Ops
.push_back(Glue
);
1601 SDVTList NodeTys
= DAG
.getVTList(MVT::Other
, MVT::Glue
);
1603 return DAG
.getNode(SystemZISD::SIBCALL
, DL
, NodeTys
, Ops
);
1604 Chain
= DAG
.getNode(SystemZISD::CALL
, DL
, NodeTys
, Ops
);
1605 Glue
= Chain
.getValue(1);
1607 // Mark the end of the call, which is glued to the call itself.
1608 Chain
= DAG
.getCALLSEQ_END(Chain
,
1609 DAG
.getConstant(NumBytes
, DL
, PtrVT
, true),
1610 DAG
.getConstant(0, DL
, PtrVT
, true),
1612 Glue
= Chain
.getValue(1);
1614 // Assign locations to each value returned by this call.
1615 SmallVector
<CCValAssign
, 16> RetLocs
;
1616 CCState
RetCCInfo(CallConv
, IsVarArg
, MF
, RetLocs
, *DAG
.getContext());
1617 RetCCInfo
.AnalyzeCallResult(Ins
, RetCC_SystemZ
);
1619 // Copy all of the result registers out of their specified physreg.
1620 for (unsigned I
= 0, E
= RetLocs
.size(); I
!= E
; ++I
) {
1621 CCValAssign
&VA
= RetLocs
[I
];
1623 // Copy the value out, gluing the copy to the end of the call sequence.
1624 SDValue RetValue
= DAG
.getCopyFromReg(Chain
, DL
, VA
.getLocReg(),
1625 VA
.getLocVT(), Glue
);
1626 Chain
= RetValue
.getValue(1);
1627 Glue
= RetValue
.getValue(2);
1629 // Convert the value of the return register into the value that's
1631 InVals
.push_back(convertLocVTToValVT(DAG
, DL
, VA
, Chain
, RetValue
));
bool SystemZTargetLowering::
CanLowerReturn(CallingConv::ID CallConv,
               MachineFunction &MF, bool isVarArg,
               const SmallVectorImpl<ISD::OutputArg> &Outs,
               LLVMContext &Context) const {
  // Detect unsupported vector return types.
  if (Subtarget.hasVector())
    VerifyVectorTypes(Outs);

  // Special case that we cannot easily detect in RetCC_SystemZ since
  // i128 is not a legal type.
  for (auto &Out : Outs)
    if (Out.ArgVT == MVT::i128)
      return false;

  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
  return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
}

SDValue
SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                   bool IsVarArg,
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                                   const SmallVectorImpl<SDValue> &OutVals,
                                   const SDLoc &DL, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // Detect unsupported vector return types.
  if (Subtarget.hasVector())
    VerifyVectorTypes(Outs);

  // Assign locations to each returned value.
  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
  RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);

  // Quick exit for void returns
  if (RetLocs.empty())
    return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);

  // Copy the result values into the output registers.
  SDValue Glue;
  SmallVector<SDValue, 4> RetOps;
  RetOps.push_back(Chain);
  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
    CCValAssign &VA = RetLocs[I];
    SDValue RetValue = OutVals[I];

    // Make the return register live on exit.
    assert(VA.isRegLoc() && "Can only return in registers!");

    // Promote the value as required.
    RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);

    // Chain and glue the copies together.
    Register Reg = VA.getLocReg();
    Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
  }

  // Update chain and glue.
  RetOps[0] = Chain;
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
}

// Return true if Op is an intrinsic node with chain that returns the CC value
// as its only (other) argument.  Provide the associated SystemZISD opcode and
// the mask of valid CC values if so.
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
                                      unsigned &CCValid) {
  unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  switch (Id) {
  case Intrinsic::s390_tbegin:
    Opcode = SystemZISD::TBEGIN;
    CCValid = SystemZ::CCMASK_TBEGIN;
    return true;

  case Intrinsic::s390_tbegin_nofloat:
    Opcode = SystemZISD::TBEGIN_NOFLOAT;
    CCValid = SystemZ::CCMASK_TBEGIN;
    return true;

  case Intrinsic::s390_tend:
    Opcode = SystemZISD::TEND;
    CCValid = SystemZ::CCMASK_TEND;
    return true;

  default:
    return false;
  }
}
1734 // Return true if Op is an intrinsic node without chain that returns the
1735 // CC value as its final argument. Provide the associated SystemZISD
1736 // opcode and the mask of valid CC values if so.
1737 static bool isIntrinsicWithCC(SDValue Op
, unsigned &Opcode
, unsigned &CCValid
) {
1738 unsigned Id
= cast
<ConstantSDNode
>(Op
.getOperand(0))->getZExtValue();
1740 case Intrinsic::s390_vpkshs
:
1741 case Intrinsic::s390_vpksfs
:
1742 case Intrinsic::s390_vpksgs
:
1743 Opcode
= SystemZISD::PACKS_CC
;
1744 CCValid
= SystemZ::CCMASK_VCMP
;
1747 case Intrinsic::s390_vpklshs
:
1748 case Intrinsic::s390_vpklsfs
:
1749 case Intrinsic::s390_vpklsgs
:
1750 Opcode
= SystemZISD::PACKLS_CC
;
1751 CCValid
= SystemZ::CCMASK_VCMP
;
1754 case Intrinsic::s390_vceqbs
:
1755 case Intrinsic::s390_vceqhs
:
1756 case Intrinsic::s390_vceqfs
:
1757 case Intrinsic::s390_vceqgs
:
1758 Opcode
= SystemZISD::VICMPES
;
1759 CCValid
= SystemZ::CCMASK_VCMP
;
1762 case Intrinsic::s390_vchbs
:
1763 case Intrinsic::s390_vchhs
:
1764 case Intrinsic::s390_vchfs
:
1765 case Intrinsic::s390_vchgs
:
1766 Opcode
= SystemZISD::VICMPHS
;
1767 CCValid
= SystemZ::CCMASK_VCMP
;
1770 case Intrinsic::s390_vchlbs
:
1771 case Intrinsic::s390_vchlhs
:
1772 case Intrinsic::s390_vchlfs
:
1773 case Intrinsic::s390_vchlgs
:
1774 Opcode
= SystemZISD::VICMPHLS
;
1775 CCValid
= SystemZ::CCMASK_VCMP
;
1778 case Intrinsic::s390_vtm
:
1779 Opcode
= SystemZISD::VTM
;
1780 CCValid
= SystemZ::CCMASK_VCMP
;
1783 case Intrinsic::s390_vfaebs
:
1784 case Intrinsic::s390_vfaehs
:
1785 case Intrinsic::s390_vfaefs
:
1786 Opcode
= SystemZISD::VFAE_CC
;
1787 CCValid
= SystemZ::CCMASK_ANY
;
1790 case Intrinsic::s390_vfaezbs
:
1791 case Intrinsic::s390_vfaezhs
:
1792 case Intrinsic::s390_vfaezfs
:
1793 Opcode
= SystemZISD::VFAEZ_CC
;
1794 CCValid
= SystemZ::CCMASK_ANY
;
1797 case Intrinsic::s390_vfeebs
:
1798 case Intrinsic::s390_vfeehs
:
1799 case Intrinsic::s390_vfeefs
:
1800 Opcode
= SystemZISD::VFEE_CC
;
1801 CCValid
= SystemZ::CCMASK_ANY
;
1804 case Intrinsic::s390_vfeezbs
:
1805 case Intrinsic::s390_vfeezhs
:
1806 case Intrinsic::s390_vfeezfs
:
1807 Opcode
= SystemZISD::VFEEZ_CC
;
1808 CCValid
= SystemZ::CCMASK_ANY
;
1811 case Intrinsic::s390_vfenebs
:
1812 case Intrinsic::s390_vfenehs
:
1813 case Intrinsic::s390_vfenefs
:
1814 Opcode
= SystemZISD::VFENE_CC
;
1815 CCValid
= SystemZ::CCMASK_ANY
;
1818 case Intrinsic::s390_vfenezbs
:
1819 case Intrinsic::s390_vfenezhs
:
1820 case Intrinsic::s390_vfenezfs
:
1821 Opcode
= SystemZISD::VFENEZ_CC
;
1822 CCValid
= SystemZ::CCMASK_ANY
;
1825 case Intrinsic::s390_vistrbs
:
1826 case Intrinsic::s390_vistrhs
:
1827 case Intrinsic::s390_vistrfs
:
1828 Opcode
= SystemZISD::VISTR_CC
;
1829 CCValid
= SystemZ::CCMASK_0
| SystemZ::CCMASK_3
;
1832 case Intrinsic::s390_vstrcbs
:
1833 case Intrinsic::s390_vstrchs
:
1834 case Intrinsic::s390_vstrcfs
:
1835 Opcode
= SystemZISD::VSTRC_CC
;
1836 CCValid
= SystemZ::CCMASK_ANY
;
1839 case Intrinsic::s390_vstrczbs
:
1840 case Intrinsic::s390_vstrczhs
:
1841 case Intrinsic::s390_vstrczfs
:
1842 Opcode
= SystemZISD::VSTRCZ_CC
;
1843 CCValid
= SystemZ::CCMASK_ANY
;
1846 case Intrinsic::s390_vstrsb
:
1847 case Intrinsic::s390_vstrsh
:
1848 case Intrinsic::s390_vstrsf
:
1849 Opcode
= SystemZISD::VSTRS_CC
;
1850 CCValid
= SystemZ::CCMASK_ANY
;
1853 case Intrinsic::s390_vstrszb
:
1854 case Intrinsic::s390_vstrszh
:
1855 case Intrinsic::s390_vstrszf
:
1856 Opcode
= SystemZISD::VSTRSZ_CC
;
1857 CCValid
= SystemZ::CCMASK_ANY
;
1860 case Intrinsic::s390_vfcedbs
:
1861 case Intrinsic::s390_vfcesbs
:
1862 Opcode
= SystemZISD::VFCMPES
;
1863 CCValid
= SystemZ::CCMASK_VCMP
;
1866 case Intrinsic::s390_vfchdbs
:
1867 case Intrinsic::s390_vfchsbs
:
1868 Opcode
= SystemZISD::VFCMPHS
;
1869 CCValid
= SystemZ::CCMASK_VCMP
;
1872 case Intrinsic::s390_vfchedbs
:
1873 case Intrinsic::s390_vfchesbs
:
1874 Opcode
= SystemZISD::VFCMPHES
;
1875 CCValid
= SystemZ::CCMASK_VCMP
;
1878 case Intrinsic::s390_vftcidb
:
1879 case Intrinsic::s390_vftcisb
:
1880 Opcode
= SystemZISD::VFTCI
;
1881 CCValid
= SystemZ::CCMASK_VCMP
;
1884 case Intrinsic::s390_tdc
:
1885 Opcode
= SystemZISD::TDC
;
1886 CCValid
= SystemZ::CCMASK_TDC
;
// Emit an intrinsic with chain and an explicit CC register result.
static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
                                           unsigned Opcode) {
  // Copy all operands except the intrinsic ID.
  unsigned NumOps = Op.getNumOperands();
  SmallVector<SDValue, 6> Ops;
  Ops.reserve(NumOps - 1);
  Ops.push_back(Op.getOperand(0));
  for (unsigned I = 2; I < NumOps; ++I)
    Ops.push_back(Op.getOperand(I));

  assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
  SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
  SDValue OldChain = SDValue(Op.getNode(), 1);
  SDValue NewChain = SDValue(Intr.getNode(), 1);
  DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
  return Intr.getNode();
}

// Emit an intrinsic with an explicit CC register result.
static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
                                   unsigned Opcode) {
  // Copy all operands except the intrinsic ID.
  unsigned NumOps = Op.getNumOperands();
  SmallVector<SDValue, 6> Ops;
  Ops.reserve(NumOps - 1);
  for (unsigned I = 1; I < NumOps; ++I)
    Ops.push_back(Op.getOperand(I));

  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
  return Intr.getNode();
}

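// Usage sketch (an assumption for illustration, not original code): for a
// chainless CC-producing intrinsic such as llvm.s390.vtm, emitCmp() below
// reaches this helper with C.Opcode == SystemZISD::VTM, and the CC value is
// the last result of the rebuilt node, which is why callers read value
// number Node->getNumValues() - 1.
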
// CC is a comparison that will be implemented using an integer or
// floating-point comparison.  Return the condition code mask for
// a branch on true.  In the integer case, CCMASK_CMP_UO is set for
// unsigned comparisons and clear for signed ones.  In the floating-point
// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
static unsigned CCMaskForCondCode(ISD::CondCode CC) {
#define CONV(X) \
  case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
  case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
  case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X

  switch (CC) {
  default:
    llvm_unreachable("Invalid integer condition!");

  CONV(EQ);
  CONV(NE);
  CONV(LT);
  CONV(GT);
  CONV(LE);
  CONV(GE);
#undef CONV

  case ISD::SETO:  return SystemZ::CCMASK_CMP_O;
  case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
  }
}

// If C can be converted to a comparison against zero, adjust the operands
// as necessary.
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
  if (C.ICmpType == SystemZICMP::UnsignedOnly)
    return;

  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
  if (!ConstOp1)
    return;

  int64_t Value = ConstOp1->getSExtValue();
  if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
      (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
      (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
      (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
    C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
    C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
  }
}

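// Worked example (illustrative, not part of the original source): for
// "x > -1" the constant is -1 and CCMask is CCMASK_CMP_GT, so XORing in
// CCMASK_CMP_EQ turns the test into "x >= 0" against a zero operand, which
// the zero-comparison forms (e.g. LOAD AND TEST) can implement directly.
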
1976 // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
1977 // adjust the operands as necessary.
1978 static void adjustSubwordCmp(SelectionDAG
&DAG
, const SDLoc
&DL
,
1980 // For us to make any changes, it must a comparison between a single-use
1981 // load and a constant.
1982 if (!C
.Op0
.hasOneUse() ||
1983 C
.Op0
.getOpcode() != ISD::LOAD
||
1984 C
.Op1
.getOpcode() != ISD::Constant
)
1987 // We must have an 8- or 16-bit load.
1988 auto *Load
= cast
<LoadSDNode
>(C
.Op0
);
1989 unsigned NumBits
= Load
->getMemoryVT().getStoreSizeInBits();
1990 if (NumBits
!= 8 && NumBits
!= 16)
1993 // The load must be an extending one and the constant must be within the
1994 // range of the unextended value.
1995 auto *ConstOp1
= cast
<ConstantSDNode
>(C
.Op1
);
1996 uint64_t Value
= ConstOp1
->getZExtValue();
1997 uint64_t Mask
= (1 << NumBits
) - 1;
1998 if (Load
->getExtensionType() == ISD::SEXTLOAD
) {
1999 // Make sure that ConstOp1 is in range of C.Op0.
2000 int64_t SignedValue
= ConstOp1
->getSExtValue();
2001 if (uint64_t(SignedValue
) + (uint64_t(1) << (NumBits
- 1)) > Mask
)
2003 if (C
.ICmpType
!= SystemZICMP::SignedOnly
) {
2004 // Unsigned comparison between two sign-extended values is equivalent
2005 // to unsigned comparison between two zero-extended values.
2007 } else if (NumBits
== 8) {
2008 // Try to treat the comparison as unsigned, so that we can use CLI.
2009 // Adjust CCMask and Value as necessary.
2010 if (Value
== 0 && C
.CCMask
== SystemZ::CCMASK_CMP_LT
)
2011 // Test whether the high bit of the byte is set.
2012 Value
= 127, C
.CCMask
= SystemZ::CCMASK_CMP_GT
;
2013 else if (Value
== 0 && C
.CCMask
== SystemZ::CCMASK_CMP_GE
)
2014 // Test whether the high bit of the byte is clear.
2015 Value
= 128, C
.CCMask
= SystemZ::CCMASK_CMP_LT
;
2017 // No instruction exists for this combination.
2019 C
.ICmpType
= SystemZICMP::UnsignedOnly
;
2021 } else if (Load
->getExtensionType() == ISD::ZEXTLOAD
) {
2024 // If the constant is in range, we can use any comparison.
2025 C
.ICmpType
= SystemZICMP::Any
;
2029 // Make sure that the first operand is an i32 of the right extension type.
2030 ISD::LoadExtType ExtType
= (C
.ICmpType
== SystemZICMP::SignedOnly
?
2033 if (C
.Op0
.getValueType() != MVT::i32
||
2034 Load
->getExtensionType() != ExtType
) {
2035 C
.Op0
= DAG
.getExtLoad(ExtType
, SDLoc(Load
), MVT::i32
, Load
->getChain(),
2036 Load
->getBasePtr(), Load
->getPointerInfo(),
2037 Load
->getMemoryVT(), Load
->getAlignment(),
2038 Load
->getMemOperand()->getFlags());
2039 // Update the chain uses.
2040 DAG
.ReplaceAllUsesOfValueWith(SDValue(Load
, 1), C
.Op0
.getValue(1));
2043 // Make sure that the second operand is an i32 with the right value.
2044 if (C
.Op1
.getValueType() != MVT::i32
||
2045 Value
!= ConstOp1
->getZExtValue())
2046 C
.Op1
= DAG
.getConstant(Value
, DL
, MVT::i32
);
2049 // Return true if Op is either an unextended load, or a load suitable
2050 // for integer register-memory comparisons of type ICmpType.
2051 static bool isNaturalMemoryOperand(SDValue Op
, unsigned ICmpType
) {
2052 auto *Load
= dyn_cast
<LoadSDNode
>(Op
.getNode());
2054 // There are no instructions to compare a register with a memory byte.
2055 if (Load
->getMemoryVT() == MVT::i8
)
2057 // Otherwise decide on extension type.
2058 switch (Load
->getExtensionType()) {
2059 case ISD::NON_EXTLOAD
:
2062 return ICmpType
!= SystemZICMP::UnsignedOnly
;
2064 return ICmpType
!= SystemZICMP::SignedOnly
;
2072 // Return true if it is better to swap the operands of C.
2073 static bool shouldSwapCmpOperands(const Comparison
&C
) {
2074 // Leave f128 comparisons alone, since they have no memory forms.
2075 if (C
.Op0
.getValueType() == MVT::f128
)
2078 // Always keep a floating-point constant second, since comparisons with
2079 // zero can use LOAD TEST and comparisons with other constants make a
2080 // natural memory operand.
2081 if (isa
<ConstantFPSDNode
>(C
.Op1
))
2084 // Never swap comparisons with zero since there are many ways to optimize
2086 auto *ConstOp1
= dyn_cast
<ConstantSDNode
>(C
.Op1
);
2087 if (ConstOp1
&& ConstOp1
->getZExtValue() == 0)
2090 // Also keep natural memory operands second if the loaded value is
2091 // only used here. Several comparisons have memory forms.
2092 if (isNaturalMemoryOperand(C
.Op1
, C
.ICmpType
) && C
.Op1
.hasOneUse())
2095 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
2096 // In that case we generally prefer the memory to be second.
2097 if (isNaturalMemoryOperand(C
.Op0
, C
.ICmpType
) && C
.Op0
.hasOneUse()) {
2098 // The only exceptions are when the second operand is a constant and
2099 // we can use things like CHHSI.
2102 // The unsigned memory-immediate instructions can handle 16-bit
2103 // unsigned integers.
2104 if (C
.ICmpType
!= SystemZICMP::SignedOnly
&&
2105 isUInt
<16>(ConstOp1
->getZExtValue()))
2107 // The signed memory-immediate instructions can handle 16-bit
2109 if (C
.ICmpType
!= SystemZICMP::UnsignedOnly
&&
2110 isInt
<16>(ConstOp1
->getSExtValue()))
2115 // Try to promote the use of CGFR and CLGFR.
2116 unsigned Opcode0
= C
.Op0
.getOpcode();
2117 if (C
.ICmpType
!= SystemZICMP::UnsignedOnly
&& Opcode0
== ISD::SIGN_EXTEND
)
2119 if (C
.ICmpType
!= SystemZICMP::SignedOnly
&& Opcode0
== ISD::ZERO_EXTEND
)
2121 if (C
.ICmpType
!= SystemZICMP::SignedOnly
&&
2122 Opcode0
== ISD::AND
&&
2123 C
.Op0
.getOperand(1).getOpcode() == ISD::Constant
&&
2124 cast
<ConstantSDNode
>(C
.Op0
.getOperand(1))->getZExtValue() == 0xffffffff)
// Return a version of comparison CC mask CCMask in which the LT and GT
// actions are swapped.
static unsigned reverseCCMask(unsigned CCMask) {
  return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
          (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
          (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
          (CCMask & SystemZ::CCMASK_CMP_UO));
}

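// Worked example (illustrative): reversing CCMASK_CMP_LT | CCMASK_CMP_EQ
// (i.e. "<=") yields CCMASK_CMP_GT | CCMASK_CMP_EQ (i.e. ">="), which is the
// correct mask once the comparison operands have been swapped; the EQ and UO
// bits pass through unchanged.
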
2139 // Check whether C tests for equality between X and Y and whether X - Y
2140 // or Y - X is also computed. In that case it's better to compare the
2141 // result of the subtraction against zero.
2142 static void adjustForSubtraction(SelectionDAG
&DAG
, const SDLoc
&DL
,
2144 if (C
.CCMask
== SystemZ::CCMASK_CMP_EQ
||
2145 C
.CCMask
== SystemZ::CCMASK_CMP_NE
) {
2146 for (auto I
= C
.Op0
->use_begin(), E
= C
.Op0
->use_end(); I
!= E
; ++I
) {
2148 if (N
->getOpcode() == ISD::SUB
&&
2149 ((N
->getOperand(0) == C
.Op0
&& N
->getOperand(1) == C
.Op1
) ||
2150 (N
->getOperand(0) == C
.Op1
&& N
->getOperand(1) == C
.Op0
))) {
2151 C
.Op0
= SDValue(N
, 0);
2152 C
.Op1
= DAG
.getConstant(0, DL
, N
->getValueType(0));
2159 // Check whether C compares a floating-point value with zero and if that
2160 // floating-point value is also negated. In this case we can use the
2161 // negation to set CC, so avoiding separate LOAD AND TEST and
2162 // LOAD (NEGATIVE/COMPLEMENT) instructions.
2163 static void adjustForFNeg(Comparison
&C
) {
2164 auto *C1
= dyn_cast
<ConstantFPSDNode
>(C
.Op1
);
2165 if (C1
&& C1
->isZero()) {
2166 for (auto I
= C
.Op0
->use_begin(), E
= C
.Op0
->use_end(); I
!= E
; ++I
) {
2168 if (N
->getOpcode() == ISD::FNEG
) {
2169 C
.Op0
= SDValue(N
, 0);
2170 C
.CCMask
= reverseCCMask(C
.CCMask
);
2177 // Check whether C compares (shl X, 32) with 0 and whether X is
2178 // also sign-extended. In that case it is better to test the result
2179 // of the sign extension using LTGFR.
2181 // This case is important because InstCombine transforms a comparison
2182 // with (sext (trunc X)) into a comparison with (shl X, 32).
2183 static void adjustForLTGFR(Comparison
&C
) {
2184 // Check for a comparison between (shl X, 32) and 0.
2185 if (C
.Op0
.getOpcode() == ISD::SHL
&&
2186 C
.Op0
.getValueType() == MVT::i64
&&
2187 C
.Op1
.getOpcode() == ISD::Constant
&&
2188 cast
<ConstantSDNode
>(C
.Op1
)->getZExtValue() == 0) {
2189 auto *C1
= dyn_cast
<ConstantSDNode
>(C
.Op0
.getOperand(1));
2190 if (C1
&& C1
->getZExtValue() == 32) {
2191 SDValue ShlOp0
= C
.Op0
.getOperand(0);
2192 // See whether X has any SIGN_EXTEND_INREG uses.
2193 for (auto I
= ShlOp0
->use_begin(), E
= ShlOp0
->use_end(); I
!= E
; ++I
) {
2195 if (N
->getOpcode() == ISD::SIGN_EXTEND_INREG
&&
2196 cast
<VTSDNode
>(N
->getOperand(1))->getVT() == MVT::i32
) {
2197 C
.Op0
= SDValue(N
, 0);
2205 // If C compares the truncation of an extending load, try to compare
2206 // the untruncated value instead. This exposes more opportunities to
2208 static void adjustICmpTruncate(SelectionDAG
&DAG
, const SDLoc
&DL
,
2210 if (C
.Op0
.getOpcode() == ISD::TRUNCATE
&&
2211 C
.Op0
.getOperand(0).getOpcode() == ISD::LOAD
&&
2212 C
.Op1
.getOpcode() == ISD::Constant
&&
2213 cast
<ConstantSDNode
>(C
.Op1
)->getZExtValue() == 0) {
2214 auto *L
= cast
<LoadSDNode
>(C
.Op0
.getOperand(0));
2215 if (L
->getMemoryVT().getStoreSizeInBits() <= C
.Op0
.getValueSizeInBits()) {
2216 unsigned Type
= L
->getExtensionType();
2217 if ((Type
== ISD::ZEXTLOAD
&& C
.ICmpType
!= SystemZICMP::SignedOnly
) ||
2218 (Type
== ISD::SEXTLOAD
&& C
.ICmpType
!= SystemZICMP::UnsignedOnly
)) {
2219 C
.Op0
= C
.Op0
.getOperand(0);
2220 C
.Op1
= DAG
.getConstant(0, DL
, C
.Op0
.getValueType());
// Return true if shift operation N has an in-range constant shift value.
// Store it in ShiftVal if so.
static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
  auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!Shift)
    return false;

  uint64_t Amount = Shift->getZExtValue();
  if (Amount >= N.getValueSizeInBits())
    return false;

  ShiftVal = Amount;
  return true;
}

2241 // Check whether an AND with Mask is suitable for a TEST UNDER MASK
2242 // instruction and whether the CC value is descriptive enough to handle
2243 // a comparison of type Opcode between the AND result and CmpVal.
2244 // CCMask says which comparison result is being tested and BitSize is
2245 // the number of bits in the operands. If TEST UNDER MASK can be used,
2246 // return the corresponding CC mask, otherwise return 0.
2247 static unsigned getTestUnderMaskCond(unsigned BitSize
, unsigned CCMask
,
2248 uint64_t Mask
, uint64_t CmpVal
,
2249 unsigned ICmpType
) {
2250 assert(Mask
!= 0 && "ANDs with zero should have been removed by now");
2252 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2253 if (!SystemZ::isImmLL(Mask
) && !SystemZ::isImmLH(Mask
) &&
2254 !SystemZ::isImmHL(Mask
) && !SystemZ::isImmHH(Mask
))
2257 // Work out the masks for the lowest and highest bits.
2258 unsigned HighShift
= 63 - countLeadingZeros(Mask
);
2259 uint64_t High
= uint64_t(1) << HighShift
;
2260 uint64_t Low
= uint64_t(1) << countTrailingZeros(Mask
);
2262 // Signed ordered comparisons are effectively unsigned if the sign
2264 bool EffectivelyUnsigned
= (ICmpType
!= SystemZICMP::SignedOnly
);
2266 // Check for equality comparisons with 0, or the equivalent.
2268 if (CCMask
== SystemZ::CCMASK_CMP_EQ
)
2269 return SystemZ::CCMASK_TM_ALL_0
;
2270 if (CCMask
== SystemZ::CCMASK_CMP_NE
)
2271 return SystemZ::CCMASK_TM_SOME_1
;
2273 if (EffectivelyUnsigned
&& CmpVal
> 0 && CmpVal
<= Low
) {
2274 if (CCMask
== SystemZ::CCMASK_CMP_LT
)
2275 return SystemZ::CCMASK_TM_ALL_0
;
2276 if (CCMask
== SystemZ::CCMASK_CMP_GE
)
2277 return SystemZ::CCMASK_TM_SOME_1
;
2279 if (EffectivelyUnsigned
&& CmpVal
< Low
) {
2280 if (CCMask
== SystemZ::CCMASK_CMP_LE
)
2281 return SystemZ::CCMASK_TM_ALL_0
;
2282 if (CCMask
== SystemZ::CCMASK_CMP_GT
)
2283 return SystemZ::CCMASK_TM_SOME_1
;
2286 // Check for equality comparisons with the mask, or the equivalent.
2287 if (CmpVal
== Mask
) {
2288 if (CCMask
== SystemZ::CCMASK_CMP_EQ
)
2289 return SystemZ::CCMASK_TM_ALL_1
;
2290 if (CCMask
== SystemZ::CCMASK_CMP_NE
)
2291 return SystemZ::CCMASK_TM_SOME_0
;
2293 if (EffectivelyUnsigned
&& CmpVal
>= Mask
- Low
&& CmpVal
< Mask
) {
2294 if (CCMask
== SystemZ::CCMASK_CMP_GT
)
2295 return SystemZ::CCMASK_TM_ALL_1
;
2296 if (CCMask
== SystemZ::CCMASK_CMP_LE
)
2297 return SystemZ::CCMASK_TM_SOME_0
;
2299 if (EffectivelyUnsigned
&& CmpVal
> Mask
- Low
&& CmpVal
<= Mask
) {
2300 if (CCMask
== SystemZ::CCMASK_CMP_GE
)
2301 return SystemZ::CCMASK_TM_ALL_1
;
2302 if (CCMask
== SystemZ::CCMASK_CMP_LT
)
2303 return SystemZ::CCMASK_TM_SOME_0
;
2306 // Check for ordered comparisons with the top bit.
2307 if (EffectivelyUnsigned
&& CmpVal
>= Mask
- High
&& CmpVal
< High
) {
2308 if (CCMask
== SystemZ::CCMASK_CMP_LE
)
2309 return SystemZ::CCMASK_TM_MSB_0
;
2310 if (CCMask
== SystemZ::CCMASK_CMP_GT
)
2311 return SystemZ::CCMASK_TM_MSB_1
;
2313 if (EffectivelyUnsigned
&& CmpVal
> Mask
- High
&& CmpVal
<= High
) {
2314 if (CCMask
== SystemZ::CCMASK_CMP_LT
)
2315 return SystemZ::CCMASK_TM_MSB_0
;
2316 if (CCMask
== SystemZ::CCMASK_CMP_GE
)
2317 return SystemZ::CCMASK_TM_MSB_1
;
2320 // If there are just two bits, we can do equality checks for Low and High
2322 if (Mask
== Low
+ High
) {
2323 if (CCMask
== SystemZ::CCMASK_CMP_EQ
&& CmpVal
== Low
)
2324 return SystemZ::CCMASK_TM_MIXED_MSB_0
;
2325 if (CCMask
== SystemZ::CCMASK_CMP_NE
&& CmpVal
== Low
)
2326 return SystemZ::CCMASK_TM_MIXED_MSB_0
^ SystemZ::CCMASK_ANY
;
2327 if (CCMask
== SystemZ::CCMASK_CMP_EQ
&& CmpVal
== High
)
2328 return SystemZ::CCMASK_TM_MIXED_MSB_1
;
2329 if (CCMask
== SystemZ::CCMASK_CMP_NE
&& CmpVal
== High
)
2330 return SystemZ::CCMASK_TM_MIXED_MSB_1
^ SystemZ::CCMASK_ANY
;
2333 // Looks like we've exhausted our options.
2337 // See whether C can be implemented as a TEST UNDER MASK instruction.
2338 // Update the arguments with the TM version if so.
2339 static void adjustForTestUnderMask(SelectionDAG
&DAG
, const SDLoc
&DL
,
2341 // Check that we have a comparison with a constant.
2342 auto *ConstOp1
= dyn_cast
<ConstantSDNode
>(C
.Op1
);
2345 uint64_t CmpVal
= ConstOp1
->getZExtValue();
2347 // Check whether the nonconstant input is an AND with a constant mask.
2350 ConstantSDNode
*Mask
= nullptr;
2351 if (C
.Op0
.getOpcode() == ISD::AND
) {
2352 NewC
.Op0
= C
.Op0
.getOperand(0);
2353 NewC
.Op1
= C
.Op0
.getOperand(1);
2354 Mask
= dyn_cast
<ConstantSDNode
>(NewC
.Op1
);
2357 MaskVal
= Mask
->getZExtValue();
2359 // There is no instruction to compare with a 64-bit immediate
2360 // so use TMHH instead if possible. We need an unsigned ordered
2361 // comparison with an i64 immediate.
2362 if (NewC
.Op0
.getValueType() != MVT::i64
||
2363 NewC
.CCMask
== SystemZ::CCMASK_CMP_EQ
||
2364 NewC
.CCMask
== SystemZ::CCMASK_CMP_NE
||
2365 NewC
.ICmpType
== SystemZICMP::SignedOnly
)
2367 // Convert LE and GT comparisons into LT and GE.
2368 if (NewC
.CCMask
== SystemZ::CCMASK_CMP_LE
||
2369 NewC
.CCMask
== SystemZ::CCMASK_CMP_GT
) {
2370 if (CmpVal
== uint64_t(-1))
2373 NewC
.CCMask
^= SystemZ::CCMASK_CMP_EQ
;
2375 // If the low N bits of Op1 are zero than the low N bits of Op0 can
2376 // be masked off without changing the result.
2377 MaskVal
= -(CmpVal
& -CmpVal
);
2378 NewC
.ICmpType
= SystemZICMP::UnsignedOnly
;
2383 // Check whether the combination of mask, comparison value and comparison
2384 // type are suitable.
2385 unsigned BitSize
= NewC
.Op0
.getValueSizeInBits();
2386 unsigned NewCCMask
, ShiftVal
;
2387 if (NewC
.ICmpType
!= SystemZICMP::SignedOnly
&&
2388 NewC
.Op0
.getOpcode() == ISD::SHL
&&
2389 isSimpleShift(NewC
.Op0
, ShiftVal
) &&
2390 (MaskVal
>> ShiftVal
!= 0) &&
2391 ((CmpVal
>> ShiftVal
) << ShiftVal
) == CmpVal
&&
2392 (NewCCMask
= getTestUnderMaskCond(BitSize
, NewC
.CCMask
,
2393 MaskVal
>> ShiftVal
,
2395 SystemZICMP::Any
))) {
2396 NewC
.Op0
= NewC
.Op0
.getOperand(0);
2397 MaskVal
>>= ShiftVal
;
2398 } else if (NewC
.ICmpType
!= SystemZICMP::SignedOnly
&&
2399 NewC
.Op0
.getOpcode() == ISD::SRL
&&
2400 isSimpleShift(NewC
.Op0
, ShiftVal
) &&
2401 (MaskVal
<< ShiftVal
!= 0) &&
2402 ((CmpVal
<< ShiftVal
) >> ShiftVal
) == CmpVal
&&
2403 (NewCCMask
= getTestUnderMaskCond(BitSize
, NewC
.CCMask
,
2404 MaskVal
<< ShiftVal
,
2406 SystemZICMP::UnsignedOnly
))) {
2407 NewC
.Op0
= NewC
.Op0
.getOperand(0);
2408 MaskVal
<<= ShiftVal
;
2410 NewCCMask
= getTestUnderMaskCond(BitSize
, NewC
.CCMask
, MaskVal
, CmpVal
,
2416 // Go ahead and make the change.
2417 C
.Opcode
= SystemZISD::TM
;
2419 if (Mask
&& Mask
->getZExtValue() == MaskVal
)
2420 C
.Op1
= SDValue(Mask
, 0);
2422 C
.Op1
= DAG
.getConstant(MaskVal
, DL
, C
.Op0
.getValueType());
2423 C
.CCValid
= SystemZ::CCMASK_TM
;
2424 C
.CCMask
= NewCCMask
;
2427 // See whether the comparison argument contains a redundant AND
2428 // and remove it if so. This sometimes happens due to the generic
2429 // BRCOND expansion.
2430 static void adjustForRedundantAnd(SelectionDAG
&DAG
, const SDLoc
&DL
,
2432 if (C
.Op0
.getOpcode() != ISD::AND
)
2434 auto *Mask
= dyn_cast
<ConstantSDNode
>(C
.Op0
.getOperand(1));
2437 KnownBits Known
= DAG
.computeKnownBits(C
.Op0
.getOperand(0));
2438 if ((~Known
.Zero
).getZExtValue() & ~Mask
->getZExtValue())
2441 C
.Op0
= C
.Op0
.getOperand(0);
// Return a Comparison that tests the condition-code result of intrinsic
// node Call against constant integer CC using comparison code Cond.
// Opcode is the opcode of the SystemZISD operation for the intrinsic
// and CCValid is the set of possible condition-code results.
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
                                  SDValue Call, unsigned CCValid, uint64_t CC,
                                  ISD::CondCode Cond) {
  Comparison C(Call, SDValue());
  C.Opcode = Opcode;
  C.CCValid = CCValid;
  if (Cond == ISD::SETEQ)
    // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
    C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
  else if (Cond == ISD::SETNE)
    // ...and the inverse of that.
    C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
  else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
    // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
    // always true for CC>3.
    C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
  else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
    // ...and the inverse of that.
    C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
  else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
    // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
    // always true for CC>3.
    C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
  else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
    // ...and the inverse of that.
    C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
  else
    llvm_unreachable("Unexpected integer comparison type");
  C.CCMask &= CCValid;
  return C;
}

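// Worked example (illustrative, not from the original source): with
// CCValid == SystemZ::CCMASK_ANY, CC == 1 and Cond == ISD::SETULT, the mask
// becomes (~0U << 3) & 0xf == 0b1000, i.e. only the bit for CC == 0, so the
// condition holds exactly when the intrinsic produced a condition code
// below 1.
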
2480 // Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1.
2481 static Comparison
getCmp(SelectionDAG
&DAG
, SDValue CmpOp0
, SDValue CmpOp1
,
2482 ISD::CondCode Cond
, const SDLoc
&DL
) {
2483 if (CmpOp1
.getOpcode() == ISD::Constant
) {
2484 uint64_t Constant
= cast
<ConstantSDNode
>(CmpOp1
)->getZExtValue();
2485 unsigned Opcode
, CCValid
;
2486 if (CmpOp0
.getOpcode() == ISD::INTRINSIC_W_CHAIN
&&
2487 CmpOp0
.getResNo() == 0 && CmpOp0
->hasNUsesOfValue(1, 0) &&
2488 isIntrinsicWithCCAndChain(CmpOp0
, Opcode
, CCValid
))
2489 return getIntrinsicCmp(DAG
, Opcode
, CmpOp0
, CCValid
, Constant
, Cond
);
2490 if (CmpOp0
.getOpcode() == ISD::INTRINSIC_WO_CHAIN
&&
2491 CmpOp0
.getResNo() == CmpOp0
->getNumValues() - 1 &&
2492 isIntrinsicWithCC(CmpOp0
, Opcode
, CCValid
))
2493 return getIntrinsicCmp(DAG
, Opcode
, CmpOp0
, CCValid
, Constant
, Cond
);
2495 Comparison
C(CmpOp0
, CmpOp1
);
2496 C
.CCMask
= CCMaskForCondCode(Cond
);
2497 if (C
.Op0
.getValueType().isFloatingPoint()) {
2498 C
.CCValid
= SystemZ::CCMASK_FCMP
;
2499 C
.Opcode
= SystemZISD::FCMP
;
2502 C
.CCValid
= SystemZ::CCMASK_ICMP
;
2503 C
.Opcode
= SystemZISD::ICMP
;
2504 // Choose the type of comparison. Equality and inequality tests can
2505 // use either signed or unsigned comparisons. The choice also doesn't
2506 // matter if both sign bits are known to be clear. In those cases we
2507 // want to give the main isel code the freedom to choose whichever
2509 if (C
.CCMask
== SystemZ::CCMASK_CMP_EQ
||
2510 C
.CCMask
== SystemZ::CCMASK_CMP_NE
||
2511 (DAG
.SignBitIsZero(C
.Op0
) && DAG
.SignBitIsZero(C
.Op1
)))
2512 C
.ICmpType
= SystemZICMP::Any
;
2513 else if (C
.CCMask
& SystemZ::CCMASK_CMP_UO
)
2514 C
.ICmpType
= SystemZICMP::UnsignedOnly
;
2516 C
.ICmpType
= SystemZICMP::SignedOnly
;
2517 C
.CCMask
&= ~SystemZ::CCMASK_CMP_UO
;
2518 adjustForRedundantAnd(DAG
, DL
, C
);
2519 adjustZeroCmp(DAG
, DL
, C
);
2520 adjustSubwordCmp(DAG
, DL
, C
);
2521 adjustForSubtraction(DAG
, DL
, C
);
2523 adjustICmpTruncate(DAG
, DL
, C
);
2526 if (shouldSwapCmpOperands(C
)) {
2527 std::swap(C
.Op0
, C
.Op1
);
2528 C
.CCMask
= reverseCCMask(C
.CCMask
);
2531 adjustForTestUnderMask(DAG
, DL
, C
);
// Emit the comparison instruction described by C.
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
  if (!C.Op1.getNode()) {
    SDNode *Node;
    switch (C.Op0.getOpcode()) {
    case ISD::INTRINSIC_W_CHAIN:
      Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
      return SDValue(Node, 0);
    case ISD::INTRINSIC_WO_CHAIN:
      Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
      return SDValue(Node, Node->getNumValues() - 1);
    default:
      llvm_unreachable("Invalid comparison operands");
    }
  }
  if (C.Opcode == SystemZISD::ICMP)
    return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
                       DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
  if (C.Opcode == SystemZISD::TM) {
    bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
                         bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
    return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
                       DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
  }
  return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
}

// Implement a 32-bit *MUL_LOHI operation by extending both operands to
// 64 bits.  Extend is the extension type to use.  Store the high part
// in Hi and the low part in Lo.
static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
                            SDValue Op0, SDValue Op1, SDValue &Hi,
                            SDValue &Lo) {
  Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
  Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
  SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
  Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
                   DAG.getConstant(32, DL, MVT::i64));
  Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
  Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
}

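// Worked example (illustrative): for UMUL_LOHI with Extend == ISD::ZERO_EXTEND
// and operands 0x80000000 and 4, the 64-bit product is 0x200000000, so
// Hi == 2 and Lo == 0, matching the two i32 results that the *MUL_LOHI node
// is expected to produce.
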
// Lower a binary operation that produces two VT results, one in each
// half of a GR128 pair.  Op0 and Op1 are the VT operands to the operation,
// and Opcode performs the GR128 operation.  Store the even register result
// in Even and the odd register result in Odd.
static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                             unsigned Opcode, SDValue Op0, SDValue Op1,
                             SDValue &Even, SDValue &Odd) {
  SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
  bool Is32Bit = is32Bit(VT);
  Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
  Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
}

// Return an i32 value that is 1 if the CC value produced by CCReg is
// in the mask CCMask and 0 otherwise.  CC is known to have a value
// in CCValid, so other values can be ignored.
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
                         unsigned CCValid, unsigned CCMask) {
  SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
                   DAG.getConstant(0, DL, MVT::i32),
                   DAG.getTargetConstant(CCValid, DL, MVT::i32),
                   DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
}

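// Note (illustrative, not original code): SELECT_CCMASK here behaves like
// "CC in CCMask ? 1 : 0".  For a signed "less than" with CCValid ==
// CCMASK_ICMP and CCMask == CCMASK_CMP_LT, the node materializes 1 exactly
// when the preceding ICMP set the condition code to 1.
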
// Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
// be done directly.  IsFP is true if CC is for a floating-point rather than
// integer comparison.
2605 static unsigned getVectorComparison(ISD::CondCode CC
, bool IsFP
) {
2609 return IsFP
? SystemZISD::VFCMPE
: SystemZISD::VICMPE
;
2613 return IsFP
? SystemZISD::VFCMPHE
: static_cast<SystemZISD::NodeType
>(0);
2617 return IsFP
? SystemZISD::VFCMPH
: SystemZISD::VICMPH
;
2620 return IsFP
? static_cast<SystemZISD::NodeType
>(0) : SystemZISD::VICMPHL
;
2627 // Return the SystemZISD vector comparison operation for CC or its inverse,
2628 // or 0 if neither can be done directly. Indicate in Invert whether the
2629 // result is for the inverse of CC. IsFP is true if CC is for a
2630 // floating-point rather than integer comparison.
2631 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC
, bool IsFP
,
2633 if (unsigned Opcode
= getVectorComparison(CC
, IsFP
)) {
2638 CC
= ISD::getSetCCInverse(CC
, !IsFP
);
2639 if (unsigned Opcode
= getVectorComparison(CC
, IsFP
)) {
// Return a v2f64 that contains the extended form of elements Start and Start+1
// of v4f32 value Op.
static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
                                  SDValue Op) {
  int Mask[] = { Start, -1, Start + 1, -1 };
  Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
  return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
}

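// Worked example (illustrative): with Start == 2 the shuffle mask
// { 2, -1, 3, -1 } places elements 2 and 3 of the v4f32 in lanes 0 and 2,
// and VEXTEND then widens those two floats into the two doubles of the
// v2f64 result.
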
2656 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
2657 // producing a result of type VT.
2658 SDValue
SystemZTargetLowering::getVectorCmp(SelectionDAG
&DAG
, unsigned Opcode
,
2659 const SDLoc
&DL
, EVT VT
,
2661 SDValue CmpOp1
) const {
2662 // There is no hardware support for v4f32 (unless we have the vector
2663 // enhancements facility 1), so extend the vector into two v2f64s
2664 // and compare those.
2665 if (CmpOp0
.getValueType() == MVT::v4f32
&&
2666 !Subtarget
.hasVectorEnhancements1()) {
2667 SDValue H0
= expandV4F32ToV2F64(DAG
, 0, DL
, CmpOp0
);
2668 SDValue L0
= expandV4F32ToV2F64(DAG
, 2, DL
, CmpOp0
);
2669 SDValue H1
= expandV4F32ToV2F64(DAG
, 0, DL
, CmpOp1
);
2670 SDValue L1
= expandV4F32ToV2F64(DAG
, 2, DL
, CmpOp1
);
2671 SDValue HRes
= DAG
.getNode(Opcode
, DL
, MVT::v2i64
, H0
, H1
);
2672 SDValue LRes
= DAG
.getNode(Opcode
, DL
, MVT::v2i64
, L0
, L1
);
2673 return DAG
.getNode(SystemZISD::PACK
, DL
, VT
, HRes
, LRes
);
2675 return DAG
.getNode(Opcode
, DL
, VT
, CmpOp0
, CmpOp1
);
2678 // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
2679 // an integer mask of type VT.
2680 SDValue
SystemZTargetLowering::lowerVectorSETCC(SelectionDAG
&DAG
,
2681 const SDLoc
&DL
, EVT VT
,
2684 SDValue CmpOp1
) const {
2685 bool IsFP
= CmpOp0
.getValueType().isFloatingPoint();
2686 bool Invert
= false;
2689 // Handle tests for order using (or (ogt y x) (oge x y)).
2694 assert(IsFP
&& "Unexpected integer comparison");
2695 SDValue LT
= getVectorCmp(DAG
, SystemZISD::VFCMPH
, DL
, VT
, CmpOp1
, CmpOp0
);
2696 SDValue GE
= getVectorCmp(DAG
, SystemZISD::VFCMPHE
, DL
, VT
, CmpOp0
, CmpOp1
);
2697 Cmp
= DAG
.getNode(ISD::OR
, DL
, VT
, LT
, GE
);
2701 // Handle <> tests using (or (ogt y x) (ogt x y)).
2706 assert(IsFP
&& "Unexpected integer comparison");
2707 SDValue LT
= getVectorCmp(DAG
, SystemZISD::VFCMPH
, DL
, VT
, CmpOp1
, CmpOp0
);
2708 SDValue GT
= getVectorCmp(DAG
, SystemZISD::VFCMPH
, DL
, VT
, CmpOp0
, CmpOp1
);
2709 Cmp
= DAG
.getNode(ISD::OR
, DL
, VT
, LT
, GT
);
2713 // Otherwise a single comparison is enough. It doesn't really
2714 // matter whether we try the inversion or the swap first, since
2715 // there are no cases where both work.
2717 if (unsigned Opcode
= getVectorComparisonOrInvert(CC
, IsFP
, Invert
))
2718 Cmp
= getVectorCmp(DAG
, Opcode
, DL
, VT
, CmpOp0
, CmpOp1
);
2720 CC
= ISD::getSetCCSwappedOperands(CC
);
2721 if (unsigned Opcode
= getVectorComparisonOrInvert(CC
, IsFP
, Invert
))
2722 Cmp
= getVectorCmp(DAG
, Opcode
, DL
, VT
, CmpOp1
, CmpOp0
);
2724 llvm_unreachable("Unhandled comparison");
2730 DAG
.getSplatBuildVector(VT
, DL
, DAG
.getConstant(-1, DL
, MVT::i64
));
2731 Cmp
= DAG
.getNode(ISD::XOR
, DL
, VT
, Cmp
, Mask
);
SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDValue CmpOp0   = Op.getOperand(0);
  SDValue CmpOp1   = Op.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  if (VT.isVector())
    return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);

  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
  SDValue CCReg = emitCmp(DAG, DL, C);
  return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
}

SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue CmpOp0   = Op.getOperand(2);
  SDValue CmpOp1   = Op.getOperand(3);
  SDValue Dest     = Op.getOperand(4);
  SDLoc DL(Op);

  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
  SDValue CCReg = emitCmp(DAG, DL, C);
  return DAG.getNode(
      SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
      DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
      DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
}

// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
// allowing Pos and Neg to be wider than CmpOp.
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
  return (Neg.getOpcode() == ISD::SUB &&
          Neg.getOperand(0).getOpcode() == ISD::Constant &&
          cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
          Neg.getOperand(1) == Pos &&
          (Pos == CmpOp ||
           (Pos.getOpcode() == ISD::SIGN_EXTEND &&
            Pos.getOperand(0) == CmpOp)));
}

// Return the absolute or negative absolute of Op; IsNegative decides which.
static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
                           bool IsNegative) {
  Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op);
  if (IsNegative)
    Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
                     DAG.getConstant(0, DL, Op.getValueType()), Op);
  return Op;
}

2788 SDValue
SystemZTargetLowering::lowerSELECT_CC(SDValue Op
,
2789 SelectionDAG
&DAG
) const {
2790 SDValue CmpOp0
= Op
.getOperand(0);
2791 SDValue CmpOp1
= Op
.getOperand(1);
2792 SDValue TrueOp
= Op
.getOperand(2);
2793 SDValue FalseOp
= Op
.getOperand(3);
2794 ISD::CondCode CC
= cast
<CondCodeSDNode
>(Op
.getOperand(4))->get();
2797 Comparison
C(getCmp(DAG
, CmpOp0
, CmpOp1
, CC
, DL
));
2799 // Check for absolute and negative-absolute selections, including those
2800 // where the comparison value is sign-extended (for LPGFR and LNGFR).
2801 // This check supplements the one in DAGCombiner.
2802 if (C
.Opcode
== SystemZISD::ICMP
&&
2803 C
.CCMask
!= SystemZ::CCMASK_CMP_EQ
&&
2804 C
.CCMask
!= SystemZ::CCMASK_CMP_NE
&&
2805 C
.Op1
.getOpcode() == ISD::Constant
&&
2806 cast
<ConstantSDNode
>(C
.Op1
)->getZExtValue() == 0) {
2807 if (isAbsolute(C
.Op0
, TrueOp
, FalseOp
))
2808 return getAbsolute(DAG
, DL
, TrueOp
, C
.CCMask
& SystemZ::CCMASK_CMP_LT
);
2809 if (isAbsolute(C
.Op0
, FalseOp
, TrueOp
))
2810 return getAbsolute(DAG
, DL
, FalseOp
, C
.CCMask
& SystemZ::CCMASK_CMP_GT
);
2813 SDValue CCReg
= emitCmp(DAG
, DL
, C
);
2814 SDValue Ops
[] = {TrueOp
, FalseOp
,
2815 DAG
.getTargetConstant(C
.CCValid
, DL
, MVT::i32
),
2816 DAG
.getTargetConstant(C
.CCMask
, DL
, MVT::i32
), CCReg
};
2818 return DAG
.getNode(SystemZISD::SELECT_CCMASK
, DL
, Op
.getValueType(), Ops
);
2821 SDValue
SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode
*Node
,
2822 SelectionDAG
&DAG
) const {
2824 const GlobalValue
*GV
= Node
->getGlobal();
2825 int64_t Offset
= Node
->getOffset();
2826 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
2827 CodeModel::Model CM
= DAG
.getTarget().getCodeModel();
2830 if (Subtarget
.isPC32DBLSymbol(GV
, CM
)) {
2831 // Assign anchors at 1<<12 byte boundaries.
2832 uint64_t Anchor
= Offset
& ~uint64_t(0xfff);
2833 Result
= DAG
.getTargetGlobalAddress(GV
, DL
, PtrVT
, Anchor
);
2834 Result
= DAG
.getNode(SystemZISD::PCREL_WRAPPER
, DL
, PtrVT
, Result
);
2836 // The offset can be folded into the address if it is aligned to a halfword.
2838 if (Offset
!= 0 && (Offset
& 1) == 0) {
2839 SDValue Full
= DAG
.getTargetGlobalAddress(GV
, DL
, PtrVT
, Anchor
+ Offset
);
2840 Result
= DAG
.getNode(SystemZISD::PCREL_OFFSET
, DL
, PtrVT
, Full
, Result
);
2844 Result
= DAG
.getTargetGlobalAddress(GV
, DL
, PtrVT
, 0, SystemZII::MO_GOT
);
2845 Result
= DAG
.getNode(SystemZISD::PCREL_WRAPPER
, DL
, PtrVT
, Result
);
2846 Result
= DAG
.getLoad(PtrVT
, DL
, DAG
.getEntryNode(), Result
,
2847 MachinePointerInfo::getGOT(DAG
.getMachineFunction()));
2850 // If there was a non-zero offset that we didn't fold, create an explicit
2853 Result
= DAG
.getNode(ISD::ADD
, DL
, PtrVT
, Result
,
2854 DAG
.getConstant(Offset
, DL
, PtrVT
));
2859 SDValue
SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode
*Node
,
2862 SDValue GOTOffset
) const {
2864 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
2865 SDValue Chain
= DAG
.getEntryNode();
2868 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
2869 SDValue GOT
= DAG
.getGLOBAL_OFFSET_TABLE(PtrVT
);
2870 Chain
= DAG
.getCopyToReg(Chain
, DL
, SystemZ::R12D
, GOT
, Glue
);
2871 Glue
= Chain
.getValue(1);
2872 Chain
= DAG
.getCopyToReg(Chain
, DL
, SystemZ::R2D
, GOTOffset
, Glue
);
2873 Glue
= Chain
.getValue(1);
2875 // The first call operand is the chain and the second is the TLS symbol.
2876 SmallVector
<SDValue
, 8> Ops
;
2877 Ops
.push_back(Chain
);
2878 Ops
.push_back(DAG
.getTargetGlobalAddress(Node
->getGlobal(), DL
,
2879 Node
->getValueType(0),
2882 // Add argument registers to the end of the list so that they are
2883 // known live into the call.
2884 Ops
.push_back(DAG
.getRegister(SystemZ::R2D
, PtrVT
));
2885 Ops
.push_back(DAG
.getRegister(SystemZ::R12D
, PtrVT
));
2887 // Add a register mask operand representing the call-preserved registers.
2888 const TargetRegisterInfo
*TRI
= Subtarget
.getRegisterInfo();
2889 const uint32_t *Mask
=
2890 TRI
->getCallPreservedMask(DAG
.getMachineFunction(), CallingConv::C
);
2891 assert(Mask
&& "Missing call preserved mask for calling convention");
2892 Ops
.push_back(DAG
.getRegisterMask(Mask
));
2894 // Glue the call to the argument copies.
2895 Ops
.push_back(Glue
);
2898 SDVTList NodeTys
= DAG
.getVTList(MVT::Other
, MVT::Glue
);
2899 Chain
= DAG
.getNode(Opcode
, DL
, NodeTys
, Ops
);
2900 Glue
= Chain
.getValue(1);
2902 // Copy the return value from %r2.
2903 return DAG
.getCopyFromReg(Chain
, DL
, SystemZ::R2D
, PtrVT
, Glue
);
SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
                                                  SelectionDAG &DAG) const {
  SDValue Chain = DAG.getEntryNode();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // The high part of the thread pointer is in access register 0.
  SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
  TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);

  // The low part of the thread pointer is in access register 1.
  SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
  TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);

  // Merge them into a single 64-bit address.
  SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
                                    DAG.getConstant(32, DL, PtrVT));
  return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
}

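// Illustrative note (not part of the original source): if access registers
// %a0 and %a1 hold 0x00000012 and 0x34567800 respectively, the value
// computed above is (0x12ULL << 32) | 0x34567800 == 0x0000001234567800,
// i.e. the 64-bit thread pointer assembled from its two 32-bit halves.
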
2925 SDValue
SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode
*Node
,
2926 SelectionDAG
&DAG
) const {
2927 if (DAG
.getTarget().useEmulatedTLS())
2928 return LowerToTLSEmulatedModel(Node
, DAG
);
2930 const GlobalValue
*GV
= Node
->getGlobal();
2931 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
2932 TLSModel::Model model
= DAG
.getTarget().getTLSModel(GV
);
2934 SDValue TP
= lowerThreadPointer(DL
, DAG
);
2936 // Get the offset of GA from the thread pointer, based on the TLS model.
2939 case TLSModel::GeneralDynamic
: {
2940 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
2941 SystemZConstantPoolValue
*CPV
=
2942 SystemZConstantPoolValue::Create(GV
, SystemZCP::TLSGD
);
2944 Offset
= DAG
.getConstantPool(CPV
, PtrVT
, 8);
2945 Offset
= DAG
.getLoad(
2946 PtrVT
, DL
, DAG
.getEntryNode(), Offset
,
2947 MachinePointerInfo::getConstantPool(DAG
.getMachineFunction()));
2949 // Call __tls_get_offset to retrieve the offset.
2950 Offset
= lowerTLSGetOffset(Node
, DAG
, SystemZISD::TLS_GDCALL
, Offset
);
2954 case TLSModel::LocalDynamic
: {
2955 // Load the GOT offset of the module ID.
2956 SystemZConstantPoolValue
*CPV
=
2957 SystemZConstantPoolValue::Create(GV
, SystemZCP::TLSLDM
);
2959 Offset
= DAG
.getConstantPool(CPV
, PtrVT
, 8);
2960 Offset
= DAG
.getLoad(
2961 PtrVT
, DL
, DAG
.getEntryNode(), Offset
,
2962 MachinePointerInfo::getConstantPool(DAG
.getMachineFunction()));
2964 // Call __tls_get_offset to retrieve the module base offset.
2965 Offset
= lowerTLSGetOffset(Node
, DAG
, SystemZISD::TLS_LDCALL
, Offset
);
2967 // Note: The SystemZLDCleanupPass will remove redundant computations
2968 // of the module base offset. Count total number of local-dynamic
2969 // accesses to trigger execution of that pass.
2970 SystemZMachineFunctionInfo
* MFI
=
2971 DAG
.getMachineFunction().getInfo
<SystemZMachineFunctionInfo
>();
2972 MFI
->incNumLocalDynamicTLSAccesses();
2974 // Add the per-symbol offset.
2975 CPV
= SystemZConstantPoolValue::Create(GV
, SystemZCP::DTPOFF
);
2977 SDValue DTPOffset
= DAG
.getConstantPool(CPV
, PtrVT
, 8);
2978 DTPOffset
= DAG
.getLoad(
2979 PtrVT
, DL
, DAG
.getEntryNode(), DTPOffset
,
2980 MachinePointerInfo::getConstantPool(DAG
.getMachineFunction()));
2982 Offset
= DAG
.getNode(ISD::ADD
, DL
, PtrVT
, Offset
, DTPOffset
);
2986 case TLSModel::InitialExec
: {
2987 // Load the offset from the GOT.
2988 Offset
= DAG
.getTargetGlobalAddress(GV
, DL
, PtrVT
, 0,
2989 SystemZII::MO_INDNTPOFF
);
2990 Offset
= DAG
.getNode(SystemZISD::PCREL_WRAPPER
, DL
, PtrVT
, Offset
);
2992 DAG
.getLoad(PtrVT
, DL
, DAG
.getEntryNode(), Offset
,
2993 MachinePointerInfo::getGOT(DAG
.getMachineFunction()));
2997 case TLSModel::LocalExec
: {
2998 // Force the offset into the constant pool and load it from there.
2999 SystemZConstantPoolValue
*CPV
=
3000 SystemZConstantPoolValue::Create(GV
, SystemZCP::NTPOFF
);
3002 Offset
= DAG
.getConstantPool(CPV
, PtrVT
, 8);
3003 Offset
= DAG
.getLoad(
3004 PtrVT
, DL
, DAG
.getEntryNode(), Offset
,
3005 MachinePointerInfo::getConstantPool(DAG
.getMachineFunction()));
3010 // Add the base and offset together.
3011 return DAG
.getNode(ISD::ADD
, DL
, PtrVT
, TP
, Offset
);
SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Node);
  const BlockAddress *BA = Node->getBlockAddress();
  int64_t Offset = Node->getOffset();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
  return Result;
}

SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
                                              SelectionDAG &DAG) const {
  SDLoc DL(JT);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);

  // Use LARL to load the address of the table.
  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
}

3036 SDValue
SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode
*CP
,
3037 SelectionDAG
&DAG
) const {
3039 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
3042 if (CP
->isMachineConstantPoolEntry())
3043 Result
= DAG
.getTargetConstantPool(CP
->getMachineCPVal(), PtrVT
,
3044 CP
->getAlignment());
3046 Result
= DAG
.getTargetConstantPool(CP
->getConstVal(), PtrVT
,
3047 CP
->getAlignment(), CP
->getOffset());
3049 // Use LARL to load the address of the constant pool entry.
3050 return DAG
.getNode(SystemZISD::PCREL_WRAPPER
, DL
, PtrVT
, Result
);
3053 SDValue
SystemZTargetLowering::lowerFRAMEADDR(SDValue Op
,
3054 SelectionDAG
&DAG
) const {
3055 MachineFunction
&MF
= DAG
.getMachineFunction();
3056 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
3057 MFI
.setFrameAddressIsTaken(true);
3060 unsigned Depth
= cast
<ConstantSDNode
>(Op
.getOperand(0))->getZExtValue();
3061 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
3063 // If the back chain frame index has not been allocated yet, do so.
3064 SystemZMachineFunctionInfo
*FI
= MF
.getInfo
<SystemZMachineFunctionInfo
>();
3065 int BackChainIdx
= FI
->getFramePointerSaveIndex();
3066 if (!BackChainIdx
) {
3067 // By definition, the frame address is the address of the back chain.
3068 BackChainIdx
= MFI
.CreateFixedObject(8, -SystemZMC::CallFrameSize
, false);
3069 FI
->setFramePointerSaveIndex(BackChainIdx
);
3071 SDValue BackChain
= DAG
.getFrameIndex(BackChainIdx
, PtrVT
);
3073 // FIXME The frontend should detect this case.
3075 report_fatal_error("Unsupported stack frame traversal count");
SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
                                               SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDLoc DL(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // FIXME The frontend should detect this case.
  if (Depth > 0) {
    report_fatal_error("Unsupported stack frame traversal count");
  }

  // Return R14D, which has the return address. Mark it an implicit live-in.
  unsigned LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
}
SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue In = Op.getOperand(0);
  EVT InVT = In.getValueType();
  EVT ResVT = Op.getValueType();

  // Convert loads directly.  This is normally done by DAGCombiner,
  // but we need this case for bitcasts that are created during lowering
  // and which are then lowered themselves.
  if (auto *LoadN = dyn_cast<LoadSDNode>(In))
    if (ISD::isNormalLoad(LoadN)) {
      SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
                                    LoadN->getBasePtr(), LoadN->getMemOperand());
      // Update the chain uses.
      DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
      return NewLoad;
    }

  if (InVT == MVT::i32 && ResVT == MVT::f32) {
    SDValue In64;
    if (Subtarget.hasHighWord()) {
      SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
                                       MVT::i64);
      In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
                                       MVT::i64, SDValue(U64, 0), In);
    } else {
      In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
      In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
                         DAG.getConstant(32, DL, MVT::i64));
    }
    SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
    return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
                                      DL, MVT::f32, Out64);
  }
  if (InVT == MVT::f32 && ResVT == MVT::i32) {
    SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
    SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
                                             MVT::f64, SDValue(U64, 0), In);
    SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
    if (Subtarget.hasHighWord())
      return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
                                        MVT::i32, Out64);
    SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
                                DAG.getConstant(32, DL, MVT::i64));
    return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
  }
  llvm_unreachable("Unexpected bitcast combination");
}
SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
                                            SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  SystemZMachineFunctionInfo *FuncInfo =
    MF.getInfo<SystemZMachineFunctionInfo>();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  SDValue Chain   = Op.getOperand(0);
  SDValue Addr    = Op.getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  SDLoc DL(Op);

  // The initial values of each field.
  const unsigned NumFields = 4;
  SDValue Fields[NumFields] = {
    DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
    DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
    DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
    DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
  };

  // Store each field into its respective slot.
  SDValue MemOps[NumFields];
  unsigned Offset = 0;
  for (unsigned I = 0; I < NumFields; ++I) {
    SDValue FieldAddr = Addr;
    if (Offset != 0)
      FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
                             MachinePointerInfo(SV, Offset));
    Offset += 8;
  }

  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDValue Chain      = Op.getOperand(0);
  SDValue DstPtr     = Op.getOperand(1);
  SDValue SrcPtr     = Op.getOperand(2);
  const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
  SDLoc DL(Op);

  return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
                       /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
                       /*isTailCall*/false,
                       MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
3205 SDValue
SystemZTargetLowering::
3206 lowerDYNAMIC_STACKALLOC(SDValue Op
, SelectionDAG
&DAG
) const {
3207 const TargetFrameLowering
*TFI
= Subtarget
.getFrameLowering();
3208 MachineFunction
&MF
= DAG
.getMachineFunction();
3209 bool RealignOpt
= !MF
.getFunction().hasFnAttribute("no-realign-stack");
3210 bool StoreBackchain
= MF
.getFunction().hasFnAttribute("backchain");
3212 SDValue Chain
= Op
.getOperand(0);
3213 SDValue Size
= Op
.getOperand(1);
3214 SDValue Align
= Op
.getOperand(2);
3217 // If user has set the no alignment function attribute, ignore
3218 // alloca alignments.
3219 uint64_t AlignVal
= (RealignOpt
?
3220 dyn_cast
<ConstantSDNode
>(Align
)->getZExtValue() : 0);
3222 uint64_t StackAlign
= TFI
->getStackAlignment();
3223 uint64_t RequiredAlign
= std::max(AlignVal
, StackAlign
);
3224 uint64_t ExtraAlignSpace
= RequiredAlign
- StackAlign
;
3226 unsigned SPReg
= getStackPointerRegisterToSaveRestore();
3227 SDValue NeededSpace
= Size
;
3229 // Get a reference to the stack pointer.
3230 SDValue OldSP
= DAG
.getCopyFromReg(Chain
, DL
, SPReg
, MVT::i64
);
3232 // If we need a backchain, save it now.
3235 Backchain
= DAG
.getLoad(MVT::i64
, DL
, Chain
, OldSP
, MachinePointerInfo());
3237 // Add extra space for alignment if needed.
3238 if (ExtraAlignSpace
)
3239 NeededSpace
= DAG
.getNode(ISD::ADD
, DL
, MVT::i64
, NeededSpace
,
3240 DAG
.getConstant(ExtraAlignSpace
, DL
, MVT::i64
));
3242 // Get the new stack pointer value.
3243 SDValue NewSP
= DAG
.getNode(ISD::SUB
, DL
, MVT::i64
, OldSP
, NeededSpace
);
3245 // Copy the new stack pointer back.
3246 Chain
= DAG
.getCopyToReg(Chain
, DL
, SPReg
, NewSP
);
3248 // The allocated data lives above the 160 bytes allocated for the standard
3249 // frame, plus any outgoing stack arguments. We don't know how much that
3250 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
3251 SDValue ArgAdjust
= DAG
.getNode(SystemZISD::ADJDYNALLOC
, DL
, MVT::i64
);
3252 SDValue Result
= DAG
.getNode(ISD::ADD
, DL
, MVT::i64
, NewSP
, ArgAdjust
);
3254 // Dynamically realign if needed.
3255 if (RequiredAlign
> StackAlign
) {
3257 DAG
.getNode(ISD::ADD
, DL
, MVT::i64
, Result
,
3258 DAG
.getConstant(ExtraAlignSpace
, DL
, MVT::i64
));
3260 DAG
.getNode(ISD::AND
, DL
, MVT::i64
, Result
,
3261 DAG
.getConstant(~(RequiredAlign
- 1), DL
, MVT::i64
));
3265 Chain
= DAG
.getStore(Chain
, DL
, Backchain
, NewSP
, MachinePointerInfo());
3267 SDValue Ops
[2] = { Result
, Chain
};
3268 return DAG
.getMergeValues(Ops
, DL
);
SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
    SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);

  return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
}
3278 SDValue
SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op
,
3279 SelectionDAG
&DAG
) const {
3280 EVT VT
= Op
.getValueType();
3284 // Just do a normal 64-bit multiplication and extract the results.
3285 // We define this so that it can be used for constant division.
3286 lowerMUL_LOHI32(DAG
, DL
, ISD::SIGN_EXTEND
, Op
.getOperand(0),
3287 Op
.getOperand(1), Ops
[1], Ops
[0]);
3288 else if (Subtarget
.hasMiscellaneousExtensions2())
3289 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
3290 // the high result in the even register. ISD::SMUL_LOHI is defined to
3291 // return the low half first, so the results are in reverse order.
3292 lowerGR128Binary(DAG
, DL
, VT
, SystemZISD::SMUL_LOHI
,
3293 Op
.getOperand(0), Op
.getOperand(1), Ops
[1], Ops
[0]);
3295 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
3297 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
3299 // but using the fact that the upper halves are either all zeros
3302 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
3304 // and grouping the right terms together since they are quicker than the
3307 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
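    //
    // Illustrative check of the identity (added sketch, small example values):
    // because lh and rh are each either 0 or all-ones, only the low 64 bits
    // of lh * rl matter, and  lh * rl == -(lh & rl) (mod 2^64),  which is why
    // the multiplications by the sign words can become ANDs and subtractions.
    // For example, with l = -1 and r = 2: ll * rl = 0x1FFFFFFFFFFFFFFFE,
    // (lh & rl) + (ll & rh) = 2 + 0 = 2, so the high half becomes
    // 1 - 2 = 0xFFFFFFFFFFFFFFFF and the full result is -2, as expected.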
3308 SDValue C63
= DAG
.getConstant(63, DL
, MVT::i64
);
3309 SDValue LL
= Op
.getOperand(0);
3310 SDValue RL
= Op
.getOperand(1);
3311 SDValue LH
= DAG
.getNode(ISD::SRA
, DL
, VT
, LL
, C63
);
3312 SDValue RH
= DAG
.getNode(ISD::SRA
, DL
, VT
, RL
, C63
);
3313 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3314 // the high result in the even register. ISD::SMUL_LOHI is defined to
3315 // return the low half first, so the results are in reverse order.
3316 lowerGR128Binary(DAG
, DL
, VT
, SystemZISD::UMUL_LOHI
,
3317 LL
, RL
, Ops
[1], Ops
[0]);
3318 SDValue NegLLTimesRH
= DAG
.getNode(ISD::AND
, DL
, VT
, LL
, RH
);
3319 SDValue NegLHTimesRL
= DAG
.getNode(ISD::AND
, DL
, VT
, LH
, RL
);
3320 SDValue NegSum
= DAG
.getNode(ISD::ADD
, DL
, VT
, NegLLTimesRH
, NegLHTimesRL
);
3321 Ops
[1] = DAG
.getNode(ISD::SUB
, DL
, VT
, Ops
[1], NegSum
);
3323 return DAG
.getMergeValues(Ops
, DL
);
SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue Ops[2];
  if (is32Bit(VT))
    // Just do a normal 64-bit multiplication and extract the results.
    // We define this so that it can be used for constant division.
    lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
                    Op.getOperand(1), Ops[1], Ops[0]);
  else
    // SystemZISD::UMUL_LOHI returns the low result in the odd register and
    // the high result in the even register.  ISD::UMUL_LOHI is defined to
    // return the low half first, so the results are in reverse order.
    lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
                     Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, DL);
}
SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  EVT VT = Op.getValueType();
  SDLoc DL(Op);

  // We use DSGF for 32-bit division.  This means the first operand must
  // always be 64-bit, and the second operand should be 32-bit whenever
  // that is possible, to improve performance.
  if (is32Bit(VT))
    Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
  else if (DAG.ComputeNumSignBits(Op1) > 32)
    Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);

  // DSG(F) returns the remainder in the even register and the
  // quotient in the odd register.
  SDValue Ops[2];
  lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, DL);
}
SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
                                            SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);

  // DL(G) returns the remainder in the even register and the
  // quotient in the odd register.
  SDValue Ops[2];
  lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
                   Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, DL);
}
3380 SDValue
SystemZTargetLowering::lowerOR(SDValue Op
, SelectionDAG
&DAG
) const {
3381 assert(Op
.getValueType() == MVT::i64
&& "Should be 64-bit operation");
3383 // Get the known-zero masks for each operand.
3384 SDValue Ops
[] = {Op
.getOperand(0), Op
.getOperand(1)};
3385 KnownBits Known
[2] = {DAG
.computeKnownBits(Ops
[0]),
3386 DAG
.computeKnownBits(Ops
[1])};
3388 // See if the upper 32 bits of one operand and the lower 32 bits of the
3389 // other are known zero. They are the low and high operands respectively.
3390 uint64_t Masks
[] = { Known
[0].Zero
.getZExtValue(),
3391 Known
[1].Zero
.getZExtValue() };
3393 if ((Masks
[0] >> 32) == 0xffffffff && uint32_t(Masks
[1]) == 0xffffffff)
3395 else if ((Masks
[1] >> 32) == 0xffffffff && uint32_t(Masks
[0]) == 0xffffffff)
3400 SDValue LowOp
= Ops
[Low
];
3401 SDValue HighOp
= Ops
[High
];
3403 // If the high part is a constant, we're better off using IILH.
3404 if (HighOp
.getOpcode() == ISD::Constant
)
3407 // If the low part is a constant that is outside the range of LHI,
3408 // then we're better off using IILF.
3409 if (LowOp
.getOpcode() == ISD::Constant
) {
3410 int64_t Value
= int32_t(cast
<ConstantSDNode
>(LowOp
)->getZExtValue());
3411 if (!isInt
<16>(Value
))
3415 // Check whether the high part is an AND that doesn't change the
3416 // high 32 bits and just masks out low bits. We can skip it if so.
3417 if (HighOp
.getOpcode() == ISD::AND
&&
3418 HighOp
.getOperand(1).getOpcode() == ISD::Constant
) {
3419 SDValue HighOp0
= HighOp
.getOperand(0);
3420 uint64_t Mask
= cast
<ConstantSDNode
>(HighOp
.getOperand(1))->getZExtValue();
3421 if (DAG
.MaskedValueIsZero(HighOp0
, APInt(64, ~(Mask
| 0xffffffff))))
3425 // Take advantage of the fact that all GR32 operations only change the
3426 // low 32 bits by truncating Low to an i32 and inserting it directly
3427 // using a subreg. The interesting cases are those where the truncation
3430 SDValue Low32
= DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i32
, LowOp
);
3431 return DAG
.getTargetInsertSubreg(SystemZ::subreg_l32
, DL
,
3432 MVT::i64
, HighOp
, Low32
);
3435 // Lower SADDO/SSUBO/UADDO/USUBO nodes.
3436 SDValue
SystemZTargetLowering::lowerXALUO(SDValue Op
,
3437 SelectionDAG
&DAG
) const {
3438 SDNode
*N
= Op
.getNode();
3439 SDValue LHS
= N
->getOperand(0);
3440 SDValue RHS
= N
->getOperand(1);
3442 unsigned BaseOp
= 0;
3443 unsigned CCValid
= 0;
3444 unsigned CCMask
= 0;
3446 switch (Op
.getOpcode()) {
3447 default: llvm_unreachable("Unknown instruction!");
3449 BaseOp
= SystemZISD::SADDO
;
3450 CCValid
= SystemZ::CCMASK_ARITH
;
3451 CCMask
= SystemZ::CCMASK_ARITH_OVERFLOW
;
3454 BaseOp
= SystemZISD::SSUBO
;
3455 CCValid
= SystemZ::CCMASK_ARITH
;
3456 CCMask
= SystemZ::CCMASK_ARITH_OVERFLOW
;
3459 BaseOp
= SystemZISD::UADDO
;
3460 CCValid
= SystemZ::CCMASK_LOGICAL
;
3461 CCMask
= SystemZ::CCMASK_LOGICAL_CARRY
;
3464 BaseOp
= SystemZISD::USUBO
;
3465 CCValid
= SystemZ::CCMASK_LOGICAL
;
3466 CCMask
= SystemZ::CCMASK_LOGICAL_BORROW
;
3470 SDVTList VTs
= DAG
.getVTList(N
->getValueType(0), MVT::i32
);
3471 SDValue Result
= DAG
.getNode(BaseOp
, DL
, VTs
, LHS
, RHS
);
3473 SDValue SetCC
= emitSETCC(DAG
, DL
, Result
.getValue(1), CCValid
, CCMask
);
3474 if (N
->getValueType(1) == MVT::i1
)
3475 SetCC
= DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i1
, SetCC
);
3477 return DAG
.getNode(ISD::MERGE_VALUES
, DL
, N
->getVTList(), Result
, SetCC
);
static bool isAddCarryChain(SDValue Carry) {
  while (Carry.getOpcode() == ISD::ADDCARRY)
    Carry = Carry.getOperand(2);
  return Carry.getOpcode() == ISD::UADDO;
}

static bool isSubBorrowChain(SDValue Carry) {
  while (Carry.getOpcode() == ISD::SUBCARRY)
    Carry = Carry.getOperand(2);
  return Carry.getOpcode() == ISD::USUBO;
}
3492 // Lower ADDCARRY/SUBCARRY nodes.
3493 SDValue
SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op
,
3494 SelectionDAG
&DAG
) const {
3496 SDNode
*N
= Op
.getNode();
3497 MVT VT
= N
->getSimpleValueType(0);
3499 // Let legalize expand this if it isn't a legal type yet.
3500 if (!DAG
.getTargetLoweringInfo().isTypeLegal(VT
))
3503 SDValue LHS
= N
->getOperand(0);
3504 SDValue RHS
= N
->getOperand(1);
3505 SDValue Carry
= Op
.getOperand(2);
3507 unsigned BaseOp
= 0;
3508 unsigned CCValid
= 0;
3509 unsigned CCMask
= 0;
3511 switch (Op
.getOpcode()) {
3512 default: llvm_unreachable("Unknown instruction!");
3514 if (!isAddCarryChain(Carry
))
3517 BaseOp
= SystemZISD::ADDCARRY
;
3518 CCValid
= SystemZ::CCMASK_LOGICAL
;
3519 CCMask
= SystemZ::CCMASK_LOGICAL_CARRY
;
3522 if (!isSubBorrowChain(Carry
))
3525 BaseOp
= SystemZISD::SUBCARRY
;
3526 CCValid
= SystemZ::CCMASK_LOGICAL
;
3527 CCMask
= SystemZ::CCMASK_LOGICAL_BORROW
;
3531 // Set the condition code from the carry flag.
3532 Carry
= DAG
.getNode(SystemZISD::GET_CCMASK
, DL
, MVT::i32
, Carry
,
3533 DAG
.getConstant(CCValid
, DL
, MVT::i32
),
3534 DAG
.getConstant(CCMask
, DL
, MVT::i32
));
3536 SDVTList VTs
= DAG
.getVTList(VT
, MVT::i32
);
3537 SDValue Result
= DAG
.getNode(BaseOp
, DL
, VTs
, LHS
, RHS
, Carry
);
3539 SDValue SetCC
= emitSETCC(DAG
, DL
, Result
.getValue(1), CCValid
, CCMask
);
3540 if (N
->getValueType(1) == MVT::i1
)
3541 SetCC
= DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i1
, SetCC
);
3543 return DAG
.getNode(ISD::MERGE_VALUES
, DL
, N
->getVTList(), Result
, SetCC
);
3546 SDValue
SystemZTargetLowering::lowerCTPOP(SDValue Op
,
3547 SelectionDAG
&DAG
) const {
3548 EVT VT
= Op
.getValueType();
3550 Op
= Op
.getOperand(0);
3552 // Handle vector types via VPOPCT.
3553 if (VT
.isVector()) {
3554 Op
= DAG
.getNode(ISD::BITCAST
, DL
, MVT::v16i8
, Op
);
3555 Op
= DAG
.getNode(SystemZISD::POPCNT
, DL
, MVT::v16i8
, Op
);
3556 switch (VT
.getScalarSizeInBits()) {
3560 Op
= DAG
.getNode(ISD::BITCAST
, DL
, VT
, Op
);
3561 SDValue Shift
= DAG
.getConstant(8, DL
, MVT::i32
);
3562 SDValue Tmp
= DAG
.getNode(SystemZISD::VSHL_BY_SCALAR
, DL
, VT
, Op
, Shift
);
3563 Op
= DAG
.getNode(ISD::ADD
, DL
, VT
, Op
, Tmp
);
3564 Op
= DAG
.getNode(SystemZISD::VSRL_BY_SCALAR
, DL
, VT
, Op
, Shift
);
3568 SDValue Tmp
= DAG
.getSplatBuildVector(MVT::v16i8
, DL
,
3569 DAG
.getConstant(0, DL
, MVT::i32
));
3570 Op
= DAG
.getNode(SystemZISD::VSUM
, DL
, VT
, Op
, Tmp
);
3574 SDValue Tmp
= DAG
.getSplatBuildVector(MVT::v16i8
, DL
,
3575 DAG
.getConstant(0, DL
, MVT::i32
));
3576 Op
= DAG
.getNode(SystemZISD::VSUM
, DL
, MVT::v4i32
, Op
, Tmp
);
3577 Op
= DAG
.getNode(SystemZISD::VSUM
, DL
, VT
, Op
, Tmp
);
3581 llvm_unreachable("Unexpected type");
3586 // Get the known-zero mask for the operand.
3587 KnownBits Known
= DAG
.computeKnownBits(Op
);
3588 unsigned NumSignificantBits
= (~Known
.Zero
).getActiveBits();
3589 if (NumSignificantBits
== 0)
3590 return DAG
.getConstant(0, DL
, VT
);
3592 // Skip known-zero high parts of the operand.
3593 int64_t OrigBitSize
= VT
.getSizeInBits();
3594 int64_t BitSize
= (int64_t)1 << Log2_32_Ceil(NumSignificantBits
);
3595 BitSize
= std::min(BitSize
, OrigBitSize
);
3597 // The POPCNT instruction counts the number of bits in each byte.
3598 Op
= DAG
.getNode(ISD::ANY_EXTEND
, DL
, MVT::i64
, Op
);
3599 Op
= DAG
.getNode(SystemZISD::POPCNT
, DL
, MVT::i64
, Op
);
3600 Op
= DAG
.getNode(ISD::TRUNCATE
, DL
, VT
, Op
);
3602 // Add up per-byte counts in a binary tree. All bits of Op at
3603 // position larger than BitSize remain zero throughout.
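  //
  // Illustrative example: for a 32-bit value whose per-byte counts are
  // b3 b2 b1 b0 (b3 in the most significant byte), the first iteration
  // (I = 16) leaves b3+b1 and b2+b0 in the top two bytes, and the second
  // iteration (I = 8) leaves b3+b2+b1+b0 in the top byte, which the final
  // SRL below extracts.  The per-byte sums cannot overflow a byte, since
  // each individual count is at most 8.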
3604 for (int64_t I
= BitSize
/ 2; I
>= 8; I
= I
/ 2) {
3605 SDValue Tmp
= DAG
.getNode(ISD::SHL
, DL
, VT
, Op
, DAG
.getConstant(I
, DL
, VT
));
3606 if (BitSize
!= OrigBitSize
)
3607 Tmp
= DAG
.getNode(ISD::AND
, DL
, VT
, Tmp
,
3608 DAG
.getConstant(((uint64_t)1 << BitSize
) - 1, DL
, VT
));
3609 Op
= DAG
.getNode(ISD::ADD
, DL
, VT
, Op
, Tmp
);
3612 // Extract overall result from high byte.
3614 Op
= DAG
.getNode(ISD::SRL
, DL
, VT
, Op
,
3615 DAG
.getConstant(BitSize
- 8, DL
, VT
));
SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
    cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
  SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
    cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());

  // The only fence that needs an instruction is a sequentially-consistent
  // cross-thread fence.
  if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
      FenceSSID == SyncScope::System) {
    return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
                                      Op.getOperand(0)),
                   0);
  }

  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
  return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
}
// Op is an atomic load.  Lower it into a normal volatile load.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
                                                SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
                        Node->getChain(), Node->getBasePtr(),
                        Node->getMemoryVT(), Node->getMemOperand());
}
// Op is an atomic store.  Lower it into a normal volatile store.
SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
                                    Node->getBasePtr(), Node->getMemoryVT(),
                                    Node->getMemOperand());
  // We have to enforce sequential consistency by performing a
  // serialization operation after the store.
  if (Node->getOrdering() == AtomicOrdering::SequentiallyConsistent)
    Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op),
                                       MVT::Other, Chain), 0);
  return Chain;
}
3665 // Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
3666 // two into the fullword ATOMIC_LOADW_* operation given by Opcode.
3667 SDValue
SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op
,
3669 unsigned Opcode
) const {
3670 auto *Node
= cast
<AtomicSDNode
>(Op
.getNode());
3672 // 32-bit operations need no code outside the main loop.
3673 EVT NarrowVT
= Node
->getMemoryVT();
3674 EVT WideVT
= MVT::i32
;
3675 if (NarrowVT
== WideVT
)
3678 int64_t BitSize
= NarrowVT
.getSizeInBits();
3679 SDValue ChainIn
= Node
->getChain();
3680 SDValue Addr
= Node
->getBasePtr();
3681 SDValue Src2
= Node
->getVal();
3682 MachineMemOperand
*MMO
= Node
->getMemOperand();
3684 EVT PtrVT
= Addr
.getValueType();
3686 // Convert atomic subtracts of constants into additions.
3687 if (Opcode
== SystemZISD::ATOMIC_LOADW_SUB
)
3688 if (auto *Const
= dyn_cast
<ConstantSDNode
>(Src2
)) {
3689 Opcode
= SystemZISD::ATOMIC_LOADW_ADD
;
3690 Src2
= DAG
.getConstant(-Const
->getSExtValue(), DL
, Src2
.getValueType());
3693 // Get the address of the containing word.
3694 SDValue AlignedAddr
= DAG
.getNode(ISD::AND
, DL
, PtrVT
, Addr
,
3695 DAG
.getConstant(-4, DL
, PtrVT
));
3697 // Get the number of bits that the word must be rotated left in order
3698 // to bring the field to the top bits of a GR32.
3699 SDValue BitShift
= DAG
.getNode(ISD::SHL
, DL
, PtrVT
, Addr
,
3700 DAG
.getConstant(3, DL
, PtrVT
));
3701 BitShift
= DAG
.getNode(ISD::TRUNCATE
, DL
, WideVT
, BitShift
);
3703 // Get the complementing shift amount, for rotating a field in the top
3704 // bits back to its proper position.
3705 SDValue NegBitShift
= DAG
.getNode(ISD::SUB
, DL
, WideVT
,
3706 DAG
.getConstant(0, DL
, WideVT
), BitShift
);
3708 // Extend the source operand to 32 bits and prepare it for the inner loop.
3709 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
3710 // operations require the source to be shifted in advance. (This shift
3711 // can be folded if the source is constant.) For AND and NAND, the lower
3712 // bits must be set, while for other opcodes they should be left clear.
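  //
  // Illustrative example (hypothetical value): for an 8-bit ATOMIC_LOADW_AND
  // with Src2 = 0x0F, BitSize is 8, so the value used inside the loop is
  // (0x0F << 24) | 0x00FFFFFF = 0x0FFFFFFF; the AND then only affects the
  // 8 bits that were rotated into the top of the GR32.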
3713 if (Opcode
!= SystemZISD::ATOMIC_SWAPW
)
3714 Src2
= DAG
.getNode(ISD::SHL
, DL
, WideVT
, Src2
,
3715 DAG
.getConstant(32 - BitSize
, DL
, WideVT
));
3716 if (Opcode
== SystemZISD::ATOMIC_LOADW_AND
||
3717 Opcode
== SystemZISD::ATOMIC_LOADW_NAND
)
3718 Src2
= DAG
.getNode(ISD::OR
, DL
, WideVT
, Src2
,
3719 DAG
.getConstant(uint32_t(-1) >> BitSize
, DL
, WideVT
));
3721 // Construct the ATOMIC_LOADW_* node.
3722 SDVTList VTList
= DAG
.getVTList(WideVT
, MVT::Other
);
3723 SDValue Ops
[] = { ChainIn
, AlignedAddr
, Src2
, BitShift
, NegBitShift
,
3724 DAG
.getConstant(BitSize
, DL
, WideVT
) };
3725 SDValue AtomicOp
= DAG
.getMemIntrinsicNode(Opcode
, DL
, VTList
, Ops
,
3728 // Rotate the result of the final CS so that the field is in the lower
3729 // bits of a GR32, then truncate it.
3730 SDValue ResultShift
= DAG
.getNode(ISD::ADD
, DL
, WideVT
, BitShift
,
3731 DAG
.getConstant(BitSize
, DL
, WideVT
));
3732 SDValue Result
= DAG
.getNode(ISD::ROTL
, DL
, WideVT
, AtomicOp
, ResultShift
);
3734 SDValue RetOps
[2] = { Result
, AtomicOp
.getValue(1) };
3735 return DAG
.getMergeValues(RetOps
, DL
);
3738 // Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations
3739 // into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit
3740 // operations into additions.
3741 SDValue
SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op
,
3742 SelectionDAG
&DAG
) const {
3743 auto *Node
= cast
<AtomicSDNode
>(Op
.getNode());
3744 EVT MemVT
= Node
->getMemoryVT();
3745 if (MemVT
== MVT::i32
|| MemVT
== MVT::i64
) {
3746 // A full-width operation.
3747 assert(Op
.getValueType() == MemVT
&& "Mismatched VTs");
3748 SDValue Src2
= Node
->getVal();
3752 if (auto *Op2
= dyn_cast
<ConstantSDNode
>(Src2
)) {
3753 // Use an addition if the operand is constant and either LAA(G) is
3754 // available or the negative value is in the range of A(G)FHI.
3755 int64_t Value
= (-Op2
->getAPIntValue()).getSExtValue();
3756 if (isInt
<32>(Value
) || Subtarget
.hasInterlockedAccess1())
3757 NegSrc2
= DAG
.getConstant(Value
, DL
, MemVT
);
3758 } else if (Subtarget
.hasInterlockedAccess1())
3759 // Use LAA(G) if available.
3760 NegSrc2
= DAG
.getNode(ISD::SUB
, DL
, MemVT
, DAG
.getConstant(0, DL
, MemVT
),
3763 if (NegSrc2
.getNode())
3764 return DAG
.getAtomic(ISD::ATOMIC_LOAD_ADD
, DL
, MemVT
,
3765 Node
->getChain(), Node
->getBasePtr(), NegSrc2
,
3766 Node
->getMemOperand());
3768 // Use the node as-is.
3772 return lowerATOMIC_LOAD_OP(Op
, DAG
, SystemZISD::ATOMIC_LOADW_SUB
);
3775 // Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
3776 SDValue
SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op
,
3777 SelectionDAG
&DAG
) const {
3778 auto *Node
= cast
<AtomicSDNode
>(Op
.getNode());
3779 SDValue ChainIn
= Node
->getOperand(0);
3780 SDValue Addr
= Node
->getOperand(1);
3781 SDValue CmpVal
= Node
->getOperand(2);
3782 SDValue SwapVal
= Node
->getOperand(3);
3783 MachineMemOperand
*MMO
= Node
->getMemOperand();
3786 // We have native support for 32-bit and 64-bit compare and swap, but we
3787 // still need to expand extracting the "success" result from the CC.
3788 EVT NarrowVT
= Node
->getMemoryVT();
3789 EVT WideVT
= NarrowVT
== MVT::i64
? MVT::i64
: MVT::i32
;
3790 if (NarrowVT
== WideVT
) {
3791 SDVTList Tys
= DAG
.getVTList(WideVT
, MVT::i32
, MVT::Other
);
3792 SDValue Ops
[] = { ChainIn
, Addr
, CmpVal
, SwapVal
};
3793 SDValue AtomicOp
= DAG
.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP
,
3794 DL
, Tys
, Ops
, NarrowVT
, MMO
);
3795 SDValue Success
= emitSETCC(DAG
, DL
, AtomicOp
.getValue(1),
3796 SystemZ::CCMASK_CS
, SystemZ::CCMASK_CS_EQ
);
3798 DAG
.ReplaceAllUsesOfValueWith(Op
.getValue(0), AtomicOp
.getValue(0));
3799 DAG
.ReplaceAllUsesOfValueWith(Op
.getValue(1), Success
);
3800 DAG
.ReplaceAllUsesOfValueWith(Op
.getValue(2), AtomicOp
.getValue(2));
3804 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
3805 // via a fullword ATOMIC_CMP_SWAPW operation.
3806 int64_t BitSize
= NarrowVT
.getSizeInBits();
3807 EVT PtrVT
= Addr
.getValueType();
3809 // Get the address of the containing word.
3810 SDValue AlignedAddr
= DAG
.getNode(ISD::AND
, DL
, PtrVT
, Addr
,
3811 DAG
.getConstant(-4, DL
, PtrVT
));
3813 // Get the number of bits that the word must be rotated left in order
3814 // to bring the field to the top bits of a GR32.
3815 SDValue BitShift
= DAG
.getNode(ISD::SHL
, DL
, PtrVT
, Addr
,
3816 DAG
.getConstant(3, DL
, PtrVT
));
3817 BitShift
= DAG
.getNode(ISD::TRUNCATE
, DL
, WideVT
, BitShift
);
3819 // Get the complementing shift amount, for rotating a field in the top
3820 // bits back to its proper position.
3821 SDValue NegBitShift
= DAG
.getNode(ISD::SUB
, DL
, WideVT
,
3822 DAG
.getConstant(0, DL
, WideVT
), BitShift
);
3824 // Construct the ATOMIC_CMP_SWAPW node.
3825 SDVTList VTList
= DAG
.getVTList(WideVT
, MVT::i32
, MVT::Other
);
3826 SDValue Ops
[] = { ChainIn
, AlignedAddr
, CmpVal
, SwapVal
, BitShift
,
3827 NegBitShift
, DAG
.getConstant(BitSize
, DL
, WideVT
) };
3828 SDValue AtomicOp
= DAG
.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW
, DL
,
3829 VTList
, Ops
, NarrowVT
, MMO
);
3830 SDValue Success
= emitSETCC(DAG
, DL
, AtomicOp
.getValue(1),
3831 SystemZ::CCMASK_ICMP
, SystemZ::CCMASK_CMP_EQ
);
3833 DAG
.ReplaceAllUsesOfValueWith(Op
.getValue(0), AtomicOp
.getValue(0));
3834 DAG
.ReplaceAllUsesOfValueWith(Op
.getValue(1), Success
);
3835 DAG
.ReplaceAllUsesOfValueWith(Op
.getValue(2), AtomicOp
.getValue(2));
MachineMemOperand::Flags
SystemZTargetLowering::getMMOFlags(const Instruction &I) const {
  // Because of how we convert atomic_load and atomic_store to normal loads and
  // stores in the DAG, we need to ensure that the MMOs are marked volatile
  // since DAGCombine hasn't been updated to account for atomic, but non
  // volatile loads.  (See D57601)
  if (auto *SI = dyn_cast<StoreInst>(&I))
    if (SI->isAtomic())
      return MachineMemOperand::MOVolatile;
  if (auto *LI = dyn_cast<LoadInst>(&I))
    if (LI->isAtomic())
      return MachineMemOperand::MOVolatile;
  if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
    if (AI->isAtomic())
      return MachineMemOperand::MOVolatile;
  if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
    if (AI->isAtomic())
      return MachineMemOperand::MOVolatile;
  return MachineMemOperand::MONone;
}
SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
  return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
                            SystemZ::R15D, Op.getValueType());
}
SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
  bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");

  SDValue Chain = Op.getOperand(0);
  SDValue NewSP = Op.getOperand(1);
  SDValue Backchain;
  SDLoc DL(Op);

  if (StoreBackchain) {
    SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, MVT::i64);
    Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
  }

  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R15D, NewSP);

  if (StoreBackchain)
    Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());

  return Chain;
}
SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
                                             SelectionDAG &DAG) const {
  bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
  if (!IsData)
    // Just preserve the chain.
    return Op.getOperand(0);

  SDLoc DL(Op);
  bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
  unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
  auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
  SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
                   Op.getOperand(1)};
  return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
                                 Node->getVTList(), Ops,
                                 Node->getMemoryVT(), Node->getMemOperand());
}
// Convert condition code in CCReg to an i32 value.
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
  SDLoc DL(CCReg);
  SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
  return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
                     DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
}
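// Note on getCCResult: assuming SystemZ::IPM_CC is 28, IPM leaves the
// condition code in bits 29:28 of its i32 result (i.e. the register holds
// CC << 28), so the SRL yields the raw CC value in the range 0..3.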
3919 SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op
,
3920 SelectionDAG
&DAG
) const {
3921 unsigned Opcode
, CCValid
;
3922 if (isIntrinsicWithCCAndChain(Op
, Opcode
, CCValid
)) {
3923 assert(Op
->getNumValues() == 2 && "Expected only CC result and chain");
3924 SDNode
*Node
= emitIntrinsicWithCCAndChain(DAG
, Op
, Opcode
);
3925 SDValue CC
= getCCResult(DAG
, SDValue(Node
, 0));
3926 DAG
.ReplaceAllUsesOfValueWith(SDValue(Op
.getNode(), 0), CC
);
3934 SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op
,
3935 SelectionDAG
&DAG
) const {
3936 unsigned Opcode
, CCValid
;
3937 if (isIntrinsicWithCC(Op
, Opcode
, CCValid
)) {
3938 SDNode
*Node
= emitIntrinsicWithCC(DAG
, Op
, Opcode
);
3939 if (Op
->getNumValues() == 1)
3940 return getCCResult(DAG
, SDValue(Node
, 0));
3941 assert(Op
->getNumValues() == 2 && "Expected a CC and non-CC result");
3942 return DAG
.getNode(ISD::MERGE_VALUES
, SDLoc(Op
), Op
->getVTList(),
3943 SDValue(Node
, 0), getCCResult(DAG
, SDValue(Node
, 1)));
3946 unsigned Id
= cast
<ConstantSDNode
>(Op
.getOperand(0))->getZExtValue();
3948 case Intrinsic::thread_pointer
:
3949 return lowerThreadPointer(SDLoc(Op
), DAG
);
3951 case Intrinsic::s390_vpdi
:
3952 return DAG
.getNode(SystemZISD::PERMUTE_DWORDS
, SDLoc(Op
), Op
.getValueType(),
3953 Op
.getOperand(1), Op
.getOperand(2), Op
.getOperand(3));
3955 case Intrinsic::s390_vperm
:
3956 return DAG
.getNode(SystemZISD::PERMUTE
, SDLoc(Op
), Op
.getValueType(),
3957 Op
.getOperand(1), Op
.getOperand(2), Op
.getOperand(3));
3959 case Intrinsic::s390_vuphb
:
3960 case Intrinsic::s390_vuphh
:
3961 case Intrinsic::s390_vuphf
:
3962 return DAG
.getNode(SystemZISD::UNPACK_HIGH
, SDLoc(Op
), Op
.getValueType(),
3965 case Intrinsic::s390_vuplhb
:
3966 case Intrinsic::s390_vuplhh
:
3967 case Intrinsic::s390_vuplhf
:
3968 return DAG
.getNode(SystemZISD::UNPACKL_HIGH
, SDLoc(Op
), Op
.getValueType(),
3971 case Intrinsic::s390_vuplb
:
3972 case Intrinsic::s390_vuplhw
:
3973 case Intrinsic::s390_vuplf
:
3974 return DAG
.getNode(SystemZISD::UNPACK_LOW
, SDLoc(Op
), Op
.getValueType(),
3977 case Intrinsic::s390_vupllb
:
3978 case Intrinsic::s390_vupllh
:
3979 case Intrinsic::s390_vupllf
:
3980 return DAG
.getNode(SystemZISD::UNPACKL_LOW
, SDLoc(Op
), Op
.getValueType(),
3983 case Intrinsic::s390_vsumb
:
3984 case Intrinsic::s390_vsumh
:
3985 case Intrinsic::s390_vsumgh
:
3986 case Intrinsic::s390_vsumgf
:
3987 case Intrinsic::s390_vsumqf
:
3988 case Intrinsic::s390_vsumqg
:
3989 return DAG
.getNode(SystemZISD::VSUM
, SDLoc(Op
), Op
.getValueType(),
3990 Op
.getOperand(1), Op
.getOperand(2));
3997 // Says that SystemZISD operation Opcode can be used to perform the equivalent
3998 // of a VPERM with permute vector Bytes. If Opcode takes three operands,
3999 // Operand is the constant third operand, otherwise it is the number of
4000 // bytes in each element of the result.
4004 unsigned char Bytes
[SystemZ::VectorBytes
];
4008 static const Permute PermuteForms
[] = {
4010 { SystemZISD::MERGE_HIGH
, 8,
4011 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
4013 { SystemZISD::MERGE_HIGH
, 4,
4014 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
4016 { SystemZISD::MERGE_HIGH
, 2,
4017 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
4019 { SystemZISD::MERGE_HIGH
, 1,
4020 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
4022 { SystemZISD::MERGE_LOW
, 8,
4023 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
4025 { SystemZISD::MERGE_LOW
, 4,
4026 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
4028 { SystemZISD::MERGE_LOW
, 2,
4029 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
4031 { SystemZISD::MERGE_LOW
, 1,
4032 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
4034 { SystemZISD::PACK
, 4,
4035 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
4037 { SystemZISD::PACK
, 2,
4038 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
4040 { SystemZISD::PACK
, 1,
4041 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
4042 // VPDI V1, V2, 4 (low half of V1, high half of V2)
4043 { SystemZISD::PERMUTE_DWORDS
, 4,
4044 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
4045 // VPDI V1, V2, 1 (high half of V1, low half of V2)
4046 { SystemZISD::PERMUTE_DWORDS
, 1,
4047 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
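// A note on the byte numbering used above: the selectors follow the VPERM
// convention, where values 0-15 pick bytes of the first operand and 16-31
// pick bytes of the second.  For example, the byte-sized MERGE_HIGH entry
// { 0, 16, 1, 17, ... } interleaves byte N of operand 0 with byte N of
// operand 1 across the high halves of both inputs.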
4050 // Called after matching a vector shuffle against a particular pattern.
4051 // Both the original shuffle and the pattern have two vector operands.
4052 // OpNos[0] is the operand of the original shuffle that should be used for
4053 // operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
4054 // OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
4055 // set OpNo0 and OpNo1 to the shuffle operands that should actually be used
4056 // for operands 0 and 1 of the pattern.
4057 static bool chooseShuffleOpNos(int *OpNos
, unsigned &OpNo0
, unsigned &OpNo1
) {
4061 OpNo0
= OpNo1
= OpNos
[1];
4062 } else if (OpNos
[1] < 0) {
4063 OpNo0
= OpNo1
= OpNos
[0];
4071 // Bytes is a VPERM-like permute vector, except that -1 is used for
4072 // undefined bytes. Return true if the VPERM can be implemented using P.
4073 // When returning true set OpNo0 to the VPERM operand that should be
4074 // used for operand 0 of P and likewise OpNo1 for operand 1 of P.
4076 // For example, if swapping the VPERM operands allows P to match, OpNo0
4077 // will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
4078 // operand, but rewriting it to use two duplicated operands allows it to
4079 // match P, then OpNo0 and OpNo1 will be the same.
4080 static bool matchPermute(const SmallVectorImpl
<int> &Bytes
, const Permute
&P
,
4081 unsigned &OpNo0
, unsigned &OpNo1
) {
4082 int OpNos
[] = { -1, -1 };
4083 for (unsigned I
= 0; I
< SystemZ::VectorBytes
; ++I
) {
4086 // Make sure that the two permute vectors use the same suboperand
4087 // byte number. Only the operand numbers (the high bits) are
4088 // allowed to differ.
4089 if ((Elt
^ P
.Bytes
[I
]) & (SystemZ::VectorBytes
- 1))
4091 int ModelOpNo
= P
.Bytes
[I
] / SystemZ::VectorBytes
;
4092 int RealOpNo
= unsigned(Elt
) / SystemZ::VectorBytes
;
4093 // Make sure that the operand mappings are consistent with previous
4095 if (OpNos
[ModelOpNo
] == 1 - RealOpNo
)
4097 OpNos
[ModelOpNo
] = RealOpNo
;
4100 return chooseShuffleOpNos(OpNos
, OpNo0
, OpNo1
);
4103 // As above, but search for a matching permute.
4104 static const Permute
*matchPermute(const SmallVectorImpl
<int> &Bytes
,
4105 unsigned &OpNo0
, unsigned &OpNo1
) {
4106 for (auto &P
: PermuteForms
)
4107 if (matchPermute(Bytes
, P
, OpNo0
, OpNo1
))
4112 // Bytes is a VPERM-like permute vector, except that -1 is used for
4113 // undefined bytes. This permute is an operand of an outer permute.
4114 // See whether redistributing the -1 bytes gives a shuffle that can be
4115 // implemented using P. If so, set Transform to a VPERM-like permute vector
4116 // that, when applied to the result of P, gives the original permute in Bytes.
4117 static bool matchDoublePermute(const SmallVectorImpl
<int> &Bytes
,
4119 SmallVectorImpl
<int> &Transform
) {
4121 for (unsigned From
= 0; From
< SystemZ::VectorBytes
; ++From
) {
4122 int Elt
= Bytes
[From
];
4124 // Byte number From of the result is undefined.
4125 Transform
[From
] = -1;
4127 while (P
.Bytes
[To
] != Elt
) {
4129 if (To
== SystemZ::VectorBytes
)
4132 Transform
[From
] = To
;
4138 // As above, but search for a matching permute.
4139 static const Permute
*matchDoublePermute(const SmallVectorImpl
<int> &Bytes
,
4140 SmallVectorImpl
<int> &Transform
) {
4141 for (auto &P
: PermuteForms
)
4142 if (matchDoublePermute(Bytes
, P
, Transform
))
4147 // Convert the mask of the given shuffle op into a byte-level mask,
4148 // as if it had type vNi8.
4149 static bool getVPermMask(SDValue ShuffleOp
,
4150 SmallVectorImpl
<int> &Bytes
) {
4151 EVT VT
= ShuffleOp
.getValueType();
4152 unsigned NumElements
= VT
.getVectorNumElements();
4153 unsigned BytesPerElement
= VT
.getVectorElementType().getStoreSize();
4155 if (auto *VSN
= dyn_cast
<ShuffleVectorSDNode
>(ShuffleOp
)) {
4156 Bytes
.resize(NumElements
* BytesPerElement
, -1);
4157 for (unsigned I
= 0; I
< NumElements
; ++I
) {
4158 int Index
= VSN
->getMaskElt(I
);
4160 for (unsigned J
= 0; J
< BytesPerElement
; ++J
)
4161 Bytes
[I
* BytesPerElement
+ J
] = Index
* BytesPerElement
+ J
;
4165 if (SystemZISD::SPLAT
== ShuffleOp
.getOpcode() &&
4166 isa
<ConstantSDNode
>(ShuffleOp
.getOperand(1))) {
4167 unsigned Index
= ShuffleOp
.getConstantOperandVal(1);
4168 Bytes
.resize(NumElements
* BytesPerElement
, -1);
4169 for (unsigned I
= 0; I
< NumElements
; ++I
)
4170 for (unsigned J
= 0; J
< BytesPerElement
; ++J
)
4171 Bytes
[I
* BytesPerElement
+ J
] = Index
* BytesPerElement
+ J
;
4177 // Bytes is a VPERM-like permute vector, except that -1 is used for
4178 // undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
4179 // the result come from a contiguous sequence of bytes from one input.
4180 // Set Base to the selector for the first byte if so.
4181 static bool getShuffleInput(const SmallVectorImpl
<int> &Bytes
, unsigned Start
,
4182 unsigned BytesPerElement
, int &Base
) {
4184 for (unsigned I
= 0; I
< BytesPerElement
; ++I
) {
4185 if (Bytes
[Start
+ I
] >= 0) {
4186 unsigned Elem
= Bytes
[Start
+ I
];
4189 // Make sure the bytes would come from one input operand.
4190 if (unsigned(Base
) % Bytes
.size() + BytesPerElement
> Bytes
.size())
4192 } else if (unsigned(Base
) != Elem
- I
)
4199 // Bytes is a VPERM-like permute vector, except that -1 is used for
4200 // undefined bytes. Return true if it can be performed using VSLDI.
4201 // When returning true, set StartIndex to the shift amount and OpNo0
4202 // and OpNo1 to the VPERM operands that should be used as the first
4203 // and second shift operand respectively.
4204 static bool isShlDoublePermute(const SmallVectorImpl
<int> &Bytes
,
4205 unsigned &StartIndex
, unsigned &OpNo0
,
4207 int OpNos
[] = { -1, -1 };
4209 for (unsigned I
= 0; I
< 16; ++I
) {
4210 int Index
= Bytes
[I
];
4212 int ExpectedShift
= (Index
- I
) % SystemZ::VectorBytes
;
4213 int ModelOpNo
= unsigned(ExpectedShift
+ I
) / SystemZ::VectorBytes
;
4214 int RealOpNo
= unsigned(Index
) / SystemZ::VectorBytes
;
4216 Shift
= ExpectedShift
;
4217 else if (Shift
!= ExpectedShift
)
4219 // Make sure that the operand mappings are consistent with previous
4221 if (OpNos
[ModelOpNo
] == 1 - RealOpNo
)
4223 OpNos
[ModelOpNo
] = RealOpNo
;
4227 return chooseShuffleOpNos(OpNos
, OpNo0
, OpNo1
);
4230 // Create a node that performs P on operands Op0 and Op1, casting the
4231 // operands to the appropriate type. The type of the result is determined by P.
4232 static SDValue
getPermuteNode(SelectionDAG
&DAG
, const SDLoc
&DL
,
4233 const Permute
&P
, SDValue Op0
, SDValue Op1
) {
4234 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
4235 // elements of a PACK are twice as wide as the outputs.
4236 unsigned InBytes
= (P
.Opcode
== SystemZISD::PERMUTE_DWORDS
? 8 :
4237 P
.Opcode
== SystemZISD::PACK
? P
.Operand
* 2 :
4239 // Cast both operands to the appropriate type.
4240 MVT InVT
= MVT::getVectorVT(MVT::getIntegerVT(InBytes
* 8),
4241 SystemZ::VectorBytes
/ InBytes
);
4242 Op0
= DAG
.getNode(ISD::BITCAST
, DL
, InVT
, Op0
);
4243 Op1
= DAG
.getNode(ISD::BITCAST
, DL
, InVT
, Op1
);
4245 if (P
.Opcode
== SystemZISD::PERMUTE_DWORDS
) {
4246 SDValue Op2
= DAG
.getTargetConstant(P
.Operand
, DL
, MVT::i32
);
4247 Op
= DAG
.getNode(SystemZISD::PERMUTE_DWORDS
, DL
, InVT
, Op0
, Op1
, Op2
);
4248 } else if (P
.Opcode
== SystemZISD::PACK
) {
4249 MVT OutVT
= MVT::getVectorVT(MVT::getIntegerVT(P
.Operand
* 8),
4250 SystemZ::VectorBytes
/ P
.Operand
);
4251 Op
= DAG
.getNode(SystemZISD::PACK
, DL
, OutVT
, Op0
, Op1
);
4253 Op
= DAG
.getNode(P
.Opcode
, DL
, InVT
, Op0
, Op1
);
4258 // Bytes is a VPERM-like permute vector, except that -1 is used for
4259 // undefined bytes. Implement it on operands Ops[0] and Ops[1] using
4261 static SDValue
getGeneralPermuteNode(SelectionDAG
&DAG
, const SDLoc
&DL
,
4263 const SmallVectorImpl
<int> &Bytes
) {
4264 for (unsigned I
= 0; I
< 2; ++I
)
4265 Ops
[I
] = DAG
.getNode(ISD::BITCAST
, DL
, MVT::v16i8
, Ops
[I
]);
4267 // First see whether VSLDI can be used.
4268 unsigned StartIndex
, OpNo0
, OpNo1
;
4269 if (isShlDoublePermute(Bytes
, StartIndex
, OpNo0
, OpNo1
))
4270 return DAG
.getNode(SystemZISD::SHL_DOUBLE
, DL
, MVT::v16i8
, Ops
[OpNo0
],
4272 DAG
.getTargetConstant(StartIndex
, DL
, MVT::i32
));
4274 // Fall back on VPERM. Construct an SDNode for the permute vector.
4275 SDValue IndexNodes
[SystemZ::VectorBytes
];
4276 for (unsigned I
= 0; I
< SystemZ::VectorBytes
; ++I
)
4278 IndexNodes
[I
] = DAG
.getConstant(Bytes
[I
], DL
, MVT::i32
);
4280 IndexNodes
[I
] = DAG
.getUNDEF(MVT::i32
);
4281 SDValue Op2
= DAG
.getBuildVector(MVT::v16i8
, DL
, IndexNodes
);
4282 return DAG
.getNode(SystemZISD::PERMUTE
, DL
, MVT::v16i8
, Ops
[0], Ops
[1], Op2
);
4286 // Describes a general N-operand vector shuffle.
4287 struct GeneralShuffle
{
4288 GeneralShuffle(EVT vt
) : VT(vt
) {}
4290 bool add(SDValue
, unsigned);
4291 SDValue
getNode(SelectionDAG
&, const SDLoc
&);
4293 // The operands of the shuffle.
4294 SmallVector
<SDValue
, SystemZ::VectorBytes
> Ops
;
4296 // Index I is -1 if byte I of the result is undefined. Otherwise the
4297 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
4298 // Bytes[I] / SystemZ::VectorBytes.
4299 SmallVector
<int, SystemZ::VectorBytes
> Bytes
;
4301 // The type of the shuffle result.
4306 // Add an extra undefined element to the shuffle.
4307 void GeneralShuffle::addUndef() {
4308 unsigned BytesPerElement
= VT
.getVectorElementType().getStoreSize();
4309 for (unsigned I
= 0; I
< BytesPerElement
; ++I
)
4310 Bytes
.push_back(-1);
4313 // Add an extra element to the shuffle, taking it from element Elem of Op.
4314 // A null Op indicates a vector input whose value will be calculated later;
4315 // there is at most one such input per shuffle and it always has the same
4316 // type as the result. Aborts and returns false if the source vector elements
4317 // of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
4318 // LLVM they become implicitly extended, but this is rare and not optimized.
4319 bool GeneralShuffle::add(SDValue Op
, unsigned Elem
) {
4320 unsigned BytesPerElement
= VT
.getVectorElementType().getStoreSize();
4322 // The source vector can have wider elements than the result,
4323 // either through an explicit TRUNCATE or because of type legalization.
4324 // We want the least significant part.
4325 EVT FromVT
= Op
.getNode() ? Op
.getValueType() : VT
;
4326 unsigned FromBytesPerElement
= FromVT
.getVectorElementType().getStoreSize();
4328 // Return false if the source elements are smaller than their destination
4330 if (FromBytesPerElement
< BytesPerElement
)
4333 unsigned Byte
= ((Elem
* FromBytesPerElement
) % SystemZ::VectorBytes
+
4334 (FromBytesPerElement
- BytesPerElement
));
4336 // Look through things like shuffles and bitcasts.
4337 while (Op
.getNode()) {
4338 if (Op
.getOpcode() == ISD::BITCAST
)
4339 Op
= Op
.getOperand(0);
4340 else if (Op
.getOpcode() == ISD::VECTOR_SHUFFLE
&& Op
.hasOneUse()) {
4341 // See whether the bytes we need come from a contiguous part of one
4343 SmallVector
<int, SystemZ::VectorBytes
> OpBytes
;
4344 if (!getVPermMask(Op
, OpBytes
))
4347 if (!getShuffleInput(OpBytes
, Byte
, BytesPerElement
, NewByte
))
4353 Op
= Op
.getOperand(unsigned(NewByte
) / SystemZ::VectorBytes
);
4354 Byte
= unsigned(NewByte
) % SystemZ::VectorBytes
;
4355 } else if (Op
.isUndef()) {
4362 // Make sure that the source of the extraction is in Ops.
4364 for (; OpNo
< Ops
.size(); ++OpNo
)
4365 if (Ops
[OpNo
] == Op
)
4367 if (OpNo
== Ops
.size())
4370 // Add the element to Bytes.
4371 unsigned Base
= OpNo
* SystemZ::VectorBytes
+ Byte
;
4372 for (unsigned I
= 0; I
< BytesPerElement
; ++I
)
4373 Bytes
.push_back(Base
+ I
);
4378 // Return SDNodes for the completed shuffle.
4379 SDValue
GeneralShuffle::getNode(SelectionDAG
&DAG
, const SDLoc
&DL
) {
4380 assert(Bytes
.size() == SystemZ::VectorBytes
&& "Incomplete vector");
4382 if (Ops
.size() == 0)
4383 return DAG
.getUNDEF(VT
);
4385 // Make sure that there are at least two shuffle operands.
4386 if (Ops
.size() == 1)
4387 Ops
.push_back(DAG
.getUNDEF(MVT::v16i8
));
4389 // Create a tree of shuffles, deferring root node until after the loop.
4390 // Try to redistribute the undefined elements of non-root nodes so that
4391 // the non-root shuffles match something like a pack or merge, then adjust
4392 // the parent node's permute vector to compensate for the new order.
4393 // Among other things, this copes with vectors like <2 x i16> that were
4394 // padded with undefined elements during type legalization.
4396 // In the best case this redistribution will lead to the whole tree
4397 // using packs and merges. It should rarely be a loss in other cases.
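  //
  // For instance, with four operands the first pass pairs Ops[0]/Ops[1] and
  // Ops[2]/Ops[3] into two intermediate shuffles; the root node built after
  // the loop then combines those two intermediate results.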
4398 unsigned Stride
= 1;
4399 for (; Stride
* 2 < Ops
.size(); Stride
*= 2) {
4400 for (unsigned I
= 0; I
< Ops
.size() - Stride
; I
+= Stride
* 2) {
4401 SDValue SubOps
[] = { Ops
[I
], Ops
[I
+ Stride
] };
4403 // Create a mask for just these two operands.
4404 SmallVector
<int, SystemZ::VectorBytes
> NewBytes(SystemZ::VectorBytes
);
4405 for (unsigned J
= 0; J
< SystemZ::VectorBytes
; ++J
) {
4406 unsigned OpNo
= unsigned(Bytes
[J
]) / SystemZ::VectorBytes
;
4407 unsigned Byte
= unsigned(Bytes
[J
]) % SystemZ::VectorBytes
;
4410 else if (OpNo
== I
+ Stride
)
4411 NewBytes
[J
] = SystemZ::VectorBytes
+ Byte
;
4415 // See if it would be better to reorganize NewMask to avoid using VPERM.
4416 SmallVector
<int, SystemZ::VectorBytes
> NewBytesMap(SystemZ::VectorBytes
);
4417 if (const Permute
*P
= matchDoublePermute(NewBytes
, NewBytesMap
)) {
4418 Ops
[I
] = getPermuteNode(DAG
, DL
, *P
, SubOps
[0], SubOps
[1]);
4419 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
4420 for (unsigned J
= 0; J
< SystemZ::VectorBytes
; ++J
) {
4421 if (NewBytes
[J
] >= 0) {
4422 assert(unsigned(NewBytesMap
[J
]) < SystemZ::VectorBytes
&&
4423 "Invalid double permute");
4424 Bytes
[J
] = I
* SystemZ::VectorBytes
+ NewBytesMap
[J
];
4426 assert(NewBytesMap
[J
] < 0 && "Invalid double permute");
4429 // Just use NewBytes on the operands.
4430 Ops
[I
] = getGeneralPermuteNode(DAG
, DL
, SubOps
, NewBytes
);
4431 for (unsigned J
= 0; J
< SystemZ::VectorBytes
; ++J
)
4432 if (NewBytes
[J
] >= 0)
4433 Bytes
[J
] = I
* SystemZ::VectorBytes
+ J
;
4438 // Now we just have 2 inputs. Put the second operand in Ops[1].
4440 Ops
[1] = Ops
[Stride
];
4441 for (unsigned I
= 0; I
< SystemZ::VectorBytes
; ++I
)
4442 if (Bytes
[I
] >= int(SystemZ::VectorBytes
))
4443 Bytes
[I
] -= (Stride
- 1) * SystemZ::VectorBytes
;
4446 // Look for an instruction that can do the permute without resorting
4448 unsigned OpNo0
, OpNo1
;
4450 if (const Permute
*P
= matchPermute(Bytes
, OpNo0
, OpNo1
))
4451 Op
= getPermuteNode(DAG
, DL
, *P
, Ops
[OpNo0
], Ops
[OpNo1
]);
4453 Op
= getGeneralPermuteNode(DAG
, DL
, &Ops
[0], Bytes
);
4454 return DAG
.getNode(ISD::BITCAST
, DL
, VT
, Op
);
// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
static bool isScalarToVector(SDValue Op) {
  for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
    if (!Op.getOperand(I).isUndef())
      return false;
  return true;
}
// Return a vector of type VT that contains Value in the first element.
// The other elements don't matter.
static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                   SDValue Value) {
  // If we have a constant, replicate it to all elements and let the
  // BUILD_VECTOR lowering take care of it.
  if (Value.getOpcode() == ISD::Constant ||
      Value.getOpcode() == ISD::ConstantFP) {
    SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
    return DAG.getBuildVector(VT, DL, Ops);
  }
  if (Value.isUndef())
    return DAG.getUNDEF(VT);
  return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
}
// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
// element 1.  Used for cases in which replication is cheap.
static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                 SDValue Op0, SDValue Op1) {
  if (Op0.isUndef()) {
    if (Op1.isUndef())
      return DAG.getUNDEF(VT);
    return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
  }
  if (Op1.isUndef())
    return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
  return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
                     buildScalarToVector(DAG, DL, VT, Op0),
                     buildScalarToVector(DAG, DL, VT, Op1));
}
// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
// vector for them.
static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
                          SDValue Op1) {
  if (Op0.isUndef() && Op1.isUndef())
    return DAG.getUNDEF(MVT::v2i64);
  // If one of the two inputs is undefined then replicate the other one,
  // in order to avoid using another register unnecessarily.
  if (Op0.isUndef())
    Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
  else if (Op1.isUndef())
    Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
  else {
    Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
    Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
  }
  return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
}
4516 // If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
4517 // better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
4518 // the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
4519 // would benefit from this representation and return it if so.
4520 static SDValue
tryBuildVectorShuffle(SelectionDAG
&DAG
,
4521 BuildVectorSDNode
*BVN
) {
4522 EVT VT
= BVN
->getValueType(0);
4523 unsigned NumElements
= VT
.getVectorNumElements();
4525 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
4526 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
4527 // need a BUILD_VECTOR, add an additional placeholder operand for that
4528 // BUILD_VECTOR and store its operands in ResidueOps.
4529 GeneralShuffle
GS(VT
);
4530 SmallVector
<SDValue
, SystemZ::VectorBytes
> ResidueOps
;
4531 bool FoundOne
= false;
4532 for (unsigned I
= 0; I
< NumElements
; ++I
) {
4533 SDValue Op
= BVN
->getOperand(I
);
4534 if (Op
.getOpcode() == ISD::TRUNCATE
)
4535 Op
= Op
.getOperand(0);
4536 if (Op
.getOpcode() == ISD::EXTRACT_VECTOR_ELT
&&
4537 Op
.getOperand(1).getOpcode() == ISD::Constant
) {
4538 unsigned Elem
= cast
<ConstantSDNode
>(Op
.getOperand(1))->getZExtValue();
4539 if (!GS
.add(Op
.getOperand(0), Elem
))
4542 } else if (Op
.isUndef()) {
4545 if (!GS
.add(SDValue(), ResidueOps
.size()))
4547 ResidueOps
.push_back(BVN
->getOperand(I
));
4551 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
4555 // Create the BUILD_VECTOR for the remaining elements, if any.
4556 if (!ResidueOps
.empty()) {
4557 while (ResidueOps
.size() < NumElements
)
4558 ResidueOps
.push_back(DAG
.getUNDEF(ResidueOps
[0].getValueType()));
4559 for (auto &Op
: GS
.Ops
) {
4560 if (!Op
.getNode()) {
4561 Op
= DAG
.getBuildVector(VT
, SDLoc(BVN
), ResidueOps
);
4566 return GS
.getNode(DAG
, SDLoc(BVN
));
4569 bool SystemZTargetLowering::isVectorElementLoad(SDValue Op
) const {
4570 if (Op
.getOpcode() == ISD::LOAD
&& cast
<LoadSDNode
>(Op
)->isUnindexed())
4572 if (Subtarget
.hasVectorEnhancements2() && Op
.getOpcode() == SystemZISD::LRV
)
// Combine GPR scalar values Elems into a vector of type VT.
SDValue
SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                   SmallVectorImpl<SDValue> &Elems) const {
  // See whether there is a single replicated value.
  SDValue Single;
  unsigned int NumElements = Elems.size();
  unsigned int Count = 0;
  for (auto Elem : Elems) {
    if (!Elem.isUndef()) {
      if (!Single.getNode())
        Single = Elem;
      else if (Elem != Single) {
        Single = SDValue();
        break;
      }
      Count += 1;
    }
  }
  // There are three cases here:
  //
  // - if the only defined element is a loaded one, the best sequence
  //   is a replicating load.
  //
  // - otherwise, if the only defined element is an i64 value, we will
  //   end up with the same VLVGP sequence regardless of whether we short-cut
  //   for replication or fall through to the later code.
  //
  // - otherwise, if the only defined element is an i32 or smaller value,
  //   we would need 2 instructions to replicate it: VLVGP followed by VREPx.
  //   This is only a win if the single defined element is used more than once.
  //   In other cases we're better off using a single VLVGx.
  if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
    return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);

  // If all elements are loads, use VLREP/VLEs (below).
  bool AllLoads = true;
  for (auto Elem : Elems)
    if (!isVectorElementLoad(Elem)) {
      AllLoads = false;
      break;
    }

  // The best way of building a v2i64 from two i64s is to use VLVGP.
  if (VT == MVT::v2i64 && !AllLoads)
    return joinDwords(DAG, DL, Elems[0], Elems[1]);

  // Use a 64-bit merge high to combine two doubles.
  if (VT == MVT::v2f64 && !AllLoads)
    return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);

  // Build v4f32 values directly from the FPRs:
  //
  //   <Axxx> <Bxxx> <Cxxxx> <Dxxx>
  //         V              V         VMRHF
  //      <ABxx>         <CDxx>
  //                 V                VMRHG
  //              <ABCD>
  if (VT == MVT::v4f32 && !AllLoads) {
    SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
    SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
    // Avoid unnecessary undefs by reusing the other operand.
    if (Op01.isUndef())
      Op01 = Op23;
    else if (Op23.isUndef())
      Op23 = Op01;
    // Merging identical replications is a no-op.
    if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
      return Op01;
    Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
    Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
    SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
                             DL, MVT::v2i64, Op01, Op23);
    return DAG.getNode(ISD::BITCAST, DL, VT, Op);
  }

  // Collect the constant terms.
  SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
  SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);

  unsigned NumConstants = 0;
  for (unsigned I = 0; I < NumElements; ++I) {
    SDValue Elem = Elems[I];
    if (Elem.getOpcode() == ISD::Constant ||
        Elem.getOpcode() == ISD::ConstantFP) {
      NumConstants += 1;
      Constants[I] = Elem;
      Done[I] = true;
    }
  }
  // If there was at least one constant, fill in the other elements of
  // Constants with undefs to get a full vector constant and use that
  // as the starting point.
  SDValue Result;
  SDValue ReplicatedVal;
  if (NumConstants > 0) {
    for (unsigned I = 0; I < NumElements; ++I)
      if (!Constants[I].getNode())
        Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
    Result = DAG.getBuildVector(VT, DL, Constants);
  } else {
    // Otherwise try to use VLREP or VLVGP to start the sequence in order to
    // avoid a false dependency on any previous contents of the vector
    // register.

    // Use a VLREP if at least one element is a load. Make sure to replicate
    // the load with the most elements having its value.
    std::map<const SDNode*, unsigned> UseCounts;
    SDNode *LoadMaxUses = nullptr;
    for (unsigned I = 0; I < NumElements; ++I)
      if (isVectorElementLoad(Elems[I])) {
        SDNode *Ld = Elems[I].getNode();
        UseCounts[Ld]++;
        if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
          LoadMaxUses = Ld;
      }
    if (LoadMaxUses != nullptr) {
      ReplicatedVal = SDValue(LoadMaxUses, 0);
      Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
    } else {
      // Try to use VLVGP.
      unsigned I1 = NumElements / 2 - 1;
      unsigned I2 = NumElements - 1;
      bool Def1 = !Elems[I1].isUndef();
      bool Def2 = !Elems[I2].isUndef();
      if (Def1 || Def2) {
        SDValue Elem1 = Elems[Def1 ? I1 : I2];
        SDValue Elem2 = Elems[Def2 ? I2 : I1];
        Result = DAG.getNode(ISD::BITCAST, DL, VT,
                             joinDwords(DAG, DL, Elem1, Elem2));
        Done[I1] = true;
        Done[I2] = true;
      } else
        Result = DAG.getUNDEF(VT);
    }
  }

  // Use VLVGx to insert the other elements.
  for (unsigned I = 0; I < NumElements; ++I)
    if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
                           DAG.getConstant(I, DL, MVT::i32));
  return Result;
}

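// Example (hypothetical input, for illustration only): for a BUILD_VECTOR of
// <4 x i32> {X, Y, undef, Z} with no constant or loaded elements, the code
// above picks I1 = 1 and I2 = 3, starts the sequence with a VLVGP of Y and Z
// (via joinDwords), marks those two positions as Done, and then inserts the
// remaining defined element X with a single INSERT_VECTOR_ELT (VLVGx).
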
SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  if (BVN->isConstant()) {
    if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
      return Op;

    // Fall back to loading it from memory.
    return SDValue();
  }

  // See if we should use shuffles to construct the vector from other vectors.
  if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
    return Res;

  // Detect SCALAR_TO_VECTOR conversions.
  if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
    return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));

  // Otherwise use buildVector to build the vector up from GPRs.
  unsigned NumElements = Op.getNumOperands();
  SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
  for (unsigned I = 0; I < NumElements; ++I)
    Ops[I] = Op.getOperand(I);
  return buildVector(DAG, DL, VT, Ops);
}

SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                   SelectionDAG &DAG) const {
  auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  unsigned NumElements = VT.getVectorNumElements();

  if (VSN->isSplat()) {
    SDValue Op0 = Op.getOperand(0);
    unsigned Index = VSN->getSplatIndex();
    assert(Index < VT.getVectorNumElements() &&
           "Splat index should be defined and in first operand");
    // See whether the value we're splatting is directly available as a scalar.
    if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
        Op0.getOpcode() == ISD::BUILD_VECTOR)
      return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
    // Otherwise keep it as a vector-to-vector operation.
    return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
                       DAG.getTargetConstant(Index, DL, MVT::i32));
  }

  GeneralShuffle GS(VT);
  for (unsigned I = 0; I < NumElements; ++I) {
    int Elt = VSN->getMaskElt(I);
    if (Elt < 0)
      GS.addUndef();
    else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
                     unsigned(Elt) % NumElements))
      return SDValue();
  }
  return GS.getNode(DAG, SDLoc(VSN));
}

SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // Just insert the scalar into element 0 of an undefined vector.
  return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
                     Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
                     Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
}

SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                      SelectionDAG &DAG) const {
  // Handle insertions of floating-point values.
  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDValue Op2 = Op.getOperand(2);
  EVT VT = Op.getValueType();

  // Insertions into constant indices of a v2f64 can be done using VPDI.
  // However, if the inserted value is a bitcast or a constant then it's
  // better to use GPRs, as below.
  if (VT == MVT::v2f64 &&
      Op1.getOpcode() != ISD::BITCAST &&
      Op1.getOpcode() != ISD::ConstantFP &&
      Op2.getOpcode() == ISD::Constant) {
    uint64_t Index = cast<ConstantSDNode>(Op2)->getZExtValue();
    unsigned Mask = VT.getVectorNumElements() - 1;
    if (Index <= Mask)
      return Op;
  }

  // Otherwise bitcast to the equivalent integer form and insert via a GPR.
  MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
  MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
  SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
                            DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
                            DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
  return DAG.getNode(ISD::BITCAST, DL, VT, Res);
}

SDValue
SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                               SelectionDAG &DAG) const {
  // Handle extractions of floating-point values.
  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  EVT VT = Op.getValueType();
  EVT VecVT = Op0.getValueType();

  // Extractions of constant indices can be done directly.
  if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
    uint64_t Index = CIndexN->getZExtValue();
    unsigned Mask = VecVT.getVectorNumElements() - 1;
    if (Index <= Mask)
      return Op;
  }

  // Otherwise bitcast to the equivalent integer form and extract via a GPR.
  MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
  MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
  SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
                            DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
  return DAG.getNode(ISD::BITCAST, DL, VT, Res);
}

SDValue
SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
                                              unsigned UnpackHigh) const {
  SDValue PackedOp = Op.getOperand(0);
  EVT OutVT = Op.getValueType();
  EVT InVT = PackedOp.getValueType();
  unsigned ToBits = OutVT.getScalarSizeInBits();
  unsigned FromBits = InVT.getScalarSizeInBits();
  do {
    FromBits *= 2;
    EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
                                 SystemZ::VectorBits / FromBits);
    PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), OutVT, PackedOp);
  } while (FromBits != ToBits);
  return PackedOp;
}

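// For example (assuming a v16i8 source sign-extended in-register to v4i32),
// the loop above doubles FromBits each iteration and emits two UNPACK_HIGH
// steps: 8-bit -> 16-bit elements, then 16-bit -> 32-bit elements, stopping
// once FromBits matches the 32-bit result element width.
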
SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
                                          unsigned ByScalar) const {
  // Look for cases where a vector shift can use the *_BY_SCALAR form.
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  unsigned ElemBitSize = VT.getScalarSizeInBits();

  // See whether the shift vector is a splat represented as BUILD_VECTOR.
  if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
    APInt SplatBits, SplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    // Check for constant splats.  Use ElemBitSize as the minimum element
    // width and reject splats that need wider elements.
    if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
                             ElemBitSize, true) &&
        SplatBitSize == ElemBitSize) {
      SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
                                      DL, MVT::i32);
      return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
    }
    // Check for variable splats.
    BitVector UndefElements;
    SDValue Splat = BVN->getSplatValue(&UndefElements);
    if (Splat) {
      // Since i32 is the smallest legal type, we either need a no-op
      // or a truncation.
      SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
      return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
    }
  }

  // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
  // and the shift amount is directly available in a GPR.
  if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
    if (VSN->isSplat()) {
      SDValue VSNOp0 = VSN->getOperand(0);
      unsigned Index = VSN->getSplatIndex();
      assert(Index < VT.getVectorNumElements() &&
             "Splat index should be defined and in first operand");
      if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
          VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
        // Since i32 is the smallest legal type, we either need a no-op
        // or a truncation.
        SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
                                    VSNOp0.getOperand(Index));
        return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
      }
    }
  }

  // Otherwise just treat the current form as legal.
  return Op;
}

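// For example, (shl <4 x i32> %val, <i32 3, i32 3, i32 3, i32 3>) is detected
// as a constant splat with SplatBitSize == ElemBitSize, so it is rewritten as
// a VSHL_BY_SCALAR node with the scalar shift amount 3 instead of a full
// vector-by-vector shift.
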
SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
                                              SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::BR_CC:
    return lowerBR_CC(Op, DAG);
  case ISD::SELECT_CC:
    return lowerSELECT_CC(Op, DAG);
  case ISD::SETCC:
    return lowerSETCC(Op, DAG);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
  case ISD::JumpTable:
    return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::VACOPY:
    return lowerVACOPY(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    return lowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::GET_DYNAMIC_AREA_OFFSET:
    return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
  case ISD::SMUL_LOHI:
    return lowerSMUL_LOHI(Op, DAG);
  case ISD::UMUL_LOHI:
    return lowerUMUL_LOHI(Op, DAG);
  case ISD::SDIVREM:
    return lowerSDIVREM(Op, DAG);
  case ISD::UDIVREM:
    return lowerUDIVREM(Op, DAG);
  case ISD::SADDO:
  case ISD::SSUBO:
  case ISD::UADDO:
  case ISD::USUBO:
    return lowerXALUO(Op, DAG);
  case ISD::ADDCARRY:
  case ISD::SUBCARRY:
    return lowerADDSUBCARRY(Op, DAG);
  case ISD::OR:
    return lowerOR(Op, DAG);
  case ISD::CTPOP:
    return lowerCTPOP(Op, DAG);
  case ISD::ATOMIC_FENCE:
    return lowerATOMIC_FENCE(Op, DAG);
  case ISD::ATOMIC_SWAP:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
  case ISD::ATOMIC_STORE:
    return lowerATOMIC_STORE(Op, DAG);
  case ISD::ATOMIC_LOAD:
    return lowerATOMIC_LOAD(Op, DAG);
  case ISD::ATOMIC_LOAD_ADD:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
  case ISD::ATOMIC_LOAD_SUB:
    return lowerATOMIC_LOAD_SUB(Op, DAG);
  case ISD::ATOMIC_LOAD_AND:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
  case ISD::ATOMIC_LOAD_OR:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
  case ISD::ATOMIC_LOAD_XOR:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
  case ISD::ATOMIC_LOAD_NAND:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
  case ISD::ATOMIC_LOAD_MIN:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
  case ISD::ATOMIC_LOAD_MAX:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
  case ISD::ATOMIC_LOAD_UMIN:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
  case ISD::ATOMIC_LOAD_UMAX:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
    return lowerATOMIC_CMP_SWAP(Op, DAG);
  case ISD::STACKSAVE:
    return lowerSTACKSAVE(Op, DAG);
  case ISD::STACKRESTORE:
    return lowerSTACKRESTORE(Op, DAG);
  case ISD::PREFETCH:
    return lowerPREFETCH(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return lowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH);
  case ISD::ZERO_EXTEND_VECTOR_INREG:
    return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH);
  case ISD::SHL:
    return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
  case ISD::SRL:
    return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
  case ISD::SRA:
    return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
  default:
    llvm_unreachable("Unexpected node to lower");
  }
}

// Lower operations with invalid operand or result types (currently used
// only for 128-bit integer types).

static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
  SDLoc DL(In);
  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
                           DAG.getIntPtrConstant(0, DL));
  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
                           DAG.getIntPtrConstant(1, DL));
  SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
                                    MVT::Untyped, Hi, Lo);
  return SDValue(Pair, 0);
}

static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) {
  SDLoc DL(In);
  SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
                                          DL, MVT::i64, In);
  SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
                                          DL, MVT::i64, In);
  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
}

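// For reference: lowerI128ToGR128 splits an i128 into Lo (bits 63..0,
// EXTRACT_ELEMENT index 0) and Hi (bits 127..64, index 1) and packs them into
// an untyped PAIR128 register pair; lowerGR128ToI128 is the inverse, pulling
// subreg_h64 / subreg_l64 back out and rebuilding the i128 with BUILD_PAIR.
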
void
SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  case ISD::ATOMIC_LOAD: {
    SDLoc DL(N);
    SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
    MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
    SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
                                          DL, Tys, Ops, MVT::i128, MMO);
    Results.push_back(lowerGR128ToI128(DAG, Res));
    Results.push_back(Res.getValue(1));
    break;
  }
  case ISD::ATOMIC_STORE: {
    SDLoc DL(N);
    SDVTList Tys = DAG.getVTList(MVT::Other);
    SDValue Ops[] = { N->getOperand(0),
                      lowerI128ToGR128(DAG, N->getOperand(2)),
                      N->getOperand(1) };
    MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
    SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
                                          DL, Tys, Ops, MVT::i128, MMO);
    // We have to enforce sequential consistency by performing a
    // serialization operation after the store.
    if (cast<AtomicSDNode>(N)->getOrdering() ==
        AtomicOrdering::SequentiallyConsistent)
      Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
                                       MVT::Other, Res), 0);
    Results.push_back(Res);
    break;
  }
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
    SDLoc DL(N);
    SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                      lowerI128ToGR128(DAG, N->getOperand(2)),
                      lowerI128ToGR128(DAG, N->getOperand(3)) };
    MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
    SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
                                          DL, Tys, Ops, MVT::i128, MMO);
    SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
                                SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
    Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
    Results.push_back(lowerGR128ToI128(DAG, Res));
    Results.push_back(Success);
    Results.push_back(Res.getValue(2));
    break;
  }
  default:
    llvm_unreachable("Unexpected node to lower");
  }
}

void
SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
                                          SmallVectorImpl<SDValue> &Results,
                                          SelectionDAG &DAG) const {
  return LowerOperationWrapper(N, Results, DAG);
}

const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
  switch ((SystemZISD::NodeType)Opcode) {
    case SystemZISD::FIRST_NUMBER: break;
    OPCODE(PCREL_WRAPPER);
    OPCODE(PCREL_OFFSET);
    OPCODE(SELECT_CCMASK);
    OPCODE(ADJDYNALLOC);
    OPCODE(SEARCH_STRING);
    OPCODE(TBEGIN_NOFLOAT);
    OPCODE(ROTATE_MASK);
    OPCODE(JOIN_DWORDS);
    OPCODE(PERMUTE_DWORDS);
    OPCODE(UNPACK_HIGH);
    OPCODE(UNPACKL_HIGH);
    OPCODE(UNPACKL_LOW);
    OPCODE(VSHL_BY_SCALAR);
    OPCODE(VSRL_BY_SCALAR);
    OPCODE(VSRA_BY_SCALAR);
    OPCODE(ATOMIC_SWAPW);
    OPCODE(ATOMIC_LOADW_ADD);
    OPCODE(ATOMIC_LOADW_SUB);
    OPCODE(ATOMIC_LOADW_AND);
    OPCODE(ATOMIC_LOADW_OR);
    OPCODE(ATOMIC_LOADW_XOR);
    OPCODE(ATOMIC_LOADW_NAND);
    OPCODE(ATOMIC_LOADW_MIN);
    OPCODE(ATOMIC_LOADW_MAX);
    OPCODE(ATOMIC_LOADW_UMIN);
    OPCODE(ATOMIC_LOADW_UMAX);
    OPCODE(ATOMIC_CMP_SWAPW);
    OPCODE(ATOMIC_CMP_SWAP);
    OPCODE(ATOMIC_LOAD_128);
    OPCODE(ATOMIC_STORE_128);
    OPCODE(ATOMIC_CMP_SWAP_128);
  }
  return nullptr;
#undef OPCODE
}

// Return true if VT is a vector whose elements are a whole number of bytes
// in width. Also check for presence of vector support.
bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
  if (!Subtarget.hasVector())
    return false;

  return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
}

// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
// producing a result of type ResVT.  Op is a possibly bitcast version
// of the input vector and Index is the index (based on type VecVT) that
// should be extracted.  Return the new extraction if a simplification
// was possible or if Force is true.
SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
                                              EVT VecVT, SDValue Op,
                                              unsigned Index,
                                              DAGCombinerInfo &DCI,
                                              bool Force) const {
  SelectionDAG &DAG = DCI.DAG;

  // The number of bytes being extracted.
  unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();

  for (;;) {
    unsigned Opcode = Op.getOpcode();
    if (Opcode == ISD::BITCAST)
      // Look through bitcasts.
      Op = Op.getOperand(0);
    else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
             canTreatAsByteVector(Op.getValueType())) {
      // Get a VPERM-like permute mask and see whether the bytes covered
      // by the extracted element are a contiguous sequence from one
      // input operand.
      SmallVector<int, SystemZ::VectorBytes> Bytes;
      if (!getVPermMask(Op, Bytes))
        break;
      int First;
      if (!getShuffleInput(Bytes, Index * BytesPerElement,
                           BytesPerElement, First))
        break;
      if (First < 0)
        return DAG.getUNDEF(ResVT);
      // Make sure the contiguous sequence starts at a multiple of the
      // original element size.
      unsigned Byte = unsigned(First) % Bytes.size();
      if (Byte % BytesPerElement != 0)
        break;
      // We can get the extracted value directly from an input.
      Index = Byte / BytesPerElement;
      Op = Op.getOperand(unsigned(First) / Bytes.size());
      Force = true;
    } else if (Opcode == ISD::BUILD_VECTOR &&
               canTreatAsByteVector(Op.getValueType())) {
      // We can only optimize this case if the BUILD_VECTOR elements are
      // at least as wide as the extracted value.
      EVT OpVT = Op.getValueType();
      unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
      if (OpBytesPerElement < BytesPerElement)
        break;
      // Make sure that the least-significant bit of the extracted value
      // is the least significant bit of an input.
      unsigned End = (Index + 1) * BytesPerElement;
      if (End % OpBytesPerElement != 0)
        break;
      // We're extracting the low part of one operand of the BUILD_VECTOR.
      Op = Op.getOperand(End / OpBytesPerElement - 1);
      if (!Op.getValueType().isInteger()) {
        EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
        Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
        DCI.AddToWorklist(Op.getNode());
      }
      EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
      Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
      if (VT != ResVT) {
        DCI.AddToWorklist(Op.getNode());
        Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
      }
      return Op;
    } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
                Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
                Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
               canTreatAsByteVector(Op.getValueType()) &&
               canTreatAsByteVector(Op.getOperand(0).getValueType())) {
      // Make sure that only the unextended bits are significant.
      EVT ExtVT = Op.getValueType();
      EVT OpVT = Op.getOperand(0).getValueType();
      unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
      unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
      unsigned Byte = Index * BytesPerElement;
      unsigned SubByte = Byte % ExtBytesPerElement;
      unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
      if (SubByte < MinSubByte ||
          SubByte + BytesPerElement > ExtBytesPerElement)
        break;
      // Get the byte offset of the unextended element
      Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
      // ...then add the byte offset relative to that element.
      Byte += SubByte - MinSubByte;
      if (Byte % BytesPerElement != 0)
        break;
      Op = Op.getOperand(0);
      Index = Byte / BytesPerElement;
      Force = true;
    } else
      break;
  }
  if (Force) {
    if (Op.getValueType() != VecVT) {
      Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
      DCI.AddToWorklist(Op.getNode());
    }
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
                       DAG.getConstant(Index, DL, MVT::i32));
  }
  return SDValue();
}

// Optimize vector operations in scalar value Op on the basis that Op
// is truncated to TruncVT.
SDValue SystemZTargetLowering::combineTruncateExtract(
    const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
  // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
  // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
  // of type TruncVT.
  if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      TruncVT.getSizeInBits() % 8 == 0) {
    SDValue Vec = Op.getOperand(0);
    EVT VecVT = Vec.getValueType();
    if (canTreatAsByteVector(VecVT)) {
      if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
        unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
        unsigned TruncBytes = TruncVT.getStoreSize();
        if (BytesPerElement % TruncBytes == 0) {
          // Calculate the value of Y' in the above description.  We are
          // splitting the original elements into Scale equal-sized pieces
          // and for truncation purposes want the last (least-significant)
          // of these pieces for IndexN.  This is easiest to do by calculating
          // the start index of the following element and then subtracting 1.
          unsigned Scale = BytesPerElement / TruncBytes;
          unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;

          // Defer the creation of the bitcast from X to combineExtract,
          // which might be able to optimize the extraction.
          VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
                                   VecVT.getStoreSize() / TruncBytes);
          EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
          return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
        }
      }
    }
  }
  return SDValue();
}

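// Worked example of the index math above (hypothetical types): truncating the
// i64 extracted from element 1 of a v2i64 down to i8 gives BytesPerElement = 8,
// TruncBytes = 1, Scale = 8 and NewIndex = (1 + 1) * 8 - 1 = 15, i.e. the
// least-significant byte of that element once the vector is reinterpreted as
// v16i8.
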
SDValue SystemZTargetLowering::combineZERO_EXTEND(
    SDNode *N, DAGCombinerInfo &DCI) const {
  // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
  SelectionDAG &DAG = DCI.DAG;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
    auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
    auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (TrueOp && FalseOp) {
      SDLoc DL(N0);
      SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
                        DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
                        N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
      SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
      // If N0 has multiple uses, change other uses as well.
      if (!N0.hasOneUse()) {
        SDValue TruncSelect =
          DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
        DCI.CombineTo(N0.getNode(), TruncSelect);
      }
      return NewSelect;
    }
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
    SDNode *N, DAGCombinerInfo &DCI) const {
  // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
  // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
  // into (select_cc LHS, RHS, -1, 0, COND)
  SelectionDAG &DAG = DCI.DAG;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
    N0 = N0.getOperand(0);
  if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
    SDLoc DL(N0);
    SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
                      DAG.getConstant(-1, DL, VT), DAG.getConstant(0, DL, VT),
                      N0.getOperand(2) };
    return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineSIGN_EXTEND(
    SDNode *N, DAGCombinerInfo &DCI) const {
  // Convert (sext (ashr (shl X, C1), C2)) to
  // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
  // cheap as narrower ones.
  SelectionDAG &DAG = DCI.DAG;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
    auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    SDValue Inner = N0.getOperand(0);
    if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
      if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
        unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
        unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
        unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
        EVT ShiftVT = N0.getOperand(1).getValueType();
        SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
                                  Inner.getOperand(0));
        SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
                                  DAG.getConstant(NewShlAmt, SDLoc(Inner),
                                                  ShiftVT));
        return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
                           DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
      }
    }
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineMERGE(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  unsigned Opcode = N->getOpcode();
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  if (Op0.getOpcode() == ISD::BITCAST)
    Op0 = Op0.getOperand(0);
  if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
    // (z_merge_* 0, 0) -> 0.  This is mostly useful for using VLLEZF
    // for v4f32.
    if (Op1 == N->getOperand(0))
      return Op1;
    // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
    EVT VT = Op1.getValueType();
    unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
    if (ElemBytes <= 4) {
      Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
                SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
      EVT InVT = VT.changeVectorElementTypeToInteger();
      EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
                                   SystemZ::VectorBytes / ElemBytes / 2);
      if (VT != InVT) {
        Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
        DCI.AddToWorklist(Op1.getNode());
      }
      SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
      DCI.AddToWorklist(Op.getNode());
      return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
    }
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineLOAD(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  EVT LdVT = N->getValueType(0);
  if (LdVT.isVector() || LdVT.isInteger())
    return SDValue();
  // Transform a scalar load that is REPLICATEd as well as having other
  // use(s) to the form where the other use(s) use the first element of the
  // REPLICATE instead of the load. Otherwise instruction selection will not
  // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
  // point loads.

  SDValue Replicate;
  SmallVector<SDNode*, 8> OtherUses;
  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
       UI != UE; ++UI) {
    if (UI->getOpcode() == SystemZISD::REPLICATE) {
      if (Replicate)
        return SDValue(); // Should never happen
      Replicate = SDValue(*UI, 0);
    }
    else if (UI.getUse().getResNo() == 0)
      OtherUses.push_back(*UI);
  }
  if (!Replicate || OtherUses.empty())
    return SDValue();

  SDLoc DL(N);
  SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
                              Replicate, DAG.getConstant(0, DL, MVT::i32));
  // Update uses of the loaded Value while preserving old chains.
  for (SDNode *U : OtherUses) {
    SmallVector<SDValue, 8> Ops;
    for (SDValue Op : U->ops())
      Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
    DAG.UpdateNodeOperands(U, Ops);
  }
  return SDValue(N, 0);
}

bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
  if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
    return true;
  if (Subtarget.hasVectorEnhancements2())
    if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64)
      return true;
  return false;
}

static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
  if (!VT.isVector() || !VT.isSimple() ||
      VT.getSizeInBits() != 128 ||
      VT.getScalarSizeInBits() % 8 != 0)
    return false;

  unsigned NumElts = VT.getVectorNumElements();
  for (unsigned i = 0; i < NumElts; ++i) {
    if (M[i] < 0) continue; // ignore UNDEF indices
    if ((unsigned) M[i] != NumElts - 1 - i)
      return false;
  }

  return true;
}

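// For example, for a v4i32 shuffle the only mask accepted here is
// <3, 2, 1, 0> (UNDEF entries are ignored), i.e. a full element reversal of a
// single 128-bit vector.
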
SDValue SystemZTargetLowering::combineSTORE(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  auto *SN = cast<StoreSDNode>(N);
  auto &Op1 = N->getOperand(1);
  EVT MemVT = SN->getMemoryVT();
  // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
  // for the extraction to be done on a vMiN value, so that we can use VSTE.
  // If X has wider elements then convert it to:
  // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
  if (MemVT.isInteger() && SN->isTruncatingStore()) {
    if (SDValue Value =
            combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
      DCI.AddToWorklist(Value.getNode());

      // Rewrite the store with the new form of stored value.
      return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
                               SN->getBasePtr(), SN->getMemoryVT(),
                               SN->getMemOperand());
    }
  }
  // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
  if (!SN->isTruncatingStore() &&
      Op1.getOpcode() == ISD::BSWAP &&
      Op1.getNode()->hasOneUse() &&
      canLoadStoreByteSwapped(Op1.getValueType())) {

    SDValue BSwapOp = Op1.getOperand(0);

    if (BSwapOp.getValueType() == MVT::i16)
      BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);

    SDValue Ops[] = {
      N->getOperand(0), BSwapOp, N->getOperand(2)
    };

    return
      DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
                              Ops, MemVT, SN->getMemOperand());
  }
  // Combine STORE (element-swap) into VSTER
  if (!SN->isTruncatingStore() &&
      Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
      Op1.getNode()->hasOneUse() &&
      Subtarget.hasVectorEnhancements2()) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
    ArrayRef<int> ShuffleMask = SVN->getMask();
    if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
      SDValue Ops[] = {
        N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
      };

      return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
                                     DAG.getVTList(MVT::Other),
                                     Ops, MemVT, SN->getMemOperand());
    }
  }

  return SDValue();
}

SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  // Combine element-swap (LOAD) into VLER
  if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
      N->getOperand(0).hasOneUse() &&
      Subtarget.hasVectorEnhancements2()) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
    ArrayRef<int> ShuffleMask = SVN->getMask();
    if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
      SDValue Load = N->getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(Load);

      // Create the element-swapping load.
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        LD->getBasePtr()   // Ptr
      };
      SDValue ESLoad =
        DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
                                DAG.getVTList(LD->getValueType(0), MVT::Other),
                                Ops, LD->getMemoryVT(), LD->getMemOperand());

      // First, combine the VECTOR_SHUFFLE away.  This makes the value produced
      // by the load dead.
      DCI.CombineTo(N, ESLoad);

      // Next, combine the load away, we give it a bogus result value but a real
      // chain result.  The result value is dead because the shuffle is dead.
      DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));

      // Return N so it doesn't get rechecked!
      return SDValue(N, 0);
    }
  }

  return SDValue();
}

SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  if (!Subtarget.hasVector())
    return SDValue();

  // Look through bitcasts that retain the number of vector elements.
  SDValue Op = N->getOperand(0);
  if (Op.getOpcode() == ISD::BITCAST &&
      Op.getValueType().isVector() &&
      Op.getOperand(0).getValueType().isVector() &&
      Op.getValueType().getVectorNumElements() ==
      Op.getOperand(0).getValueType().getVectorNumElements())
    Op = Op.getOperand(0);

  // Pull BSWAP out of a vector extraction.
  if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
    EVT VecVT = Op.getValueType();
    EVT EltVT = VecVT.getVectorElementType();
    Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
                     Op.getOperand(0), N->getOperand(1));
    DCI.AddToWorklist(Op.getNode());
    Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
    if (EltVT != N->getValueType(0)) {
      DCI.AddToWorklist(Op.getNode());
      Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
    }
    return Op;
  }

  // Try to simplify a vector extraction.
  if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
    SDValue Op0 = N->getOperand(0);
    EVT VecVT = Op0.getValueType();
    return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
                          IndexN->getZExtValue(), DCI, false);
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineJOIN_DWORDS(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  // (join_dwords X, X) == (replicate X)
  if (N->getOperand(0) == N->getOperand(1))
    return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
                       N->getOperand(0));
  return SDValue();
}

SDValue SystemZTargetLowering::combineFP_ROUND(
    SDNode *N, DAGCombinerInfo &DCI) const {

  if (!Subtarget.hasVector())
    return SDValue();

  // (fpround (extract_vector_elt X 0))
  // (fpround (extract_vector_elt X 1)) ->
  // (extract_vector_elt (VROUND X) 0)
  // (extract_vector_elt (VROUND X) 2)
  //
  // This is a special case since the target doesn't really support v2f32s.
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(0);
  if (N->getValueType(0) == MVT::f32 &&
      Op0.hasOneUse() &&
      Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      Op0.getOperand(0).getValueType() == MVT::v2f64 &&
      Op0.getOperand(1).getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
    SDValue Vec = Op0.getOperand(0);
    for (auto *U : Vec->uses()) {
      if (U != Op0.getNode() &&
          U->hasOneUse() &&
          U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          U->getOperand(0) == Vec &&
          U->getOperand(1).getOpcode() == ISD::Constant &&
          cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
        SDValue OtherRound = SDValue(*U->use_begin(), 0);
        if (OtherRound.getOpcode() == ISD::FP_ROUND &&
            OtherRound.getOperand(0) == SDValue(U, 0) &&
            OtherRound.getValueType() == MVT::f32) {
          SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
                                       MVT::v4f32, Vec);
          DCI.AddToWorklist(VRound.getNode());
          SDValue Extract1 =
            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
                        VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
          DCI.AddToWorklist(Extract1.getNode());
          DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
          SDValue Extract0 =
            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
                        VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
          DCI.AddToWorklist(Extract0.getNode());
          return Extract0;
        }
      }
    }
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineFP_EXTEND(
    SDNode *N, DAGCombinerInfo &DCI) const {

  if (!Subtarget.hasVector())
    return SDValue();

  // (fpextend (extract_vector_elt X 0))
  // (fpextend (extract_vector_elt X 2)) ->
  // (extract_vector_elt (VEXTEND X) 0)
  // (extract_vector_elt (VEXTEND X) 1)
  //
  // This is a special case since the target doesn't really support v2f32s.
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(0);
  if (N->getValueType(0) == MVT::f64 &&
      Op0.hasOneUse() &&
      Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      Op0.getOperand(0).getValueType() == MVT::v4f32 &&
      Op0.getOperand(1).getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
    SDValue Vec = Op0.getOperand(0);
    for (auto *U : Vec->uses()) {
      if (U != Op0.getNode() &&
          U->hasOneUse() &&
          U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          U->getOperand(0) == Vec &&
          U->getOperand(1).getOpcode() == ISD::Constant &&
          cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 2) {
        SDValue OtherExtend = SDValue(*U->use_begin(), 0);
        if (OtherExtend.getOpcode() == ISD::FP_EXTEND &&
            OtherExtend.getOperand(0) == SDValue(U, 0) &&
            OtherExtend.getValueType() == MVT::f64) {
          SDValue VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
                                        MVT::v2f64, Vec);
          DCI.AddToWorklist(VExtend.getNode());
          SDValue Extract1 =
            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
                        VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
          DCI.AddToWorklist(Extract1.getNode());
          DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
          SDValue Extract0 =
            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
                        VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
          DCI.AddToWorklist(Extract0.getNode());
          return Extract0;
        }
      }
    }
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineBSWAP(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
  if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
      N->getOperand(0).hasOneUse() &&
      canLoadStoreByteSwapped(N->getValueType(0))) {
    SDValue Load = N->getOperand(0);
    LoadSDNode *LD = cast<LoadSDNode>(Load);

    // Create the byte-swapping load.
    SDValue Ops[] = {
      LD->getChain(),    // Chain
      LD->getBasePtr()   // Ptr
    };
    EVT LoadVT = N->getValueType(0);
    if (LoadVT == MVT::i16)
      LoadVT = MVT::i32;
    SDValue BSLoad =
      DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
                              DAG.getVTList(LoadVT, MVT::Other),
                              Ops, LD->getMemoryVT(), LD->getMemOperand());

    // If this is an i16 load, insert the truncate.
    SDValue ResVal = BSLoad;
    if (N->getValueType(0) == MVT::i16)
      ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);

    // First, combine the bswap away.  This makes the value produced by the
    // load dead.
    DCI.CombineTo(N, ResVal);

    // Next, combine the load away, we give it a bogus result value but a real
    // chain result.  The result value is dead because the bswap is dead.
    DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));

    // Return N so it doesn't get rechecked!
    return SDValue(N, 0);
  }

  // Look through bitcasts that retain the number of vector elements.
  SDValue Op = N->getOperand(0);
  if (Op.getOpcode() == ISD::BITCAST &&
      Op.getValueType().isVector() &&
      Op.getOperand(0).getValueType().isVector() &&
      Op.getValueType().getVectorNumElements() ==
      Op.getOperand(0).getValueType().getVectorNumElements())
    Op = Op.getOperand(0);

  // Push BSWAP into a vector insertion if at least one side then simplifies.
  if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
    SDValue Vec = Op.getOperand(0);
    SDValue Elt = Op.getOperand(1);
    SDValue Idx = Op.getOperand(2);

    if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
        Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
        DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
        Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
        (canLoadStoreByteSwapped(N->getValueType(0)) &&
         ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
      EVT VecVT = N->getValueType(0);
      EVT EltVT = N->getValueType(0).getVectorElementType();
      if (VecVT != Vec.getValueType()) {
        Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
        DCI.AddToWorklist(Vec.getNode());
      }
      if (EltVT != Elt.getValueType()) {
        Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
        DCI.AddToWorklist(Elt.getNode());
      }
      Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
      DCI.AddToWorklist(Vec.getNode());
      Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
      DCI.AddToWorklist(Elt.getNode());
      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
                         Vec, Elt, Idx);
    }
  }

  // Push BSWAP into a vector shuffle if at least one side then simplifies.
  ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
  if (SV && Op.hasOneUse()) {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
        Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
        DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
        Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
      EVT VecVT = N->getValueType(0);
      if (VecVT != Op0.getValueType()) {
        Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
        DCI.AddToWorklist(Op0.getNode());
      }
      if (VecVT != Op1.getValueType()) {
        Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
        DCI.AddToWorklist(Op1.getNode());
      }
      Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
      DCI.AddToWorklist(Op0.getNode());
      Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
      DCI.AddToWorklist(Op1.getNode());
      return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
    }
  }

  return SDValue();
}

static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
  // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
  // set by the CCReg instruction using the CCValid / CCMask masks,
  // If the CCReg instruction is itself a ICMP testing the condition
  // code set by some other instruction, see whether we can directly
  // use that condition code.

  // Verify that we have an ICMP against some constant.
  if (CCValid != SystemZ::CCMASK_ICMP)
    return false;
  auto *ICmp = CCReg.getNode();
  if (ICmp->getOpcode() != SystemZISD::ICMP)
    return false;
  auto *CompareLHS = ICmp->getOperand(0).getNode();
  auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
  if (!CompareRHS)
    return false;

  // Optimize the case where CompareLHS is a SELECT_CCMASK.
  if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
    // Verify that we have an appropriate mask for a EQ or NE comparison.
    bool Invert = false;
    if (CCMask == SystemZ::CCMASK_CMP_NE)
      Invert = !Invert;
    else if (CCMask != SystemZ::CCMASK_CMP_EQ)
      return false;

    // Verify that the ICMP compares against one of select values.
    auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
    if (!TrueVal)
      return false;
    auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
    if (!FalseVal)
      return false;
    if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
      Invert = !Invert;
    else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
      return false;

    // Compute the effective CC mask for the new branch or select.
    auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
    auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
    if (!NewCCValid || !NewCCMask)
      return false;
    CCValid = NewCCValid->getZExtValue();
    CCMask = NewCCMask->getZExtValue();
    if (Invert)
      CCMask ^= CCValid;

    // Return the updated CCReg link.
    CCReg = CompareLHS->getOperand(4);
    return true;
  }

  // Optimize the case where CompareRHS is (SRA (SHL (IPM))).
  if (CompareLHS->getOpcode() == ISD::SRA) {
    auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
    if (!SRACount || SRACount->getZExtValue() != 30)
      return false;
    auto *SHL = CompareLHS->getOperand(0).getNode();
    if (SHL->getOpcode() != ISD::SHL)
      return false;
    auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
    if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
      return false;
    auto *IPM = SHL->getOperand(0).getNode();
    if (IPM->getOpcode() != SystemZISD::IPM)
      return false;

    // Avoid introducing CC spills (because SRA would clobber CC).
    if (!CompareLHS->hasOneUse())
      return false;
    // Verify that the ICMP compares against zero.
    if (CompareRHS->getZExtValue() != 0)
      return false;

    // Compute the effective CC mask for the new branch or select.
    switch (CCMask) {
    case SystemZ::CCMASK_CMP_EQ: break;
    case SystemZ::CCMASK_CMP_NE: break;
    case SystemZ::CCMASK_CMP_LT: CCMask = SystemZ::CCMASK_CMP_GT; break;
    case SystemZ::CCMASK_CMP_GT: CCMask = SystemZ::CCMASK_CMP_LT; break;
    case SystemZ::CCMASK_CMP_LE: CCMask = SystemZ::CCMASK_CMP_GE; break;
    case SystemZ::CCMASK_CMP_GE: CCMask = SystemZ::CCMASK_CMP_LE; break;
    default: return false;
    }

    // Return the updated CCReg link.
    CCReg = IPM->getOperand(0);
    return true;
  }

  return false;
}

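// For reference, in the IPM pattern handled above the shift pair
// (sra (shl X, 30 - IPM_CC), 30) sign-extends the two CC bits that IPM places
// at bit position SystemZ::IPM_CC, so the compared value is CC mapped to
// {0, 1, -2, -1}.  Comparing that value against zero is therefore a direct
// test of CC, which is why the LT/GT and LE/GE masks are swapped above.
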
SDValue SystemZTargetLowering::combineBR_CCMASK(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
  auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
  auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
  if (!CCValid || !CCMask)
    return SDValue();

  int CCValidVal = CCValid->getZExtValue();
  int CCMaskVal = CCMask->getZExtValue();
  SDValue Chain = N->getOperand(0);
  SDValue CCReg = N->getOperand(4);

  if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
    return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
                       Chain,
                       DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
                       DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
                       N->getOperand(3), CCReg);
  return SDValue();
}

SDValue SystemZTargetLowering::combineSELECT_CCMASK(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
  auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
  auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
  if (!CCValid || !CCMask)
    return SDValue();

  int CCValidVal = CCValid->getZExtValue();
  int CCMaskVal = CCMask->getZExtValue();
  SDValue CCReg = N->getOperand(4);

  if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
    return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
                       N->getOperand(0), N->getOperand(1),
                       DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
                       DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
                       CCReg);
  return SDValue();
}

SDValue SystemZTargetLowering::combineGET_CCMASK(
    SDNode *N, DAGCombinerInfo &DCI) const {

  // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
  auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
  auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
  if (!CCValid || !CCMask)
    return SDValue();
  int CCValidVal = CCValid->getZExtValue();
  int CCMaskVal = CCMask->getZExtValue();

  SDValue Select = N->getOperand(0);
  if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
    return SDValue();

  auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
  auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
  if (!SelectCCValid || !SelectCCMask)
    return SDValue();
  int SelectCCValidVal = SelectCCValid->getZExtValue();
  int SelectCCMaskVal = SelectCCMask->getZExtValue();

  auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
  auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
  if (!TrueVal || !FalseVal)
    return SDValue();
  if (TrueVal->getZExtValue() != 0 && FalseVal->getZExtValue() == 0)
    ;
  else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() != 0)
    SelectCCMaskVal ^= SelectCCValidVal;
  else
    return SDValue();

  if (SelectCCValidVal & ~CCValidVal)
    return SDValue();
  if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
    return SDValue();

  return Select->getOperand(4);
}

SDValue SystemZTargetLowering::combineIntDIVREM(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  EVT VT = N->getValueType(0);
  // In the case where the divisor is a vector of constants a cheaper
  // sequence of instructions can replace the divide. BuildSDIV is called to
  // do this during DAG combining, but it only succeeds when it can build a
  // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
  // since it is not Legal but Custom it can only happen before
  // legalization. Therefore we must scalarize this early before Combine
  // 1. For widened vectors, this is already the result of type legalization.
  if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
      DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
    return DAG.UnrollVectorOp(N);

  return SDValue();
}

SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
  if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
    return N->getOperand(0);
  return N;
}

SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
  switch(N->getOpcode()) {
  default: break;
  case ISD::ZERO_EXTEND:        return combineZERO_EXTEND(N, DCI);
  case ISD::SIGN_EXTEND:        return combineSIGN_EXTEND(N, DCI);
  case ISD::SIGN_EXTEND_INREG:  return combineSIGN_EXTEND_INREG(N, DCI);
  case SystemZISD::MERGE_HIGH:
  case SystemZISD::MERGE_LOW:   return combineMERGE(N, DCI);
  case ISD::LOAD:               return combineLOAD(N, DCI);
  case ISD::STORE:              return combineSTORE(N, DCI);
  case ISD::VECTOR_SHUFFLE:     return combineVECTOR_SHUFFLE(N, DCI);
  case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
  case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
  case ISD::FP_ROUND:           return combineFP_ROUND(N, DCI);
  case ISD::FP_EXTEND:          return combineFP_EXTEND(N, DCI);
  case ISD::BSWAP:              return combineBSWAP(N, DCI);
  case SystemZISD::BR_CCMASK:   return combineBR_CCMASK(N, DCI);
  case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
  case SystemZISD::GET_CCMASK:  return combineGET_CCMASK(N, DCI);
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::SREM:
  case ISD::UREM:               return combineIntDIVREM(N, DCI);
  }

  return SDValue();
}

// Return the demanded elements for the OpNo source operand of Op. DemandedElts
// is an element mask of the result of Op.
static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
                                    unsigned OpNo) {
  EVT VT = Op.getValueType();
  unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
  APInt SrcDemE;
  unsigned Opcode = Op.getOpcode();
  if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
    unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    switch (Id) {
    case Intrinsic::s390_vpksh:   // PACKS
    case Intrinsic::s390_vpksf:
    case Intrinsic::s390_vpksg:
    case Intrinsic::s390_vpkshs:  // PACKS_CC
    case Intrinsic::s390_vpksfs:
    case Intrinsic::s390_vpksgs:
    case Intrinsic::s390_vpklsh:  // PACKLS
    case Intrinsic::s390_vpklsf:
    case Intrinsic::s390_vpklsg:
    case Intrinsic::s390_vpklshs: // PACKLS_CC
    case Intrinsic::s390_vpklsfs:
    case Intrinsic::s390_vpklsgs:
      // VECTOR PACK truncates the elements of two source vectors into one.
      SrcDemE = DemandedElts;
      if (OpNo == 2)
        SrcDemE.lshrInPlace(NumElts / 2);
      SrcDemE = SrcDemE.trunc(NumElts / 2);
      break;
      // VECTOR UNPACK extends half the elements of the source vector.
    case Intrinsic::s390_vuphb:  // VECTOR UNPACK HIGH
    case Intrinsic::s390_vuphh:
    case Intrinsic::s390_vuphf:
    case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
    case Intrinsic::s390_vuplhh:
    case Intrinsic::s390_vuplhf:
      SrcDemE = APInt(NumElts * 2, 0);
      SrcDemE.insertBits(DemandedElts, 0);
      break;
    case Intrinsic::s390_vuplb:  // VECTOR UNPACK LOW
    case Intrinsic::s390_vuplhw:
    case Intrinsic::s390_vuplf:
    case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
    case Intrinsic::s390_vupllh:
    case Intrinsic::s390_vupllf:
      SrcDemE = APInt(NumElts * 2, 0);
      SrcDemE.insertBits(DemandedElts, NumElts);
      break;
    case Intrinsic::s390_vpdi: {
      // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
      SrcDemE = APInt(NumElts, 0);
      if (!DemandedElts[OpNo - 1])
        break;
      unsigned Mask = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
      // Demand input element 0 or 1, given by the mask bit value.
      SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
      break;
    }
    case Intrinsic::s390_vsldb: {
      // VECTOR SHIFT LEFT DOUBLE BY BYTE
      assert(VT == MVT::v16i8 && "Unexpected type.");
      unsigned FirstIdx = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
      unsigned NumSrc0Els = 16 - FirstIdx;
      SrcDemE = APInt(NumElts, 0);
      if (OpNo == 1) {
        APInt DemEls = DemandedElts.trunc(NumSrc0Els);
        SrcDemE.insertBits(DemEls, FirstIdx);
      } else {
        APInt DemEls = DemandedElts.lshr(NumSrc0Els);
        SrcDemE.insertBits(DemEls, 0);
      }
      break;
    }
    case Intrinsic::s390_vperm:
      SrcDemE = APInt(NumElts, 1);
      break;
    default:
      llvm_unreachable("Unhandled intrinsic.");
      break;
    }
  } else {
    switch (Opcode) {
    case SystemZISD::JOIN_DWORDS:
      // Scalar operand.
      SrcDemE = APInt(1, 1);
      break;
    case SystemZISD::SELECT_CCMASK:
      SrcDemE = DemandedElts;
      break;
    default:
      llvm_unreachable("Unhandled opcode.");
      break;
    }
  }
  return SrcDemE;
}

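// Illustration of the PACK case above (hypothetical masks): for a v8i16
// result packed from two v4i32 sources, DemandedElts bit I corresponds to
// result element I, so demanding elements {0, 4, 5} yields a source-1 mask of
// {0} (low half, truncated to NumElts / 2 bits) and a source-2 mask of {0, 1}
// (high half, shifted right by NumElts / 2 first).
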
// Helper function used by computeKnownBitsForTargetNode and
// ComputeNumSignBitsForTargetNode for two-operand nodes.
static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
                                  const APInt &DemandedElts,
                                  const SelectionDAG &DAG, unsigned Depth,
                                  unsigned OpNo) {
  APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
  APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
  KnownBits LHSKnown =
      DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
  KnownBits RHSKnown =
      DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
  Known.Zero = LHSKnown.Zero & RHSKnown.Zero;
  Known.One = LHSKnown.One & RHSKnown.One;
}

void
SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                     KnownBits &Known,
                                                     const APInt &DemandedElts,
                                                     const SelectionDAG &DAG,
                                                     unsigned Depth) const {
  Known.resetAll();

  // Intrinsic CC result is returned in the two low bits.
  unsigned tmp0, tmp1; // not used
  if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) {
    Known.Zero.setBitsFrom(2);
    return;
  }
  EVT VT = Op.getValueType();
  if (Op.getResNo() != 0 || VT == MVT::Untyped)
    return;
  assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
          "KnownBits does not match VT in bitwidth");
  assert ((!VT.isVector() ||
           (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
          "DemandedElts does not match VT number of elements");
  unsigned BitWidth = Known.getBitWidth();
  unsigned Opcode = Op.getOpcode();
  if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
    bool IsLogical = false;
    unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    switch (Id) {
    case Intrinsic::s390_vpksh:   // PACKS
    case Intrinsic::s390_vpksf:
    case Intrinsic::s390_vpksg:
    case Intrinsic::s390_vpkshs:  // PACKS_CC
    case Intrinsic::s390_vpksfs:
    case Intrinsic::s390_vpksgs:
    case Intrinsic::s390_vpklsh:  // PACKLS
    case Intrinsic::s390_vpklsf:
    case Intrinsic::s390_vpklsg:
    case Intrinsic::s390_vpklshs: // PACKLS_CC
    case Intrinsic::s390_vpklsfs:
    case Intrinsic::s390_vpklsgs:
    case Intrinsic::s390_vpdi:
    case Intrinsic::s390_vsldb:
    case Intrinsic::s390_vperm:
      computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
      break;
    case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
    case Intrinsic::s390_vuplhh:
    case Intrinsic::s390_vuplhf:
    case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
    case Intrinsic::s390_vupllh:
    case Intrinsic::s390_vupllf:
      IsLogical = true;
      LLVM_FALLTHROUGH;
    case Intrinsic::s390_vuphb:  // VECTOR UNPACK HIGH
    case Intrinsic::s390_vuphh:
    case Intrinsic::s390_vuphf:
    case Intrinsic::s390_vuplb:  // VECTOR UNPACK LOW
    case Intrinsic::s390_vuplhw:
    case Intrinsic::s390_vuplf: {
      SDValue SrcOp = Op.getOperand(1);
      APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
      Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
      if (IsLogical) {
        Known = Known.zext(BitWidth, true);
      } else
        Known = Known.sext(BitWidth);
      break;
    }
    default:
      break;
    }
  } else {
    switch (Opcode) {
    case SystemZISD::JOIN_DWORDS:
    case SystemZISD::SELECT_CCMASK:
      computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
      break;
    case SystemZISD::REPLICATE: {
      SDValue SrcOp = Op.getOperand(0);
      Known = DAG.computeKnownBits(SrcOp, Depth + 1);
      if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
        Known = Known.sext(BitWidth); // VREPI sign extends the immedate.
      break;
    }
    default:
      break;
    }
  }

  // Known has the width of the source operand(s). Adjust if needed to match
  // the passed bitwidth.
  if (Known.getBitWidth() != BitWidth)
    Known = Known.zextOrTrunc(BitWidth, false);
}

static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
                                        const SelectionDAG &DAG, unsigned Depth,
                                        unsigned OpNo) {
  APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
  unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
  if (LHS == 1) return 1; // Early out.
  APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
  unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
  if (RHS == 1) return 1; // Early out.
  unsigned Common = std::min(LHS, RHS);
  unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
  EVT VT = Op.getValueType();
  unsigned VTBits = VT.getScalarSizeInBits();
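  // For a PACK-style operation the source elements are wider than the result
  // elements, and truncating away the SrcExtraBits high bits removes that
  // many redundant sign bits: a 16-bit element with 12 sign bits keeps
  // 12 - 8 = 4 sign bits as an 8-bit element.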
  if (SrcBitWidth > VTBits) { // PACK
    unsigned SrcExtraBits = SrcBitWidth - VTBits;
    if (Common > SrcExtraBits)
      return (Common - SrcExtraBits);
    return 1;
  }
  assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
  return Common;
}

unsigned
SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
  if (Op.getResNo() != 0)
    return 1;
  unsigned Opcode = Op.getOpcode();
  if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
    unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    switch (Id) {
    case Intrinsic::s390_vpksh:   // PACKS
    case Intrinsic::s390_vpksf:
    case Intrinsic::s390_vpksg:
    case Intrinsic::s390_vpkshs:  // PACKS_CC
    case Intrinsic::s390_vpksfs:
    case Intrinsic::s390_vpksgs:
    case Intrinsic::s390_vpklsh:  // PACKLS
    case Intrinsic::s390_vpklsf:
    case Intrinsic::s390_vpklsg:
    case Intrinsic::s390_vpklshs: // PACKLS_CC
    case Intrinsic::s390_vpklsfs:
    case Intrinsic::s390_vpklsgs:
    case Intrinsic::s390_vpdi:
    case Intrinsic::s390_vsldb:
    case Intrinsic::s390_vperm:
      return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
    case Intrinsic::s390_vuphb:  // VECTOR UNPACK HIGH
    case Intrinsic::s390_vuphh:
    case Intrinsic::s390_vuphf:
    case Intrinsic::s390_vuplb:  // VECTOR UNPACK LOW
    case Intrinsic::s390_vuplhw:
    case Intrinsic::s390_vuplf: {
      SDValue PackedOp = Op.getOperand(1);
      APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
      unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
      EVT VT = Op.getValueType();
      unsigned VTBits = VT.getScalarSizeInBits();
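      // The unpack sign-extends each source element to the wider result
      // element, which adds VTBits - SrcBits copies of the sign bit.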
      Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
      return Tmp;
    }
    default:
      break;
    }
  } else {
    switch (Opcode) {
    case SystemZISD::SELECT_CCMASK:
      return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
    default:
      break;
    }
  }

  return 1;
}

//===----------------------------------------------------------------------===//
// Custom insertion
//===----------------------------------------------------------------------===//

// Create a new basic block after MBB.
static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) {
  MachineFunction &MF = *MBB->getParent();
  MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
  MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
  return NewMBB;
}

// Split MBB after MI and return the new block (the one that contains
// instructions after MI).
static MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI,
                                          MachineBasicBlock *MBB) {
  MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
  NewMBB->splice(NewMBB->begin(), MBB,
                 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
  return NewMBB;
}

// Split MBB before MI and return the new block (the one that contains MI).
static MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
                                           MachineBasicBlock *MBB) {
  MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
  NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
  NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
  return NewMBB;
}

// Force base value Base into a register before MI.  Return the register.
static Register forceReg(MachineInstr &MI, MachineOperand &Base,
                         const SystemZInstrInfo *TII) {
  if (Base.isReg())
    return Base.getReg();

  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction &MF = *MBB->getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
  BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
      .add(Base)
      .addImm(0)
      .addReg(0);
  return Reg;
}

// The CC operand of MI might be missing a kill marker because there
// were multiple uses of CC, and ISel didn't know which to mark.
// Figure out whether MI should have had a kill marker.
static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
  // Scan forward through BB for a use/def of CC.
  MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
  for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
    const MachineInstr& mi = *miI;
    if (mi.readsRegister(SystemZ::CC))
      return false;
    if (mi.definesRegister(SystemZ::CC))
      break; // Should have kill-flag - update below.
  }

  // If we hit the end of the block, check whether CC is live into a
  // successor.
  if (miI == MBB->end()) {
    for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI)
      if ((*SI)->isLiveIn(SystemZ::CC))
        return false;
  }

  return true;
}

// Return true if it is OK for this Select pseudo-opcode to be cascaded
// together with other Select pseudo-opcodes into a single basic-block with
// a conditional jump around it.
static bool isSelectPseudo(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case SystemZ::Select32:
  case SystemZ::Select64:
  case SystemZ::SelectF32:
  case SystemZ::SelectF64:
  case SystemZ::SelectF128:
  case SystemZ::SelectVR32:
  case SystemZ::SelectVR64:
  case SystemZ::SelectVR128:
    return true;

  default:
    return false;
  }
}

// Helper function, which inserts PHI functions into SinkMBB:
//   %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
// where %FalseValue(i) and %TrueValue(i) are taken from Selects.
static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects,
                                 MachineBasicBlock *TrueMBB,
                                 MachineBasicBlock *FalseMBB,
                                 MachineBasicBlock *SinkMBB) {
  MachineFunction *MF = TrueMBB->getParent();
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();

  MachineInstr *FirstMI = Selects.front();
  unsigned CCValid = FirstMI->getOperand(3).getImm();
  unsigned CCMask = FirstMI->getOperand(4).getImm();

  MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();

  // As we are creating the PHIs, we have to be careful if there is more than
  // one.  Later Selects may reference the results of earlier Selects, but later
  // PHIs have to reference the individual true/false inputs from earlier PHIs.
  // That also means that PHI construction must work forward from earlier to
  // later, and that the code must maintain a mapping from each earlier PHI's
  // destination register to the registers that went into that PHI.
  DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;

  for (auto MI : Selects) {
    Register DestReg = MI->getOperand(0).getReg();
    Register TrueReg = MI->getOperand(1).getReg();
    Register FalseReg = MI->getOperand(2).getReg();

    // If this Select we are generating is the opposite condition from
    // the jump we generated, then we have to swap the operands for the
    // PHI that is going to be generated.
    if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
      std::swap(TrueReg, FalseReg);
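
    // If an input is the result of an earlier Select, use that Select's
    // corresponding PHI input instead: along the TrueMBB edge the earlier
    // result equals its true input (.first), and along the FalseMBB edge
    // its false input (.second).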
    if (RegRewriteTable.find(TrueReg) != RegRewriteTable.end())
      TrueReg = RegRewriteTable[TrueReg].first;

    if (RegRewriteTable.find(FalseReg) != RegRewriteTable.end())
      FalseReg = RegRewriteTable[FalseReg].second;

    DebugLoc DL = MI->getDebugLoc();
    BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
      .addReg(TrueReg).addMBB(TrueMBB)
      .addReg(FalseReg).addMBB(FalseMBB);

    // Add this PHI to the rewrite table.
    RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
  }

  MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
}

// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
MachineBasicBlock *
SystemZTargetLowering::emitSelect(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const {
  assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());

  unsigned CCValid = MI.getOperand(3).getImm();
  unsigned CCMask = MI.getOperand(4).getImm();

  // If we have a sequence of Select* pseudo instructions using the
  // same condition code value, we want to expand all of them into
  // a single pair of basic blocks using the same condition.
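  // A Select using the inverted mask (CCValid ^ CCMask) can be merged as
  // well; its PHI simply takes the true/false inputs in swapped order.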
  SmallVector<MachineInstr*, 8> Selects;
  SmallVector<MachineInstr*, 8> DbgValues;
  Selects.push_back(&MI);
  unsigned Count = 0;
  for (MachineBasicBlock::iterator NextMIIt =
         std::next(MachineBasicBlock::iterator(MI));
       NextMIIt != MBB->end(); ++NextMIIt) {
    if (NextMIIt->definesRegister(SystemZ::CC))
      break;
    if (isSelectPseudo(*NextMIIt)) {
      assert(NextMIIt->getOperand(3).getImm() == CCValid &&
             "Bad CCValid operands since CC was not redefined.");
      if (NextMIIt->getOperand(4).getImm() == CCMask ||
          NextMIIt->getOperand(4).getImm() == (CCValid ^ CCMask)) {
        Selects.push_back(&*NextMIIt);
        continue;
      }
      break;
    }
    bool User = false;
    for (auto SelMI : Selects)
      if (NextMIIt->readsVirtualRegister(SelMI->getOperand(0).getReg())) {
        User = true;
        break;
      }
    if (NextMIIt->isDebugInstr()) {
      if (User) {
        assert(NextMIIt->isDebugValue() && "Unhandled debug opcode.");
        DbgValues.push_back(&*NextMIIt);
      }
    }
    else if (User || ++Count > 20)
      break;
  }

  MachineInstr *LastMI = Selects.back();
  bool CCKilled =
      (LastMI->killsRegister(SystemZ::CC) || checkCCKill(*LastMI, MBB));
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *JoinMBB  = splitBlockAfter(LastMI, MBB);
  MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);

  // Unless CC was killed in the last Select instruction, mark it as
  // live-in to both FalseMBB and JoinMBB.
  if (!CCKilled) {
    FalseMBB->addLiveIn(SystemZ::CC);
    JoinMBB->addLiveIn(SystemZ::CC);
  }

  //  StartMBB:
  //   BRC CCMask, JoinMBB
  //   # fallthrough to FalseMBB
  MBB = StartMBB;
  BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
    .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
  MBB->addSuccessor(JoinMBB);
  MBB->addSuccessor(FalseMBB);

  //  FalseMBB:
  //   # fallthrough to JoinMBB
  MBB = FalseMBB;
  MBB->addSuccessor(JoinMBB);

  //  JoinMBB:
  //   %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
  //  ...
  MBB = JoinMBB;
  createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
  for (auto SelMI : Selects)
    SelMI->eraseFromParent();

  MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
  for (auto DbgMI : DbgValues)
    MBB->splice(InsertPos, StartMBB, DbgMI);

  return JoinMBB;
}

// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
// StoreOpcode is the store to use and Invert says whether the store should
// happen when the condition is false rather than true.  If a STORE ON
// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
                                                        MachineBasicBlock *MBB,
                                                        unsigned StoreOpcode,
                                                        unsigned STOCOpcode,
                                                        bool Invert) const {
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());

  Register SrcReg = MI.getOperand(0).getReg();
  MachineOperand Base = MI.getOperand(1);
  int64_t Disp = MI.getOperand(2).getImm();
  Register IndexReg = MI.getOperand(3).getReg();
  unsigned CCValid = MI.getOperand(4).getImm();
  unsigned CCMask = MI.getOperand(5).getImm();
  DebugLoc DL = MI.getDebugLoc();

  StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);

  // Use STOCOpcode if possible.  We could use different store patterns in
  // order to avoid matching the index register, but the performance trade-offs
  // might be more complicated in that case.
  if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
    if (Invert)
      CCMask ^= CCValid;

    // ISel pattern matching also adds a load memory operand of the same
    // address, so take special care to find the storing memory operand.
    MachineMemOperand *MMO = nullptr;
    for (auto *I : MI.memoperands())
      if (I->isStore()) {
        MMO = I;
        break;
      }

    BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
      .addReg(SrcReg)
      .add(Base)
      .addImm(Disp)
      .addImm(CCValid)
      .addImm(CCMask)
      .addMemOperand(MMO);

    MI.eraseFromParent();
    return MBB;
  }

  // Get the condition needed to branch around the store.
  if (!Invert)
    CCMask ^= CCValid;

  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *JoinMBB  = splitBlockBefore(MI, MBB);
  MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);

  // Unless CC was killed in the CondStore instruction, mark it as
  // live-in to both FalseMBB and JoinMBB.
  if (!MI.killsRegister(SystemZ::CC) && !checkCCKill(MI, JoinMBB)) {
    FalseMBB->addLiveIn(SystemZ::CC);
    JoinMBB->addLiveIn(SystemZ::CC);
  }

  //  StartMBB:
  //   BRC CCMask, JoinMBB
  //   # fallthrough to FalseMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
  MBB->addSuccessor(JoinMBB);
  MBB->addSuccessor(FalseMBB);

  //  FalseMBB:
  //   store %SrcReg, %Disp(%Index,%Base)
  //   # fallthrough to JoinMBB
  MBB = FalseMBB;
  BuildMI(MBB, DL, TII->get(StoreOpcode))
    .addReg(SrcReg)
    .add(Base)
    .addImm(Disp)
    .addReg(IndexReg);
  MBB->addSuccessor(JoinMBB);

  MI.eraseFromParent();
  return JoinMBB;
}

// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_*
// or ATOMIC_SWAP{,W} instruction MI.  BinOpcode is the instruction that
// performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}.
// BitSize is the width of the field in bits, or 0 if this is a partword
// ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize
// is one of the operands.  Invert says whether the field should be
// inverted after performing BinOpcode (e.g. for NAND).
MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
    unsigned BitSize, bool Invert) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  bool IsSubWord = (BitSize < 32);
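
  // For the partword forms the real field width is not implied by the opcode;
  // it is carried as an extra operand, together with the shift amounts used
  // to rotate the field to and from its position within the containing word.
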
  // Extract the operands.  Base can be a register or a frame index.
  // Src2 can be a register or immediate.
  Register Dest = MI.getOperand(0).getReg();
  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
  int64_t Disp = MI.getOperand(2).getImm();
  MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
  Register BitShift = IsSubWord ? MI.getOperand(4).getReg() : Register();
  Register NegBitShift = IsSubWord ? MI.getOperand(5).getReg() : Register();
  DebugLoc DL = MI.getDebugLoc();
  if (IsSubWord)
    BitSize = MI.getOperand(6).getImm();

  // Subword operations use 32-bit registers.
  const TargetRegisterClass *RC = (BitSize <= 32 ?
                                   &SystemZ::GR32BitRegClass :
                                   &SystemZ::GR64BitRegClass);
  unsigned LOpcode  = BitSize <= 32 ? SystemZ::L  : SystemZ::LG;
  unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;

  // Get the right opcodes for the displacement.
  LOpcode  = TII->getOpcodeForOffset(LOpcode,  Disp);
  CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.
  Register OrigVal       = MRI.createVirtualRegister(RC);
  Register OldVal        = MRI.createVirtualRegister(RC);
  Register NewVal        = (BinOpcode || IsSubWord ?
                            MRI.createVirtualRegister(RC) : Src2.getReg());
  Register RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
  Register RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);

  // Insert a basic block for the main loop.
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB  = splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB  = emitBlockAfter(StartMBB);

  //  StartMBB:
  //   ...
  //   %OrigVal = L Disp(%Base)
  //   # fall through to LoopMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
  MBB->addSuccessor(LoopMBB);

  //  LoopMBB:
  //   %OldVal        = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
  //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
  //   %RotatedNewVal = OP %RotatedOldVal, %Src2
  //   %NewVal        = RLL %RotatedNewVal, 0(%NegBitShift)
  //   %Dest          = CS %OldVal, %NewVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to DoneMBB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigVal).addMBB(StartMBB)
    .addReg(Dest).addMBB(LoopMBB);
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
      .addReg(OldVal).addReg(BitShift).addImm(0);
  if (Invert) {
    // Perform the operation normally and then invert every bit of the field.
    Register Tmp = MRI.createVirtualRegister(RC);
    BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
    if (BitSize <= 32)
      // XILF with the upper BitSize bits set.
      BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
        .addReg(Tmp).addImm(-1U << (32 - BitSize));
    else {
      // Use LCGR and add -1 to the result, which is more compact than
      // an XILF, XILH pair.
      Register Tmp2 = MRI.createVirtualRegister(RC);
      BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp);
      BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal)
        .addReg(Tmp2).addImm(-1);
    }
  } else if (BinOpcode)
    // A simple binary operation.
    BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
      .addReg(RotatedOldVal)
      .add(Src2);
  else if (IsSubWord)
    // Use RISBG to rotate Src2 into position and use it to replace the
    // field in RotatedOldVal.
    BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
      .addReg(RotatedOldVal).addReg(Src2.getReg())
      .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
      .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
    .addReg(OldVal)
    .addReg(NewVal)
    .add(Base)
    .addImm(Disp);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();
  return DoneMBB;
}

// Implement EmitInstrWithCustomInserter for pseudo
// ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI.  CompareOpcode is the
// instruction that should be used to compare the current field with the
// minimum or maximum value.  KeepOldMask is the BRC condition-code mask
// for when the current field should be kept.  BitSize is the width of
// the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction.
MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
    unsigned KeepOldMask, unsigned BitSize) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  bool IsSubWord = (BitSize < 32);

  // Extract the operands.  Base can be a register or a frame index.
  Register Dest = MI.getOperand(0).getReg();
  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
  int64_t Disp = MI.getOperand(2).getImm();
  Register Src2 = MI.getOperand(3).getReg();
  Register BitShift = (IsSubWord ? MI.getOperand(4).getReg() : Register());
  Register NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : Register());
  DebugLoc DL = MI.getDebugLoc();
  if (IsSubWord)
    BitSize = MI.getOperand(6).getImm();

  // Subword operations use 32-bit registers.
  const TargetRegisterClass *RC = (BitSize <= 32 ?
                                   &SystemZ::GR32BitRegClass :
                                   &SystemZ::GR64BitRegClass);
  unsigned LOpcode  = BitSize <= 32 ? SystemZ::L  : SystemZ::LG;
  unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;

  // Get the right opcodes for the displacement.
  LOpcode  = TII->getOpcodeForOffset(LOpcode,  Disp);
  CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.
  Register OrigVal       = MRI.createVirtualRegister(RC);
  Register OldVal        = MRI.createVirtualRegister(RC);
  Register NewVal        = MRI.createVirtualRegister(RC);
  Register RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
  Register RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2);
  Register RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);

  // Insert 3 basic blocks for the loop.
  MachineBasicBlock *StartMBB  = MBB;
  MachineBasicBlock *DoneMBB   = splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB   = emitBlockAfter(StartMBB);
  MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB);
  MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB);

  //  StartMBB:
  //   ...
  //   %OrigVal = L Disp(%Base)
  //   # fall through to LoopMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
  MBB->addSuccessor(LoopMBB);

  //  LoopMBB:
  //   %OldVal        = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
  //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
  //   CompareOpcode %RotatedOldVal, %Src2
  //   BRC KeepOldMask, UpdateMBB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigVal).addMBB(StartMBB)
    .addReg(Dest).addMBB(UpdateMBB);
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
      .addReg(OldVal).addReg(BitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CompareOpcode))
    .addReg(RotatedOldVal).addReg(Src2);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
  MBB->addSuccessor(UpdateMBB);
  MBB->addSuccessor(UseAltMBB);

  //  UseAltMBB:
  //   %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
  //   # fall through to UpdateMBB
  MBB = UseAltMBB;
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
      .addReg(RotatedOldVal).addReg(Src2)
      .addImm(32).addImm(31 + BitSize).addImm(0);
  MBB->addSuccessor(UpdateMBB);

  //  UpdateMBB:
  //   %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
  //                        [ %RotatedAltVal, UseAltMBB ]
  //   %NewVal        = RLL %RotatedNewVal, 0(%NegBitShift)
  //   %Dest          = CS %OldVal, %NewVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to DoneMBB
  MBB = UpdateMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
    .addReg(RotatedOldVal).addMBB(LoopMBB)
    .addReg(RotatedAltVal).addMBB(UseAltMBB);
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
      .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
    .addReg(OldVal)
    .addReg(NewVal)
    .add(Base)
    .addImm(Disp);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();
  return DoneMBB;
}

// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW
// instruction MI.
MachineBasicBlock *
SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
                                          MachineBasicBlock *MBB) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Extract the operands.  Base can be a register or a frame index.
  Register Dest = MI.getOperand(0).getReg();
  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
  int64_t Disp = MI.getOperand(2).getImm();
  Register OrigCmpVal = MI.getOperand(3).getReg();
  Register OrigSwapVal = MI.getOperand(4).getReg();
  Register BitShift = MI.getOperand(5).getReg();
  Register NegBitShift = MI.getOperand(6).getReg();
  int64_t BitSize = MI.getOperand(7).getImm();
  DebugLoc DL = MI.getDebugLoc();

  const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;

  // Get the right opcodes for the displacement.
  unsigned LOpcode  = TII->getOpcodeForOffset(SystemZ::L,  Disp);
  unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.
  Register OrigOldVal = MRI.createVirtualRegister(RC);
  Register OldVal = MRI.createVirtualRegister(RC);
  Register CmpVal = MRI.createVirtualRegister(RC);
  Register SwapVal = MRI.createVirtualRegister(RC);
  Register StoreVal = MRI.createVirtualRegister(RC);
  Register RetryOldVal = MRI.createVirtualRegister(RC);
  Register RetryCmpVal = MRI.createVirtualRegister(RC);
  Register RetrySwapVal = MRI.createVirtualRegister(RC);

  // Insert 2 basic blocks for the loop.
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB  = splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB  = emitBlockAfter(StartMBB);
  MachineBasicBlock *SetMBB   = emitBlockAfter(LoopMBB);

  //  StartMBB:
  //   ...
  //   %OrigOldVal = L Disp(%Base)
  //   # fall through to LoopMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
    .add(Base)
    .addImm(Disp)
    .addReg(0);
  MBB->addSuccessor(LoopMBB);

  //  LoopMBB:
  //   %OldVal      = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
  //   %CmpVal      = phi [ %OrigCmpVal, EntryBB ], [ %RetryCmpVal, SetMBB ]
  //   %SwapVal     = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
  //   %Dest        = RLL %OldVal, BitSize(%BitShift)
  //                    ^^ The low BitSize bits contain the field
  //                       of interest.
  //   %RetryCmpVal = RISBG32 %CmpVal, %Dest, 32, 63-BitSize, 0
  //                    ^^ Replace the upper 32-BitSize bits of the
  //                       comparison value with those that we loaded,
  //                       so that we can use a full word comparison.
  //   CR %Dest, %RetryCmpVal
  //   JNE DoneMBB
  //   # Fall through to SetMBB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigOldVal).addMBB(StartMBB)
    .addReg(RetryOldVal).addMBB(SetMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), CmpVal)
    .addReg(OrigCmpVal).addMBB(StartMBB)
    .addReg(RetryCmpVal).addMBB(SetMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
    .addReg(OrigSwapVal).addMBB(StartMBB)
    .addReg(RetrySwapVal).addMBB(SetMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::RLL), Dest)
    .addReg(OldVal).addReg(BitShift).addImm(BitSize);
  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetryCmpVal)
    .addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
  BuildMI(MBB, DL, TII->get(SystemZ::CR))
    .addReg(Dest).addReg(RetryCmpVal);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ICMP)
    .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB);
  MBB->addSuccessor(DoneMBB);
  MBB->addSuccessor(SetMBB);

  //  SetMBB:
  //   %RetrySwapVal = RISBG32 %SwapVal, %Dest, 32, 63-BitSize, 0
  //                     ^^ Replace the upper 32-BitSize bits of the new
  //                        value with those that we loaded.
  //   %StoreVal     = RLL %RetrySwapVal, -BitSize(%NegBitShift)
  //                     ^^ Rotate the new field to its proper position.
  //   %RetryOldVal  = CS %Dest, %StoreVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to ExitMBB
  MBB = SetMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
    .addReg(SwapVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
  BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
    .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
  BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
    .addReg(Dest)
    .addReg(StoreVal)
    .add(Base)
    .addImm(Disp);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
  // to the block after the loop.  At this point, CC may have been defined
  // either by the CR in LoopMBB or by the CS in SetMBB.
  if (!MI.registerDefIsDead(SystemZ::CC))
    DoneMBB->addLiveIn(SystemZ::CC);

  MI.eraseFromParent();
  return DoneMBB;
}

// Emit a move from two GR64s to a GR128.
MachineBasicBlock *
SystemZTargetLowering::emitPair128(MachineInstr &MI,
                                   MachineBasicBlock *MBB) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI.getDebugLoc();

  Register Dest = MI.getOperand(0).getReg();
  Register Hi = MI.getOperand(1).getReg();
  Register Lo = MI.getOperand(2).getReg();
  Register Tmp1 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
  Register Tmp2 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);

  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Tmp1);
  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Tmp2)
    .addReg(Tmp1).addReg(Hi).addImm(SystemZ::subreg_h64);
  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
    .addReg(Tmp2).addReg(Lo).addImm(SystemZ::subreg_l64);

  MI.eraseFromParent();
  return MBB;
}

// Emit an extension from a GR64 to a GR128.  ClearEven is true
// if the high register of the GR128 value must be cleared or false if
// it's "don't care".
MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
                                                     MachineBasicBlock *MBB,
                                                     bool ClearEven) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI.getDebugLoc();

  Register Dest = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);

  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
  if (ClearEven) {
    Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
    Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);

    BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
      .addImm(0);
    BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
      .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
    In128 = NewIn128;
  }
  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
    .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);

  MI.eraseFromParent();
  return MBB;
}

MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI.getDebugLoc();

  MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
  uint64_t DestDisp = MI.getOperand(1).getImm();
  MachineOperand SrcBase = earlyUseOperand(MI.getOperand(2));
  uint64_t SrcDisp = MI.getOperand(3).getImm();
  uint64_t Length = MI.getOperand(4).getImm();

  // When generating more than one CLC, all but the last will need to
  // branch to the end when a difference is found.
  MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ?
                               splitBlockAfter(MI, MBB) : nullptr);
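
  // Each MVC/CLC-style instruction handles at most 256 bytes, so longer
  // operations are lowered to a loop over 256-byte chunks (when a trip
  // count operand is present) plus straight-line code for the remainder.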
  // Check for the loop form, in which operand 5 is the trip count.
  if (MI.getNumExplicitOperands() > 5) {
    bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);

    Register StartCountReg = MI.getOperand(5).getReg();
    Register StartSrcReg   = forceReg(MI, SrcBase, TII);
    Register StartDestReg  = (HaveSingleBase ? StartSrcReg :
                              forceReg(MI, DestBase, TII));

    const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
    Register ThisSrcReg  = MRI.createVirtualRegister(RC);
    Register ThisDestReg = (HaveSingleBase ? ThisSrcReg :
                            MRI.createVirtualRegister(RC));
    Register NextSrcReg  = MRI.createVirtualRegister(RC);
    Register NextDestReg = (HaveSingleBase ? NextSrcReg :
                            MRI.createVirtualRegister(RC));

    RC = &SystemZ::GR64BitRegClass;
    Register ThisCountReg = MRI.createVirtualRegister(RC);
    Register NextCountReg = MRI.createVirtualRegister(RC);

    MachineBasicBlock *StartMBB = MBB;
    MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
    MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
    MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB);

    //  StartMBB:
    //   # fall through to LoopMBB
    MBB->addSuccessor(LoopMBB);

    //  LoopMBB:
    //   %ThisDestReg = phi [ %StartDestReg, StartMBB ],
    //                      [ %NextDestReg, NextMBB ]
    //   %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
    //                     [ %NextSrcReg, NextMBB ]
    //   %ThisCountReg = phi [ %StartCountReg, StartMBB ],
    //                       [ %NextCountReg, NextMBB ]
    //   ( PFD 2, 768+DestDisp(%ThisDestReg) )
    //   Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
    //   ( JLH EndMBB )
    //
    // The prefetch is used only for MVC.  The JLH is used only for CLC.
    MBB = LoopMBB;

    BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
      .addReg(StartDestReg).addMBB(StartMBB)
      .addReg(NextDestReg).addMBB(NextMBB);
    if (!HaveSingleBase)
      BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
        .addReg(StartSrcReg).addMBB(StartMBB)
        .addReg(NextSrcReg).addMBB(NextMBB);
    BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
      .addReg(StartCountReg).addMBB(StartMBB)
      .addReg(NextCountReg).addMBB(NextMBB);
    if (Opcode == SystemZ::MVC)
      BuildMI(MBB, DL, TII->get(SystemZ::PFD))
        .addImm(SystemZ::PFD_WRITE)
        .addReg(ThisDestReg).addImm(DestDisp + 768).addReg(0);
    BuildMI(MBB, DL, TII->get(Opcode))
      .addReg(ThisDestReg).addImm(DestDisp).addImm(256)
      .addReg(ThisSrcReg).addImm(SrcDisp);
    if (EndMBB) {
      BuildMI(MBB, DL, TII->get(SystemZ::BRC))
        .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
        .addMBB(EndMBB);
      MBB->addSuccessor(EndMBB);
      MBB->addSuccessor(NextMBB);
    }

    //  NextMBB:
    //   %NextDestReg = LA 256(%ThisDestReg)
    //   %NextSrcReg = LA 256(%ThisSrcReg)
    //   %NextCountReg = AGHI %ThisCountReg, -1
    //   CGHI %NextCountReg, 0
    //   JLH LoopMBB
    //   # fall through to DoneMBB
    //
    // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
    MBB = NextMBB;

    BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
      .addReg(ThisDestReg).addImm(256).addReg(0);
    if (!HaveSingleBase)
      BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
        .addReg(ThisSrcReg).addImm(256).addReg(0);
    BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
      .addReg(ThisCountReg).addImm(-1);
    BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
      .addReg(NextCountReg).addImm(0);
    BuildMI(MBB, DL, TII->get(SystemZ::BRC))
      .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
      .addMBB(LoopMBB);
    MBB->addSuccessor(LoopMBB);
    MBB->addSuccessor(DoneMBB);

    DestBase = MachineOperand::CreateReg(NextDestReg, false);
    SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
    Length &= 255;
    if (EndMBB && !Length)
      // If the loop handled the whole CLC range, DoneMBB will be empty with
      // CC live-through into EndMBB, so add it as live-in.
      DoneMBB->addLiveIn(SystemZ::CC);
    MBB = DoneMBB;
  }
  // Handle any remaining bytes with straight-line code.
  while (Length > 0) {
    uint64_t ThisLength = std::min(Length, uint64_t(256));
    // The previous iteration might have created out-of-range displacements.
    // Apply them using LAY if so.
    if (!isUInt<12>(DestDisp)) {
      Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
      BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
        .add(DestBase)
        .addImm(DestDisp)
        .addReg(0);
      DestBase = MachineOperand::CreateReg(Reg, false);
      DestDisp = 0;
    }
    if (!isUInt<12>(SrcDisp)) {
      Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
      BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
        .add(SrcBase)
        .addImm(SrcDisp)
        .addReg(0);
      SrcBase = MachineOperand::CreateReg(Reg, false);
      SrcDisp = 0;
    }
    BuildMI(*MBB, MI, DL, TII->get(Opcode))
      .add(DestBase)
      .addImm(DestDisp)
      .addImm(ThisLength)
      .add(SrcBase)
      .addImm(SrcDisp)
      .setMemRefs(MI.memoperands());
    DestDisp += ThisLength;
    SrcDisp += ThisLength;
    Length -= ThisLength;
    // If there's another CLC to go, branch to the end if a difference
    // was found.
    if (EndMBB && Length > 0) {
      MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB);
      BuildMI(MBB, DL, TII->get(SystemZ::BRC))
        .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
        .addMBB(EndMBB);
      MBB->addSuccessor(EndMBB);
      MBB->addSuccessor(NextMBB);
      MBB = NextMBB;
    }
  }
  if (EndMBB) {
    MBB->addSuccessor(EndMBB);
    MBB = EndMBB;
    MBB->addLiveIn(SystemZ::CC);
  }

  MI.eraseFromParent();
  return MBB;
}

// Decompose string pseudo-instruction MI into a loop that continually performs
// Opcode until CC != 3.
MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI.getDebugLoc();

  uint64_t End1Reg = MI.getOperand(0).getReg();
  uint64_t Start1Reg = MI.getOperand(1).getReg();
  uint64_t Start2Reg = MI.getOperand(2).getReg();
  uint64_t CharReg = MI.getOperand(3).getReg();

  const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
  uint64_t This1Reg = MRI.createVirtualRegister(RC);
  uint64_t This2Reg = MRI.createVirtualRegister(RC);
  uint64_t End2Reg  = MRI.createVirtualRegister(RC);

  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);

  //  StartMBB:
  //   # fall through to LoopMBB
  MBB->addSuccessor(LoopMBB);

  //  LoopMBB:
  //   %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
  //   %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
  //   R0L = %CharReg
  //   %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
  //   JO LoopMBB
  //   # fall through to DoneMBB
  //
  // The load of R0L can be hoisted by post-RA LICM.
  MBB = LoopMBB;

  BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
    .addReg(Start1Reg).addMBB(StartMBB)
    .addReg(End1Reg).addMBB(LoopMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
    .addReg(Start2Reg).addMBB(StartMBB)
    .addReg(End2Reg).addMBB(LoopMBB);
  BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
  BuildMI(MBB, DL, TII->get(Opcode))
    .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
    .addReg(This1Reg).addReg(This2Reg);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  DoneMBB->addLiveIn(SystemZ::CC);

  MI.eraseFromParent();
  return DoneMBB;
}

// Update TBEGIN instruction with final opcode and register clobbers.
MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
    bool NoFloat) const {
  MachineFunction &MF = *MBB->getParent();
  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();

  // Update opcode.
  MI.setDesc(TII->get(Opcode));

  // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
  // Make sure to add the corresponding GRSM bits if they are missing.
  uint64_t Control = MI.getOperand(2).getImm();
  static const unsigned GPRControlBit[16] = {
    0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
    0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
  };
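  // Each even/odd GPR pair shares one bit of the general register save
  // mask, so GPRControlBit[15] covers the stack pointer (%r15) and
  // GPRControlBit[11] the frame pointer (%r11).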
  Control |= GPRControlBit[15];
  if (TFI->hasFP(MF))
    Control |= GPRControlBit[11];
  MI.getOperand(2).setImm(Control);

  // Add GPR clobbers.
  for (int I = 0; I < 16; I++) {
    if ((Control & GPRControlBit[I]) == 0) {
      unsigned Reg = SystemZMC::GR64Regs[I];
      MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
    }
  }

  // Add FPR/VR clobbers.
  if (!NoFloat && (Control & 4) != 0) {
    if (Subtarget.hasVector()) {
      for (int I = 0; I < 32; I++) {
        unsigned Reg = SystemZMC::VR128Regs[I];
        MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
      }
    } else {
      for (int I = 0; I < 16; I++) {
        unsigned Reg = SystemZMC::FP64Regs[I];
        MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
      }
    }
  }

  return MBB;
}

MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
  MachineFunction &MF = *MBB->getParent();
  MachineRegisterInfo *MRI = &MF.getRegInfo();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  DebugLoc DL = MI.getDebugLoc();

  Register SrcReg = MI.getOperand(0).getReg();

  // Create new virtual register of the same class as source.
  const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
  Register DstReg = MRI->createVirtualRegister(RC);

  // Replace pseudo with a normal load-and-test that models the def as
  // well.
  BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
    .addReg(SrcReg);
  MI.eraseFromParent();

  return MBB;
}

MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
    MachineInstr &MI, MachineBasicBlock *MBB) const {
  switch (MI.getOpcode()) {
  case SystemZ::Select32:
  case SystemZ::Select64:
  case SystemZ::SelectF32:
  case SystemZ::SelectF64:
  case SystemZ::SelectF128:
  case SystemZ::SelectVR32:
  case SystemZ::SelectVR64:
  case SystemZ::SelectVR128:
    return emitSelect(MI, MBB);

  case SystemZ::CondStore8Mux:
    return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
  case SystemZ::CondStore8MuxInv:
    return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
  case SystemZ::CondStore16Mux:
    return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
  case SystemZ::CondStore16MuxInv:
    return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
  case SystemZ::CondStore32Mux:
    return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
  case SystemZ::CondStore32MuxInv:
    return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
  case SystemZ::CondStore8:
    return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
  case SystemZ::CondStore8Inv:
    return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
  case SystemZ::CondStore16:
    return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
  case SystemZ::CondStore16Inv:
    return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
  case SystemZ::CondStore32:
    return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
  case SystemZ::CondStore32Inv:
    return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
  case SystemZ::CondStore64:
    return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
  case SystemZ::CondStore64Inv:
    return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
  case SystemZ::CondStoreF32:
    return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
  case SystemZ::CondStoreF32Inv:
    return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
  case SystemZ::CondStoreF64:
    return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
  case SystemZ::CondStoreF64Inv:
    return emitCondStore(MI, MBB, SystemZ::STD, 0, true);

  case SystemZ::PAIR128:
    return emitPair128(MI, MBB);
  case SystemZ::AEXT128:
    return emitExt128(MI, MBB, false);
  case SystemZ::ZEXT128:
    return emitExt128(MI, MBB, true);

  case SystemZ::ATOMIC_SWAPW:
    return emitAtomicLoadBinary(MI, MBB, 0, 0);
  case SystemZ::ATOMIC_SWAP_32:
    return emitAtomicLoadBinary(MI, MBB, 0, 32);
  case SystemZ::ATOMIC_SWAP_64:
    return emitAtomicLoadBinary(MI, MBB, 0, 64);

  case SystemZ::ATOMIC_LOADW_AR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 0);
  case SystemZ::ATOMIC_LOADW_AFI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 0);
  case SystemZ::ATOMIC_LOAD_AR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 32);
  case SystemZ::ATOMIC_LOAD_AHI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AHI, 32);
  case SystemZ::ATOMIC_LOAD_AFI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 32);
  case SystemZ::ATOMIC_LOAD_AGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AGR, 64);
  case SystemZ::ATOMIC_LOAD_AGHI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AGHI, 64);
  case SystemZ::ATOMIC_LOAD_AGFI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AGFI, 64);

  case SystemZ::ATOMIC_LOADW_SR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 0);
  case SystemZ::ATOMIC_LOAD_SR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 32);
  case SystemZ::ATOMIC_LOAD_SGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::SGR, 64);

  case SystemZ::ATOMIC_LOADW_NR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0);
  case SystemZ::ATOMIC_LOADW_NILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0);
  case SystemZ::ATOMIC_LOAD_NR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32);
  case SystemZ::ATOMIC_LOAD_NILL:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32);
  case SystemZ::ATOMIC_LOAD_NILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32);
  case SystemZ::ATOMIC_LOAD_NILF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32);
  case SystemZ::ATOMIC_LOAD_NGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64);
  case SystemZ::ATOMIC_LOAD_NILL64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64);
  case SystemZ::ATOMIC_LOAD_NILH64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64);
  case SystemZ::ATOMIC_LOAD_NIHL64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64);
  case SystemZ::ATOMIC_LOAD_NIHH64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64);
  case SystemZ::ATOMIC_LOAD_NILF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64);
  case SystemZ::ATOMIC_LOAD_NIHF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64);

  case SystemZ::ATOMIC_LOADW_OR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0);
  case SystemZ::ATOMIC_LOADW_OILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 0);
  case SystemZ::ATOMIC_LOAD_OR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32);
  case SystemZ::ATOMIC_LOAD_OILL:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 32);
  case SystemZ::ATOMIC_LOAD_OILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 32);
  case SystemZ::ATOMIC_LOAD_OILF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 32);
  case SystemZ::ATOMIC_LOAD_OGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64);
  case SystemZ::ATOMIC_LOAD_OILL64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL64, 64);
  case SystemZ::ATOMIC_LOAD_OILH64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH64, 64);
  case SystemZ::ATOMIC_LOAD_OIHL64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL64, 64);
  case SystemZ::ATOMIC_LOAD_OIHH64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH64, 64);
  case SystemZ::ATOMIC_LOAD_OILF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF64, 64);
  case SystemZ::ATOMIC_LOAD_OIHF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF64, 64);

  case SystemZ::ATOMIC_LOADW_XR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0);
  case SystemZ::ATOMIC_LOADW_XILF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 0);
  case SystemZ::ATOMIC_LOAD_XR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32);
  case SystemZ::ATOMIC_LOAD_XILF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 32);
  case SystemZ::ATOMIC_LOAD_XGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64);
  case SystemZ::ATOMIC_LOAD_XILF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF64, 64);
  case SystemZ::ATOMIC_LOAD_XIHF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF64, 64);

  case SystemZ::ATOMIC_LOADW_NRi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true);
  case SystemZ::ATOMIC_LOADW_NILHi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0, true);
  case SystemZ::ATOMIC_LOAD_NRi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true);
  case SystemZ::ATOMIC_LOAD_NILLi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32, true);
  case SystemZ::ATOMIC_LOAD_NILHi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32, true);
  case SystemZ::ATOMIC_LOAD_NILFi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32, true);
  case SystemZ::ATOMIC_LOAD_NGRi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true);
  case SystemZ::ATOMIC_LOAD_NILL64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64, true);
  case SystemZ::ATOMIC_LOAD_NILH64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64, true);
  case SystemZ::ATOMIC_LOAD_NIHL64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64, true);
  case SystemZ::ATOMIC_LOAD_NIHH64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64, true);
  case SystemZ::ATOMIC_LOAD_NILF64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64, true);
  case SystemZ::ATOMIC_LOAD_NIHF64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64, true);

  case SystemZ::ATOMIC_LOADW_MIN:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
                                SystemZ::CCMASK_CMP_LE, 0);
  case SystemZ::ATOMIC_LOAD_MIN_32:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
                                SystemZ::CCMASK_CMP_LE, 32);
  case SystemZ::ATOMIC_LOAD_MIN_64:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
                                SystemZ::CCMASK_CMP_LE, 64);

  case SystemZ::ATOMIC_LOADW_MAX:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
                                SystemZ::CCMASK_CMP_GE, 0);
  case SystemZ::ATOMIC_LOAD_MAX_32:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
                                SystemZ::CCMASK_CMP_GE, 32);
  case SystemZ::ATOMIC_LOAD_MAX_64:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
                                SystemZ::CCMASK_CMP_GE, 64);

  case SystemZ::ATOMIC_LOADW_UMIN:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
                                SystemZ::CCMASK_CMP_LE, 0);
  case SystemZ::ATOMIC_LOAD_UMIN_32:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
                                SystemZ::CCMASK_CMP_LE, 32);
  case SystemZ::ATOMIC_LOAD_UMIN_64:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
                                SystemZ::CCMASK_CMP_LE, 64);

  case SystemZ::ATOMIC_LOADW_UMAX:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
                                SystemZ::CCMASK_CMP_GE, 0);
  case SystemZ::ATOMIC_LOAD_UMAX_32:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
                                SystemZ::CCMASK_CMP_GE, 32);
  case SystemZ::ATOMIC_LOAD_UMAX_64:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
                                SystemZ::CCMASK_CMP_GE, 64);

  case SystemZ::ATOMIC_CMP_SWAPW:
    return emitAtomicCmpSwapW(MI, MBB);
  case SystemZ::MVCSequence:
  case SystemZ::MVCLoop:
    return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
  case SystemZ::NCSequence:
  case SystemZ::NCLoop:
    return emitMemMemWrapper(MI, MBB, SystemZ::NC);
  case SystemZ::OCSequence:
  case SystemZ::OCLoop:
    return emitMemMemWrapper(MI, MBB, SystemZ::OC);
  case SystemZ::XCSequence:
  case SystemZ::XCLoop:
    return emitMemMemWrapper(MI, MBB, SystemZ::XC);
  case SystemZ::CLCSequence:
  case SystemZ::CLCLoop:
    return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
  case SystemZ::CLSTLoop:
    return emitStringWrapper(MI, MBB, SystemZ::CLST);
  case SystemZ::MVSTLoop:
    return emitStringWrapper(MI, MBB, SystemZ::MVST);
  case SystemZ::SRSTLoop:
    return emitStringWrapper(MI, MBB, SystemZ::SRST);
  case SystemZ::TBEGIN:
    return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
  case SystemZ::TBEGIN_nofloat:
    return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
  case SystemZ::TBEGINC:
    return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
  case SystemZ::LTEBRCompare_VecPseudo:
    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
  case SystemZ::LTDBRCompare_VecPseudo:
    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
  case SystemZ::LTXBRCompare_VecPseudo:
    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);

  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
    return emitPatchPoint(MI, MBB);

  default:
    llvm_unreachable("Unexpected instr type to insert");
  }
}

// This is only used by the isel schedulers, and is needed only to prevent
// the compiler from crashing when list-ilp is used.
const TargetRegisterClass *
SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
  if (VT == MVT::Untyped)
    return &SystemZ::ADDR128BitRegClass;
  return TargetLowering::getRepRegClassFor(VT);