//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the SystemZTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SystemZISelLowering.h"
#include "SystemZCallingConv.h"
#include "SystemZConstantPoolValue.h"
#include "SystemZMachineFunctionInfo.h"
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"

using namespace llvm;

#define DEBUG_TYPE "systemz-lower"
namespace {
// Represents information about a comparison.
struct Comparison {
  Comparison(SDValue Op0In, SDValue Op1In)
      : Op0(Op0In), Op1(Op1In), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}

  // The operands to the comparison.
  SDValue Op0, Op1;

  // The opcode that should be used to compare Op0 and Op1.
  unsigned Opcode;

  // A SystemZICMP value.  Only used for integer comparisons.
  unsigned ICmpType;

  // The mask of CC values that Opcode can produce.
  unsigned CCValid;

  // The mask of CC values for which the original condition is true.
  unsigned CCMask;
};
} // end anonymous namespace
// Classify VT as either 32 or 64 bit.
static bool is32Bit(EVT VT) {
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::i32:
    return true;
  case MVT::i64:
    return false;
  default:
    llvm_unreachable("Unsupported type");
  }
}
// Return a version of MachineOperand that can be safely used before the
// final use.
static MachineOperand earlyUseOperand(MachineOperand Op) {
  if (Op.isReg())
    Op.setIsKill(false);
  return Op;
}
SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
                                             const SystemZSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize(0));
  // Set up the register classes.
  if (Subtarget.hasHighWord())
    addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
  else
    addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
  if (Subtarget.hasVector()) {
    addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
    addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
  } else {
    addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
    addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
  }
  if (Subtarget.hasVectorEnhancements1())
    addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
  else
    addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
  if (Subtarget.hasVector()) {
    addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
  }
  // Compute derived properties from the register classes
  computeRegisterProperties(Subtarget.getRegisterInfo());

  // Set up special registers.
  setStackPointerRegisterToSaveRestore(SystemZ::R15D);
  // TODO: It may be better to default to latency-oriented scheduling, however
  // LLVM's current latency-oriented scheduler can't handle physreg definitions
  // such as SystemZ has with CC, so set this to the register-pressure
  // scheduler, because it can.
  setSchedulingPreference(Sched::RegPressure);
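  // Boolean results: scalar comparisons materialize 0/1 in a GPR, while
  // vector comparisons produce per-element masks of all zeros or all ones.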
  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  // Instructions are strings of 2-byte aligned 2-byte values.
  setMinFunctionAlignment(llvm::Align(4));
  // For performance reasons we prefer 16-byte alignment.
  setPrefFunctionLogAlignment(4);
  // Handle operations that are handled in a similar way for all types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Lower SET_CC into an IPM-based sequence.
      setOperationAction(ISD::SETCC, VT, Custom);

      // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
      setOperationAction(ISD::SELECT, VT, Expand);

      // Lower SELECT_CC and BR_CC into separate comparisons and branches.
      setOperationAction(ISD::SELECT_CC, VT, Custom);
      setOperationAction(ISD::BR_CC,     VT, Custom);
    }
  }

  // Expand jump table branches as address arithmetic followed by an
  // indirect jump.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // Expand BRCOND into a BR_CC (see above).
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);
  // Handle integer types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_INTEGER_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Expand individual DIV and REMs into DIVREMs.
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Custom);
      setOperationAction(ISD::UDIVREM, VT, Custom);

      // Support addition/subtraction with overflow.
      setOperationAction(ISD::SADDO, VT, Custom);
      setOperationAction(ISD::SSUBO, VT, Custom);

      // Support addition/subtraction with carry.
      setOperationAction(ISD::UADDO, VT, Custom);
      setOperationAction(ISD::USUBO, VT, Custom);

      // Support carry in as value rather than glue.
      setOperationAction(ISD::ADDCARRY, VT, Custom);
      setOperationAction(ISD::SUBCARRY, VT, Custom);

      // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
      // stores, putting a serialization instruction after the stores.
      setOperationAction(ISD::ATOMIC_LOAD,  VT, Custom);
      setOperationAction(ISD::ATOMIC_STORE, VT, Custom);

      // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
      // available, or if the operand is constant.
      setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);

      // Use POPCNT on z196 and above.
      if (Subtarget.hasPopulationCount())
        setOperationAction(ISD::CTPOP, VT, Custom);
      else
        setOperationAction(ISD::CTPOP, VT, Expand);

      // No special instructions for these.
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Use *MUL_LOHI where possible instead of MULH*.
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Custom);
      setOperationAction(ISD::UMUL_LOHI, VT, Custom);

      // Only z196 and above have native support for conversions to unsigned.
      // On z10, promoting to i64 doesn't generate an inexact condition for
      // values that are outside the i32 range but in the i64 range, so use
      // the default expansion.
      if (!Subtarget.hasFPExtension())
        setOperationAction(ISD::FP_TO_UINT, VT, Expand);

      // Mirror those settings for STRICT_FP_TO_[SU]INT.  Note that these all
      // default to Expand, so need to be modified to Legal where appropriate.
      setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal);
      if (Subtarget.hasFPExtension())
        setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal);
    }
  }
  // Type legalization will convert 8- and 16-bit atomic operations into
  // forms that operate on i32s (but still keeping the original memory VT).
  // Lower them into full i32 operations.
  setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MIN,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MAX,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
  // Even though i128 is not a legal type, we still need to custom lower
  // the atomic operations in order to exploit SystemZ instructions.
  setOperationAction(ISD::ATOMIC_LOAD,  MVT::i128, Custom);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);

  // We can use the CC result of compare-and-swap to implement
  // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
  // Traps are legal, as we will convert them to "j .+2".
  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  // z10 has instructions for signed but not unsigned FP conversion.
  // Handle unsigned 32-bit types as signed 64-bit types.
  if (!Subtarget.hasFPExtension()) {
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
  }
  // We have native support for a 64-bit CTLZ, via FLOGR.
  setOperationAction(ISD::CTLZ, MVT::i32, Promote);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
  setOperationAction(ISD::CTLZ, MVT::i64, Legal);
  // On arch13 we have native support for a 64-bit CTPOP.
  if (Subtarget.hasMiscellaneousExtensions3()) {
    setOperationAction(ISD::CTPOP, MVT::i32, Promote);
    setOperationAction(ISD::CTPOP, MVT::i64, Legal);
  }
  // Give LowerOperation the chance to replace 64-bit ORs with subregs.
  setOperationAction(ISD::OR, MVT::i64, Custom);

  // FIXME: Can we support these natively?
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
  // We have native instructions for i8, i16 and i32 extensions, but not i1.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD,  VT, MVT::i1, Promote);
  }
  // Handle the various types of symbolic address.
  setOperationAction(ISD::ConstantPool,     PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress,    PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
  setOperationAction(ISD::BlockAddress,     PtrVT, Custom);
  setOperationAction(ISD::JumpTable,        PtrVT, Custom);
  // We need to handle dynamic allocations specially because of the
  // 160-byte area at the bottom of the stack.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);
  // Use custom expanders so that we can force the function to use
  // a frame pointer.
  setOperationAction(ISD::STACKSAVE,    MVT::Other, Custom);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
  // Handle prefetches with PFD or PFDRL.
  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
  for (MVT VT : MVT::vector_valuetypes()) {
    // Assume by default that all vector operations need to be expanded.
    for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
      if (getOperationAction(Opcode, VT) == Legal)
        setOperationAction(Opcode, VT, Expand);

    // Likewise all truncating stores and extending loads.
    for (MVT InnerVT : MVT::vector_valuetypes()) {
      setTruncStoreAction(VT, InnerVT, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::EXTLOAD,  VT, InnerVT, Expand);
    }

    if (isTypeLegal(VT)) {
      // These operations are legal for anything that can be stored in a
      // vector register, even if there is no native support for the format
      // as such.  In particular, we can do these for v4f32 even though there
      // are no specific instructions for that format.
      setOperationAction(ISD::LOAD, VT, Legal);
      setOperationAction(ISD::STORE, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      // Likewise, except that we need to replace the nodes with something
      // more specific.
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
  }
  // Handle integer vector types.
  for (MVT VT : MVT::integer_vector_valuetypes()) {
    if (isTypeLegal(VT)) {
      // These operations have direct equivalents.
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);
      if (VT != MVT::v2i64)
        setOperationAction(ISD::MUL, VT, Legal);
      setOperationAction(ISD::AND, VT, Legal);
      setOperationAction(ISD::OR, VT, Legal);
      setOperationAction(ISD::XOR, VT, Legal);
      if (Subtarget.hasVectorEnhancements1())
        setOperationAction(ISD::CTPOP, VT, Legal);
      else
        setOperationAction(ISD::CTPOP, VT, Custom);
      setOperationAction(ISD::CTTZ, VT, Legal);
      setOperationAction(ISD::CTLZ, VT, Legal);

      // Convert a GPR scalar to a vector by inserting it into element 0.
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);

      // Use a series of unpacks for extensions.
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);

      // Detect shifts by a scalar amount and convert them into
      // V*_BY_SCALAR.
      setOperationAction(ISD::SHL, VT, Custom);
      setOperationAction(ISD::SRA, VT, Custom);
      setOperationAction(ISD::SRL, VT, Custom);

      // At present ROTL isn't matched by DAGCombiner.  ROTR should be
      // converted into ROTL.
      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
      // and inverting the result as necessary.
      setOperationAction(ISD::SETCC, VT, Custom);
    }
  }
  if (Subtarget.hasVector()) {
    // There should be no need to check for float types other than v2f64
    // since <2 x f32> isn't a legal type.
    setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);

    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal);
  }
  if (Subtarget.hasVectorEnhancements2()) {
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal);

    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal);
  }
  // Handle floating-point types.
  for (unsigned I = MVT::FIRST_FP_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // We can use FI for FRINT.
      setOperationAction(ISD::FRINT, VT, Legal);

      // We can use the extended form of FI for other rounding operations.
      if (Subtarget.hasFPExtension()) {
        setOperationAction(ISD::FNEARBYINT, VT, Legal);
        setOperationAction(ISD::FFLOOR, VT, Legal);
        setOperationAction(ISD::FCEIL, VT, Legal);
        setOperationAction(ISD::FTRUNC, VT, Legal);
        setOperationAction(ISD::FROUND, VT, Legal);
      }

      // No special instructions for these.
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FSINCOS, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);

      // Handle constrained floating-point operations.
      setOperationAction(ISD::STRICT_FADD, VT, Legal);
      setOperationAction(ISD::STRICT_FSUB, VT, Legal);
      setOperationAction(ISD::STRICT_FMUL, VT, Legal);
      setOperationAction(ISD::STRICT_FDIV, VT, Legal);
      setOperationAction(ISD::STRICT_FMA, VT, Legal);
      setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
      setOperationAction(ISD::STRICT_FRINT, VT, Legal);
      setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
      setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
      if (Subtarget.hasFPExtension()) {
        setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
        setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
        setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
        setOperationAction(ISD::STRICT_FROUND, VT, Legal);
        setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
      }
    }
  }
  // Handle floating-point vector types.
  if (Subtarget.hasVector()) {
    // Scalar-to-vector conversion is just a subreg.
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);

    // Some insertions and extractions can be done directly but others
    // need to go via integers.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);

    // These operations have direct equivalents.
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FMA, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FABS, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

    // Handle constrained floating-point operations.
    setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
  }
  // The vector enhancements facility 1 has instructions for these.
  if (Subtarget.hasVectorEnhancements1()) {
    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::FABS, MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f64, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);

    // Handle constrained floating-point operations.
    setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
    for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
                     MVT::v4f32, MVT::v2f64 }) {
      setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal);
      setOperationAction(ISD::STRICT_FMINNUM, VT, Legal);
    }
  }
  // We have fused multiply-addition for f32 and f64 but not f128.
  setOperationAction(ISD::FMA, MVT::f32, Legal);
  setOperationAction(ISD::FMA, MVT::f64, Legal);
  if (Subtarget.hasVectorEnhancements1())
    setOperationAction(ISD::FMA, MVT::f128, Legal);
  else
    setOperationAction(ISD::FMA, MVT::f128, Expand);
  // We don't have a copysign instruction on vector registers.
  if (Subtarget.hasVectorEnhancements1())
    setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
  // Needed so that we don't try to implement f128 constant loads using
  // a load-and-extend of a f80 constant (in cases where the constant
  // would fit in an f80).
  for (MVT VT : MVT::fp_valuetypes())
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
  // We don't have extending load instruction on vector registers.
  if (Subtarget.hasVectorEnhancements1()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
  }
  // Floating-point truncation and stores need to be done separately.
  setTruncStoreAction(MVT::f64,  MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f64, Expand);
  // We have 64-bit FPR<->GPR moves, but need special handling for
  // 32-bit forms.
  if (!Subtarget.hasVector()) {
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::f32, Custom);
  }
  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
  // structure, but VAEND is a no-op.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VACOPY,  MVT::Other, Custom);
  setOperationAction(ISD::VAEND,   MVT::Other, Expand);
  // Codes for which we want to perform some z-specific combinations.
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_EXTEND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::SDIV);
  setTargetDAGCombine(ISD::UDIV);
  setTargetDAGCombine(ISD::SREM);
  setTargetDAGCombine(ISD::UREM);
  // Handle intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  // We want to use MVC in preference to even a single load/store pair.
  MaxStoresPerMemcpy = 0;
  MaxStoresPerMemcpyOptSize = 0;

  // The main memset sequence is a byte store followed by an MVC.
  // Two STC or MV..I stores win over that, but the kind of fused stores
  // generated by target-independent code don't when the byte value is
  // variable.  E.g.  "STC <reg>;MHI <reg>,257;STH <reg>" is not better
  // than "STC;MVC".  Handle the choice in target-specific code instead.
  MaxStoresPerMemset = 0;
  MaxStoresPerMemsetOptSize = 0;
}
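// The boolean type produced by a SETCC: i32 for scalar comparisons, otherwise
// the vector type with its elements changed to integers of the same width.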
EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
                                              LLVMContext &, EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}
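// FMA is at least as cheap as a separate multiply and add for the types that
// have a fused multiply-add instruction: f32 and f64 always, and f128 only
// with the vector-enhancements facility 1.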
bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  case MVT::f128:
    return Subtarget.hasVectorEnhancements1();
  default:
    break;
  }

  return false;
}
// Return true if the constant can be generated with a vector instruction,
// such as VGM, VGMB or VREPI.
bool SystemZVectorConstantInfo::isVectorConstantLegal(
    const SystemZSubtarget &Subtarget) {
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  if (!Subtarget.hasVector() ||
      (isFP128 && !Subtarget.hasVectorEnhancements1()))
    return false;

  // Try using VECTOR GENERATE BYTE MASK.  This is the architecturally-
  // preferred way of creating all-zero and all-one vectors so give it
  // priority over other methods below.
  unsigned Mask = 0;
  unsigned I = 0;
  for (; I < SystemZ::VectorBytes; ++I) {
    uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
    if (Byte == 0xff)
      Mask |= 1ULL << I;
    else if (Byte != 0)
      break;
  }
  if (I == SystemZ::VectorBytes) {
    Opcode = SystemZISD::BYTE_MASK;
    OpVals.push_back(Mask);
    VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16);
    return true;
  }
  if (SplatBitSize > 64)
    return false;
  auto tryValue = [&](uint64_t Value) -> bool {
    // Try VECTOR REPLICATE IMMEDIATE
    int64_t SignedValue = SignExtend64(Value, SplatBitSize);
    if (isInt<16>(SignedValue)) {
      OpVals.push_back(((unsigned) SignedValue));
      Opcode = SystemZISD::REPLICATE;
      VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
                               SystemZ::VectorBits / SplatBitSize);
      return true;
    }
    // Try VECTOR GENERATE MASK
    unsigned Start, End;
    if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
      // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
      // denoting 1 << 63 and 63 denoting 1.  Convert them to bit numbers for
      // an SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
      OpVals.push_back(Start - (64 - SplatBitSize));
      OpVals.push_back(End - (64 - SplatBitSize));
      Opcode = SystemZISD::ROTATE_MASK;
      VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
                               SystemZ::VectorBits / SplatBitSize);
      return true;
    }
    return false;
  };
  // First try assuming that any undefined bits above the highest set bit
  // and below the lowest set bit are 1s.  This increases the likelihood of
  // being able to use a sign-extended element value in VECTOR REPLICATE
  // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
  uint64_t SplatBitsZ = SplatBits.getZExtValue();
  uint64_t SplatUndefZ = SplatUndef.getZExtValue();
  uint64_t Lower =
      (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
  uint64_t Upper =
      (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
  if (tryValue(SplatBitsZ | Upper | Lower))
    return true;

  // Now try assuming that any undefined bits between the first and
  // last defined set bits are set.  This increases the chances of
  // using a non-wraparound mask.
  uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
  return tryValue(SplatBitsZ | Middle);
}
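// Gather the splat information for a floating-point immediate so that it can
// be tested with isVectorConstantLegal.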
SystemZVectorConstantInfo::SystemZVectorConstantInfo(APFloat FPImm) {
  IntBits = FPImm.bitcastToAPInt().zextOrSelf(128);
  isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad());

  // Find the smallest splat.
  SplatBits = FPImm.bitcastToAPInt();
  unsigned Width = SplatBits.getBitWidth();
  while (Width > 8) {
    unsigned HalfSize = Width / 2;
    APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
    APInt LowValue = SplatBits.trunc(HalfSize);

    // If the two halves do not match, stop here.
    if (HighValue != LowValue || 8 > HalfSize)
      break;

    SplatBits = HighValue;
    Width = HalfSize;
  }
  SplatUndef = 0;
  SplatBitSize = Width;
}
SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) {
  assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
  bool HasAnyUndefs;

  // Get IntBits by finding the 128 bit splat.
  BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
                       true);

  // Get SplatBits by finding the 8 bit or greater splat.
  BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
                       true);
}
bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                         bool ForCodeSize) const {
  // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
  if (Imm.isZero() || Imm.isNegZero())
    return true;

  return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
}
bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  // We can use CGFI or CLGFI.
  return isInt<32>(Imm) || isUInt<32>(Imm);
}
bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  // We can use ALGFI or SLGFI.
  return isUInt<32>(Imm) || isUInt<32>(-Imm);
}
bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned, unsigned, MachineMemOperand::Flags, bool *Fast) const {
  // Unaligned accesses should never be slower than the expanded version.
  // We check specifically for aligned accesses in the few cases where
  // they are required.
  if (Fast)
    *Fast = true;
  return true;
}
// Information about the addressing mode for a memory access.
struct AddressingMode {
  // True if a long displacement is supported.
  bool LongDisplacement;

  // True if use of index register is supported.
  bool IndexReg;

  AddressingMode(bool LongDispl, bool IdxReg) :
    LongDisplacement(LongDispl), IndexReg(IdxReg) {}
};
// Return the desired addressing mode for a Load which has only one use (in
// the same block) which is a Store.
static AddressingMode getLoadStoreAddrMode(bool HasVector,
                                           Type *Ty) {
  // With vector support a Load->Store combination may be combined to either
  // an MVC or vector operations and it seems to work best to allow the
  // vector addressing mode.
  if (HasVector)
    return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);

  // Otherwise only the MVC case is special.
  bool MVC = Ty->isIntegerTy(8);
  return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
}
// Return the addressing mode which seems most desirable given an LLVM
// Instruction pointer.
static AddressingMode
supportedAddressingMode(Instruction *I, bool HasVector) {
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default: break;
    case Intrinsic::memset:
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
    }
  }

  if (isa<LoadInst>(I) && I->hasOneUse()) {
    auto *SingleUser = dyn_cast<Instruction>(*I->user_begin());
    if (SingleUser->getParent() == I->getParent()) {
      if (isa<ICmpInst>(SingleUser)) {
        if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
          if (C->getBitWidth() <= 64 &&
              (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
            // Comparison of memory with 16 bit signed / unsigned immediate
            return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
      } else if (isa<StoreInst>(SingleUser))
        // Load->Store
        return getLoadStoreAddrMode(HasVector, I->getType());
    }
  } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
    if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
      if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
        // Load->Store
        return getLoadStoreAddrMode(HasVector, LoadI->getType());
  }

  if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {

    // * Use LDE instead of LE/LEY for z13 to avoid partial register
    //   dependencies (LDE only supports small offsets).
    // * Utilize the vector registers to hold floating point
    //   values (vector load / store instructions only support small
    //   offsets).

    Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
                         I->getOperand(0)->getType());
    bool IsFPAccess = MemAccessTy->isFloatingPointTy();
    bool IsVectorAccess = MemAccessTy->isVectorTy();

    // A store of an extracted vector element will be combined into a VSTE type
    // instruction.
    if (!IsVectorAccess && isa<StoreInst>(I)) {
      Value *DataOp = I->getOperand(0);
      if (isa<ExtractElementInst>(DataOp))
        IsVectorAccess = true;
    }

    // A load which gets inserted into a vector element will be combined into a
    // VLE type instruction.
    if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
      User *LoadUser = *I->user_begin();
      if (isa<InsertElementInst>(LoadUser))
        IsVectorAccess = true;
    }

    if (IsFPAccess || IsVectorAccess)
      return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
  }

  return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
}
bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
       const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
  // Punt on globals for now, although they can be used in limited
  // RELATIVE LONG cases.
  if (AM.BaseGV)
    return false;

  // Require a 20-bit signed offset.
  if (!isInt<20>(AM.BaseOffs))
    return false;

  AddressingMode SupportedAM(true, true);
  if (I != nullptr)
    SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());

  if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
    return false;

  if (!SupportedAM.IndexReg)
    // No indexing allowed.
    return AM.Scale == 0;
  else
    // Indexing is OK but no scale factor can be applied.
    return AM.Scale == 0 || AM.Scale == 1;
}
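// Integer truncation is free: the narrower value is simply the low bits of
// the wider register.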
bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
  if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
    return false;
  unsigned FromBits = FromType->getPrimitiveSizeInBits();
  unsigned ToBits = ToType->getPrimitiveSizeInBits();
  return FromBits > ToBits;
}
bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
  if (!FromVT.isInteger() || !ToVT.isInteger())
    return false;
  unsigned FromBits = FromVT.getSizeInBits();
  unsigned ToBits = ToVT.getSizeInBits();
  return FromBits > ToBits;
}
//===----------------------------------------------------------------------===//
// Inline asm support
//===----------------------------------------------------------------------===//
TargetLowering::ConstraintType
SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'a': // Address register
    case 'd': // Data register (equivalent to 'r')
    case 'f': // Floating-point register
    case 'h': // High-part register
    case 'r': // General-purpose register
    case 'v': // Vector register
      return C_RegisterClass;

    case 'Q': // Memory with base and unsigned 12-bit displacement
    case 'R': // Likewise, plus an index
    case 'S': // Memory with base and signed 20-bit displacement
    case 'T': // Likewise, plus an index
    case 'm': // Equivalent to 'T'.
      return C_Memory;

    case 'I': // Unsigned 8-bit constant
    case 'J': // Unsigned 12-bit constant
    case 'K': // Signed 16-bit constant
    case 'L': // Signed 20-bit displacement (on all targets we support)
    case 'M': // 0x7fffffff
      return C_Other;

    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}
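// Estimate how well the given operand matches each single-letter constraint
// so that the most specific alternative can be chosen.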
TargetLowering::ConstraintWeight SystemZTargetLowering::
getSingleConstraintMatchWeight(AsmOperandInfo &info,
                               const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();
  // Look at the constraint type.
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;

  case 'a': // Address register
  case 'd': // Data register (equivalent to 'r')
  case 'h': // High-part register
  case 'r': // General-purpose register
    if (CallOperandVal->getType()->isIntegerTy())
      weight = CW_Register;
    break;

  case 'f': // Floating-point register
    if (type->isFloatingPointTy())
      weight = CW_Register;
    break;

  case 'v': // Vector register
    if ((type->isVectorTy() || type->isFloatingPointTy()) &&
        Subtarget.hasVector())
      weight = CW_Register;
    break;

  case 'I': // Unsigned 8-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<8>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'J': // Unsigned 12-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<12>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'K': // Signed 16-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<16>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'L': // Signed 20-bit displacement (on all targets we support)
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<20>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'M': // 0x7fffffff
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (C->getZExtValue() == 0x7fffffff)
        weight = CW_Constant;
    break;
  }
  return weight;
}
// Parse a "{tNNN}" register constraint for which the register type "t"
// has already been verified.  MC is the class associated with "t" and
// Map maps 0-based register numbers to LLVM register numbers.
static std::pair<unsigned, const TargetRegisterClass *>
parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
                    const unsigned *Map, unsigned Size) {
  assert(*(Constraint.end()-1) == '}' && "Missing '}'");
  if (isdigit(Constraint[2])) {
    unsigned Index;
    bool Failed =
        Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
    if (!Failed && Index < Size && Map[Index])
      return std::make_pair(Map[Index], RC);
  }
  return std::make_pair(0U, nullptr);
}
std::pair<unsigned, const TargetRegisterClass *>
SystemZTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'd': // Data register (equivalent to 'r')
    case 'r': // General-purpose register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::GR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::GR128BitRegClass);
      return std::make_pair(0U, &SystemZ::GR32BitRegClass);

    case 'a': // Address register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
      return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);

    case 'h': // High-part register (an LLVM extension)
      return std::make_pair(0U, &SystemZ::GRH32BitRegClass);

    case 'f': // Floating-point register
      if (VT == MVT::f64)
        return std::make_pair(0U, &SystemZ::FP64BitRegClass);
      else if (VT == MVT::f128)
        return std::make_pair(0U, &SystemZ::FP128BitRegClass);
      return std::make_pair(0U, &SystemZ::FP32BitRegClass);

    case 'v': // Vector register
      if (Subtarget.hasVector()) {
        if (VT == MVT::f32)
          return std::make_pair(0U, &SystemZ::VR32BitRegClass);
        if (VT == MVT::f64)
          return std::make_pair(0U, &SystemZ::VR64BitRegClass);
        return std::make_pair(0U, &SystemZ::VR128BitRegClass);
      }
      break;
    }
  }
  if (Constraint.size() > 0 && Constraint[0] == '{') {
    // We need to override the default register parsing for GPRs and FPRs
    // because the interpretation depends on VT.  The internal names of
    // the registers are also different from the external names
    // (F0D and F0S instead of F0, etc.).
    if (Constraint[1] == 'r') {
      if (VT == MVT::i32)
        return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
                                   SystemZMC::GR32Regs, 16);
      if (VT == MVT::i128)
        return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
                                   SystemZMC::GR128Regs, 16);
      return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
                                 SystemZMC::GR64Regs, 16);
    }
    if (Constraint[1] == 'f') {
      if (VT == MVT::f32)
        return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
                                   SystemZMC::FP32Regs, 16);
      if (VT == MVT::f128)
        return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
                                   SystemZMC::FP128Regs, 16);
      return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
                                 SystemZMC::FP64Regs, 16);
    }
    if (Constraint[1] == 'v') {
      if (VT == MVT::f32)
        return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
                                   SystemZMC::VR32Regs, 32);
      if (VT == MVT::f64)
        return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
                                   SystemZMC::VR64Regs, 32);
      return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
                                 SystemZMC::VR128Regs, 32);
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
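// Turn a constant inline-asm operand into a target constant when it satisfies
// the corresponding immediate-range constraint letter.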
void SystemZTargetLowering::
LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                             std::vector<SDValue> &Ops,
                             SelectionDAG &DAG) const {
  // Only support length 1 constraints for now.
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'I': // Unsigned 8-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<8>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'J': // Unsigned 12-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<12>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'K': // Signed 16-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<16>(C->getSExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'L': // Signed 20-bit displacement (on all targets we support)
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<20>(C->getSExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'M': // 0x7fffffff
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0x7fffffff)
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
//===----------------------------------------------------------------------===//
// Calling conventions
//===----------------------------------------------------------------------===//
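// The calling-convention assignment functions (CC_SystemZ and friends) are
// generated from SystemZCallingConv.td and pulled in here.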
#include "SystemZGenCallingConv.inc"
const MCPhysReg *SystemZTargetLowering::getScratchRegisters(
  CallingConv::ID) const {
  static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
                                                     Type *ToType) const {
  return isTruncateFree(FromType, ToType);
}
bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  return CI->isTailCall();
}
// We do not yet support 128-bit single-element vector types.  If the user
// attempts to use such types as function argument or return type, prefer
// to error out instead of emitting code violating the ABI.
static void VerifyVectorType(MVT VT, EVT ArgVT) {
  if (ArgVT.isVector() && !VT.isVector())
    report_fatal_error("Unsupported vector argument or return type");
}
static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
  for (unsigned i = 0; i < Ins.size(); ++i)
    VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
}
static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
  for (unsigned i = 0; i < Outs.size(); ++i)
    VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
}
// Value is a value that has been passed to us in the location described by VA
// (and so has type VA.getLocVT()).  Convert Value to VA.getValVT(), chaining
// any loads onto Chain.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
                                   CCValAssign &VA, SDValue Chain,
                                   SDValue Value) {
  // If the argument has been promoted from a smaller type, insert an
  // assertion to capture this.
  if (VA.getLocInfo() == CCValAssign::SExt)
    Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));
  else if (VA.getLocInfo() == CCValAssign::ZExt)
    Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));

  if (VA.isExtInLoc())
    Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
  else if (VA.getLocInfo() == CCValAssign::BCvt) {
    // If this is a short vector argument loaded from the stack,
    // extend from i64 to full vector size and then bitcast.
    assert(VA.getLocVT() == MVT::i64);
    assert(VA.getValVT().isVector());
    Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
    Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
  } else
    assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");

  return Value;
}
// Value is a value of type VA.getValVT() that we need to copy into
// the location described by VA.  Return a copy of Value converted to
// VA.getLocVT().  The caller is responsible for handling indirect values.
static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
                                   CCValAssign &VA, SDValue Value) {
  switch (VA.getLocInfo()) {
  case CCValAssign::SExt:
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::ZExt:
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::AExt:
    return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::BCvt:
    // If this is a short vector argument to be stored to the stack,
    // bitcast to v2i64 and then extract first element.
    assert(VA.getLocVT() == MVT::i64);
    assert(VA.getValVT().isVector());
    Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
                       DAG.getConstant(0, DL, MVT::i32));
  case CCValAssign::Full:
    return Value;
  default:
    llvm_unreachable("Unhandled getLocInfo()");
  }
}
SDValue SystemZTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SystemZMachineFunctionInfo *FuncInfo =
      MF.getInfo<SystemZMachineFunctionInfo>();
  auto *TFL =
      static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // Detect unsupported vector argument types.
  if (Subtarget.hasVector())
    VerifyVectorTypes(Ins);
  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);

  unsigned NumFixedGPRs = 0;
  unsigned NumFixedFPRs = 0;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    SDValue ArgValue;
    CCValAssign &VA = ArgLocs[I];
    EVT LocVT = VA.getLocVT();
    if (VA.isRegLoc()) {
      // Arguments passed in registers
      const TargetRegisterClass *RC;
      switch (LocVT.getSimpleVT().SimpleTy) {
      default:
        // Integers smaller than i64 should be promoted to i64.
        llvm_unreachable("Unexpected argument type");
      case MVT::i32:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR32BitRegClass;
        break;
      case MVT::i64:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR64BitRegClass;
        break;
      case MVT::f32:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP32BitRegClass;
        break;
      case MVT::f64:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP64BitRegClass;
        break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        RC = &SystemZ::VR128BitRegClass;
        break;
      }

      Register VReg = MRI.createVirtualRegister(RC);
      MRI.addLiveIn(VA.getLocReg(), VReg);
      ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Create the frame index object for this incoming parameter.
      int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
                                     VA.getLocMemOffset(), true);

      // Create the SelectionDAG nodes corresponding to a load
      // from this parameter.  Unpromoted ints and floats are
      // passed as right-justified 8-byte values.
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
                          DAG.getIntPtrConstant(4, DL));
      ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
                             MachinePointerInfo::getFixedStack(MF, FI));
    }
    // Convert the value of the argument register into the value that's
    // being passed.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      // If the original argument was split (e.g. i128), we need
      // to load all parts of it here (using the same address).
      unsigned ArgIndex = Ins[I].OrigArgIndex;
      assert (Ins[I].PartOffset == 0);
      while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[I + 1];
        unsigned PartOffset = Ins[I + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++I;
      }
    } else
      InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
  }
  if (IsVarArg) {
    // Save the number of non-varargs registers for later use by va_start, etc.
    FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
    FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);

    // Likewise the address (in the form of a frame index) of where the
    // first stack vararg would be.  The 1-byte size here is arbitrary.
    int64_t StackSize = CCInfo.getNextStackOffset();
    FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));

    // ...and a similar frame index for the caller-allocated save area
    // that will be used to store the incoming registers.
    int64_t RegSaveOffset = TFL->getOffsetOfLocalArea();
    unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
    FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
    // Store the FPR varargs in the reserved frame slots.  (We store the
    // GPRs as part of the prologue.)
    if (NumFixedFPRs < SystemZ::NumArgFPRs) {
      SDValue MemOps[SystemZ::NumArgFPRs];
      for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) {
        unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]);
        int FI = MFI.CreateFixedObject(8, RegSaveOffset + Offset, true);
        SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
        unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I],
                                     &SystemZ::FP64BitRegClass);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
        MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
                                 MachinePointerInfo::getFixedStack(MF, FI));
      }
      // Join the stores, which are independent of one another.
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                          makeArrayRef(&MemOps[NumFixedFPRs],
                                       SystemZ::NumArgFPRs-NumFixedFPRs));
    }
  }

  return Chain;
}
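// Return true if the given call can be lowered as a sibling call, i.e. a tail
// call that reuses the caller's stack frame.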
static bool canUseSiblingCall(const CCState &ArgCCInfo,
                              SmallVectorImpl<CCValAssign> &ArgLocs,
                              SmallVectorImpl<ISD::OutputArg> &Outs) {
  // Punt if there are any indirect or stack arguments, or if the call
  // needs the callee-saved argument register R6, or if the call uses
  // the callee-saved register arguments SwiftSelf and SwiftError.
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    if (VA.getLocInfo() == CCValAssign::Indirect)
      return false;
    if (!VA.isRegLoc())
      return false;
    Register Reg = VA.getLocReg();
    if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
      return false;
    if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
      return false;
  }
  return true;
}
SDValue
SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                 SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Detect unsupported vector argument and return types.
  if (Subtarget.hasVector()) {
    VerifyVectorTypes(Outs);
    VerifyVectorTypes(Ins);
  }
  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);

  // We don't support GuaranteedTailCallOpt, only automatically-detected
  // sibling calls.
  if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
    IsTailCall = false;
  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  // Mark the start of the call.
  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
  // Copy argument values to their designated locations.
  SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    SDValue ArgValue = OutVals[I];
1505 if (VA
.getLocInfo() == CCValAssign::Indirect
) {
1506 // Store the argument in a stack slot and pass its address.
1507 SDValue SpillSlot
= DAG
.CreateStackTemporary(Outs
[I
].ArgVT
);
1508 int FI
= cast
<FrameIndexSDNode
>(SpillSlot
)->getIndex();
1509 MemOpChains
.push_back(
1510 DAG
.getStore(Chain
, DL
, ArgValue
, SpillSlot
,
1511 MachinePointerInfo::getFixedStack(MF
, FI
)));
1512 // If the original argument was split (e.g. i128), we need
1513 // to store all parts of it here (and pass just one address).
1514 unsigned ArgIndex
= Outs
[I
].OrigArgIndex
;
1515 assert (Outs
[I
].PartOffset
== 0);
1516 while (I
+ 1 != E
&& Outs
[I
+ 1].OrigArgIndex
== ArgIndex
) {
1517 SDValue PartValue
= OutVals
[I
+ 1];
1518 unsigned PartOffset
= Outs
[I
+ 1].PartOffset
;
1519 SDValue Address
= DAG
.getNode(ISD::ADD
, DL
, PtrVT
, SpillSlot
,
1520 DAG
.getIntPtrConstant(PartOffset
, DL
));
1521 MemOpChains
.push_back(
1522 DAG
.getStore(Chain
, DL
, PartValue
, Address
,
1523 MachinePointerInfo::getFixedStack(MF
, FI
)));
1526 ArgValue
= SpillSlot
;
1528 ArgValue
= convertValVTToLocVT(DAG
, DL
, VA
, ArgValue
);
1531 // Queue up the argument copies and emit them at the end.
1532 RegsToPass
.push_back(std::make_pair(VA
.getLocReg(), ArgValue
));
1534 assert(VA
.isMemLoc() && "Argument not register or memory");
1536 // Work out the address of the stack slot. Unpromoted ints and
1537 // floats are passed as right-justified 8-byte values.
1538 if (!StackPtr
.getNode())
1539 StackPtr
= DAG
.getCopyFromReg(Chain
, DL
, SystemZ::R15D
, PtrVT
);
1540 unsigned Offset
= SystemZMC::CallFrameSize
+ VA
.getLocMemOffset();
1541 if (VA
.getLocVT() == MVT::i32
|| VA
.getLocVT() == MVT::f32
)
1543 SDValue Address
= DAG
.getNode(ISD::ADD
, DL
, PtrVT
, StackPtr
,
1544 DAG
.getIntPtrConstant(Offset
, DL
));
1547 MemOpChains
.push_back(
1548 DAG
.getStore(Chain
, DL
, ArgValue
, Address
, MachinePointerInfo()));
1552 // Join the stores, which are independent of one another.
1553 if (!MemOpChains
.empty())
1554 Chain
= DAG
.getNode(ISD::TokenFactor
, DL
, MVT::Other
, MemOpChains
);
1556 // Accept direct calls by converting symbolic call addresses to the
1557 // associated Target* opcodes. Force %r1 to be used for indirect
1560 if (auto *G
= dyn_cast
<GlobalAddressSDNode
>(Callee
)) {
1561 Callee
= DAG
.getTargetGlobalAddress(G
->getGlobal(), DL
, PtrVT
);
1562 Callee
= DAG
.getNode(SystemZISD::PCREL_WRAPPER
, DL
, PtrVT
, Callee
);
1563 } else if (auto *E
= dyn_cast
<ExternalSymbolSDNode
>(Callee
)) {
1564 Callee
= DAG
.getTargetExternalSymbol(E
->getSymbol(), PtrVT
);
1565 Callee
= DAG
.getNode(SystemZISD::PCREL_WRAPPER
, DL
, PtrVT
, Callee
);
1566 } else if (IsTailCall
) {
1567 Chain
= DAG
.getCopyToReg(Chain
, DL
, SystemZ::R1D
, Callee
, Glue
);
1568 Glue
= Chain
.getValue(1);
1569 Callee
= DAG
.getRegister(SystemZ::R1D
, Callee
.getValueType());
1572 // Build a sequence of copy-to-reg nodes, chained and glued together.
1573 for (unsigned I
= 0, E
= RegsToPass
.size(); I
!= E
; ++I
) {
1574 Chain
= DAG
.getCopyToReg(Chain
, DL
, RegsToPass
[I
].first
,
1575 RegsToPass
[I
].second
, Glue
);
1576 Glue
= Chain
.getValue(1);
1579 // The first call operand is the chain and the second is the target address.
1580 SmallVector
<SDValue
, 8> Ops
;
1581 Ops
.push_back(Chain
);
1582 Ops
.push_back(Callee
);
1584 // Add argument registers to the end of the list so that they are
1585 // known live into the call.
1586 for (unsigned I
= 0, E
= RegsToPass
.size(); I
!= E
; ++I
)
1587 Ops
.push_back(DAG
.getRegister(RegsToPass
[I
].first
,
1588 RegsToPass
[I
].second
.getValueType()));
1590 // Add a register mask operand representing the call-preserved registers.
1591 const TargetRegisterInfo
*TRI
= Subtarget
.getRegisterInfo();
1592 const uint32_t *Mask
= TRI
->getCallPreservedMask(MF
, CallConv
);
1593 assert(Mask
&& "Missing call preserved mask for calling convention");
1594 Ops
.push_back(DAG
.getRegisterMask(Mask
));
1596 // Glue the call to the argument copies, if any.
1598 Ops
.push_back(Glue
);
1601 SDVTList NodeTys
= DAG
.getVTList(MVT::Other
, MVT::Glue
);
1603 return DAG
.getNode(SystemZISD::SIBCALL
, DL
, NodeTys
, Ops
);
1604 Chain
= DAG
.getNode(SystemZISD::CALL
, DL
, NodeTys
, Ops
);
1605 Glue
= Chain
.getValue(1);
1607 // Mark the end of the call, which is glued to the call itself.
1608 Chain
= DAG
.getCALLSEQ_END(Chain
,
1609 DAG
.getConstant(NumBytes
, DL
, PtrVT
, true),
1610 DAG
.getConstant(0, DL
, PtrVT
, true),
1612 Glue
= Chain
.getValue(1);
1614 // Assign locations to each value returned by this call.
1615 SmallVector
<CCValAssign
, 16> RetLocs
;
1616 CCState
RetCCInfo(CallConv
, IsVarArg
, MF
, RetLocs
, *DAG
.getContext());
1617 RetCCInfo
.AnalyzeCallResult(Ins
, RetCC_SystemZ
);
1619 // Copy all of the result registers out of their specified physreg.
1620 for (unsigned I
= 0, E
= RetLocs
.size(); I
!= E
; ++I
) {
1621 CCValAssign
&VA
= RetLocs
[I
];
1623 // Copy the value out, gluing the copy to the end of the call sequence.
1624 SDValue RetValue
= DAG
.getCopyFromReg(Chain
, DL
, VA
.getLocReg(),
1625 VA
.getLocVT(), Glue
);
1626 Chain
= RetValue
.getValue(1);
1627 Glue
= RetValue
.getValue(2);
1629 // Convert the value of the return register into the value that's
1631 InVals
.push_back(convertLocVTToValVT(DAG
, DL
, VA
, Chain
, RetValue
));
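// Implement the CanLowerReturn target hook: return true if the values in
// Outs can be returned directly in registers.  Returning false makes the
// common code demote the return value to an sret-style hidden parameter.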
bool SystemZTargetLowering::
CanLowerReturn(CallingConv::ID CallConv,
               MachineFunction &MF, bool isVarArg,
               const SmallVectorImpl<ISD::OutputArg> &Outs,
               LLVMContext &Context) const {
  // Detect unsupported vector return types.
  if (Subtarget.hasVector())
    VerifyVectorTypes(Outs);

  // Special case that we cannot easily detect in RetCC_SystemZ since
  // i128 is not a legal type.
  for (auto &Out : Outs)
    if (Out.ArgVT == MVT::i128)
      return false;

  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
  return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
}

SDValue
SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                   bool IsVarArg,
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                                   const SmallVectorImpl<SDValue> &OutVals,
                                   const SDLoc &DL, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // Detect unsupported vector return types.
  if (Subtarget.hasVector())
    VerifyVectorTypes(Outs);

  // Assign locations to each returned value.
  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
  RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);

  // Quick exit for void returns
  if (RetLocs.empty())
    return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);

  // Copy the result values into the output registers.
  SDValue Glue;
  SmallVector<SDValue, 4> RetOps;
  RetOps.push_back(Chain);
  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
    CCValAssign &VA = RetLocs[I];
    SDValue RetValue = OutVals[I];

    // Make the return register live on exit.
    assert(VA.isRegLoc() && "Can only return in registers!");

    // Promote the value as required.
    RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);

    // Chain and glue the copies together.
    Register Reg = VA.getLocReg();
    Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
  }

  // Update chain and glue.
  RetOps[0] = Chain;
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
}

// Return true if Op is an intrinsic node with chain that returns the CC value
// as its only (other) argument.  Provide the associated SystemZISD opcode and
// the mask of valid CC values if so.
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
                                      unsigned &CCValid) {
  unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  switch (Id) {
  case Intrinsic::s390_tbegin:
    Opcode = SystemZISD::TBEGIN;
    CCValid = SystemZ::CCMASK_TBEGIN;
    return true;

  case Intrinsic::s390_tbegin_nofloat:
    Opcode = SystemZISD::TBEGIN_NOFLOAT;
    CCValid = SystemZ::CCMASK_TBEGIN;
    return true;

  case Intrinsic::s390_tend:
    Opcode = SystemZISD::TEND;
    CCValid = SystemZ::CCMASK_TEND;
    return true;

  default:
    return false;
  }
}

// Return true if Op is an intrinsic node without chain that returns the
// CC value as its final argument.  Provide the associated SystemZISD
// opcode and the mask of valid CC values if so.
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  switch (Id) {
  case Intrinsic::s390_vpkshs:
  case Intrinsic::s390_vpksfs:
  case Intrinsic::s390_vpksgs:
    Opcode = SystemZISD::PACKS_CC;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vpklshs:
  case Intrinsic::s390_vpklsfs:
  case Intrinsic::s390_vpklsgs:
    Opcode = SystemZISD::PACKLS_CC;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vceqbs:
  case Intrinsic::s390_vceqhs:
  case Intrinsic::s390_vceqfs:
  case Intrinsic::s390_vceqgs:
    Opcode = SystemZISD::VICMPES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vchbs:
  case Intrinsic::s390_vchhs:
  case Intrinsic::s390_vchfs:
  case Intrinsic::s390_vchgs:
    Opcode = SystemZISD::VICMPHS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vchlbs:
  case Intrinsic::s390_vchlhs:
  case Intrinsic::s390_vchlfs:
  case Intrinsic::s390_vchlgs:
    Opcode = SystemZISD::VICMPHLS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vtm:
    Opcode = SystemZISD::VTM;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfaebs:
  case Intrinsic::s390_vfaehs:
  case Intrinsic::s390_vfaefs:
    Opcode = SystemZISD::VFAE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfaezbs:
  case Intrinsic::s390_vfaezhs:
  case Intrinsic::s390_vfaezfs:
    Opcode = SystemZISD::VFAEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfeebs:
  case Intrinsic::s390_vfeehs:
  case Intrinsic::s390_vfeefs:
    Opcode = SystemZISD::VFEE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfeezbs:
  case Intrinsic::s390_vfeezhs:
  case Intrinsic::s390_vfeezfs:
    Opcode = SystemZISD::VFEEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfenebs:
  case Intrinsic::s390_vfenehs:
  case Intrinsic::s390_vfenefs:
    Opcode = SystemZISD::VFENE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfenezbs:
  case Intrinsic::s390_vfenezhs:
  case Intrinsic::s390_vfenezfs:
    Opcode = SystemZISD::VFENEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vistrbs:
  case Intrinsic::s390_vistrhs:
  case Intrinsic::s390_vistrfs:
    Opcode = SystemZISD::VISTR_CC;
    CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
    return true;

  case Intrinsic::s390_vstrcbs:
  case Intrinsic::s390_vstrchs:
  case Intrinsic::s390_vstrcfs:
    Opcode = SystemZISD::VSTRC_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vstrczbs:
  case Intrinsic::s390_vstrczhs:
  case Intrinsic::s390_vstrczfs:
    Opcode = SystemZISD::VSTRCZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vstrsb:
  case Intrinsic::s390_vstrsh:
  case Intrinsic::s390_vstrsf:
    Opcode = SystemZISD::VSTRS_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vstrszb:
  case Intrinsic::s390_vstrszh:
  case Intrinsic::s390_vstrszf:
    Opcode = SystemZISD::VSTRSZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfcedbs:
  case Intrinsic::s390_vfcesbs:
    Opcode = SystemZISD::VFCMPES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfchdbs:
  case Intrinsic::s390_vfchsbs:
    Opcode = SystemZISD::VFCMPHS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfchedbs:
  case Intrinsic::s390_vfchesbs:
    Opcode = SystemZISD::VFCMPHES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vftcidb:
  case Intrinsic::s390_vftcisb:
    Opcode = SystemZISD::VFTCI;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_tdc:
    Opcode = SystemZISD::TDC;
    CCValid = SystemZ::CCMASK_TDC;
    return true;

  default:
    return false;
  }
}

// Emit an intrinsic with chain and an explicit CC register result.
static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
                                           unsigned Opcode) {
  // Copy all operands except the intrinsic ID.
  unsigned NumOps = Op.getNumOperands();
  SmallVector<SDValue, 6> Ops;
  Ops.reserve(NumOps - 1);
  Ops.push_back(Op.getOperand(0));
  for (unsigned I = 2; I < NumOps; ++I)
    Ops.push_back(Op.getOperand(I));

  assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
  SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
  SDValue OldChain = SDValue(Op.getNode(), 1);
  SDValue NewChain = SDValue(Intr.getNode(), 1);
  DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
  return Intr.getNode();
}

// Emit an intrinsic with an explicit CC register result.
static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
                                   unsigned Opcode) {
  // Copy all operands except the intrinsic ID.
  unsigned NumOps = Op.getNumOperands();
  SmallVector<SDValue, 6> Ops;
  Ops.reserve(NumOps - 1);
  for (unsigned I = 1; I < NumOps; ++I)
    Ops.push_back(Op.getOperand(I));

  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
  return Intr.getNode();
}

// CC is a comparison that will be implemented using an integer or
// floating-point comparison.  Return the condition code mask for
// a branch on true.  In the integer case, CCMASK_CMP_UO is set for
// unsigned comparisons and clear for signed ones.  In the floating-point
// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
static unsigned CCMaskForCondCode(ISD::CondCode CC) {
#define CONV(X) \
  case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
  case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
  case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X

  switch (CC) {
  default:
    llvm_unreachable("Invalid integer condition!");

  CONV(EQ);
  CONV(NE);
  CONV(GT);
  CONV(GE);
  CONV(LT);
  CONV(LE);

  case ISD::SETO:  return SystemZ::CCMASK_CMP_O;
  case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
  }
#undef CONV
}

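// Comparisons with the constants 1 or -1 can often be replaced by an
// equivalent comparison against zero: for example, a signed "X > -1"
// becomes "X >= 0" and "X < 1" becomes "X <= 0".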
// If C can be converted to a comparison against zero, adjust the operands
// as necessary.
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
  if (C.ICmpType == SystemZICMP::UnsignedOnly)
    return;

  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
  if (!ConstOp1)
    return;

  int64_t Value = ConstOp1->getSExtValue();
  if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
      (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
      (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
      (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
    C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
    C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
  }
}

// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
// adjust the operands as necessary.
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
                             Comparison &C) {
  // For us to make any changes, it must be a comparison between a single-use
  // load and a constant.
  if (!C.Op0.hasOneUse() ||
      C.Op0.getOpcode() != ISD::LOAD ||
      C.Op1.getOpcode() != ISD::Constant)
    return;

  // We must have an 8- or 16-bit load.
  auto *Load = cast<LoadSDNode>(C.Op0);
  unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits();
  if (NumBits != 8 && NumBits != 16)
    return;

  // The load must be an extending one and the constant must be within the
  // range of the unextended value.
  auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
  uint64_t Value = ConstOp1->getZExtValue();
  uint64_t Mask = (1 << NumBits) - 1;
  if (Load->getExtensionType() == ISD::SEXTLOAD) {
    // Make sure that ConstOp1 is in range of C.Op0.
    int64_t SignedValue = ConstOp1->getSExtValue();
    if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
      return;
    if (C.ICmpType != SystemZICMP::SignedOnly) {
      // Unsigned comparison between two sign-extended values is equivalent
      // to unsigned comparison between two zero-extended values.
      Value &= Mask;
    } else if (NumBits == 8) {
      // Try to treat the comparison as unsigned, so that we can use CLI.
      // Adjust CCMask and Value as necessary.
      if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
        // Test whether the high bit of the byte is set.
        Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
      else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
        // Test whether the high bit of the byte is clear.
        Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
      else
        // No instruction exists for this combination.
        return;
      C.ICmpType = SystemZICMP::UnsignedOnly;
    }
  } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
    if (Value > Mask)
      return;
    // If the constant is in range, we can use any comparison.
    C.ICmpType = SystemZICMP::Any;
  } else
    return;

  // Make sure that the first operand is an i32 of the right extension type.
  ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
                              ISD::SEXTLOAD :
                              ISD::ZEXTLOAD);
  if (C.Op0.getValueType() != MVT::i32 ||
      Load->getExtensionType() != ExtType) {
    C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
                           Load->getBasePtr(), Load->getPointerInfo(),
                           Load->getMemoryVT(), Load->getAlignment(),
                           Load->getMemOperand()->getFlags());
    // Update the chain uses.
    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
  }

  // Make sure that the second operand is an i32 with the right value.
  if (C.Op1.getValueType() != MVT::i32 ||
      Value != ConstOp1->getZExtValue())
    C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
}

// Return true if Op is either an unextended load, or a load suitable
// for integer register-memory comparisons of type ICmpType.
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
  auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
  if (Load) {
    // There are no instructions to compare a register with a memory byte.
    if (Load->getMemoryVT() == MVT::i8)
      return false;
    // Otherwise decide on extension type.
    switch (Load->getExtensionType()) {
    case ISD::NON_EXTLOAD:
      return true;
    case ISD::SEXTLOAD:
      return ICmpType != SystemZICMP::UnsignedOnly;
    case ISD::ZEXTLOAD:
      return ICmpType != SystemZICMP::SignedOnly;
    default:
      break;
    }
  }
  return false;
}

// Return true if it is better to swap the operands of C.
static bool shouldSwapCmpOperands(const Comparison &C) {
  // Leave f128 comparisons alone, since they have no memory forms.
  if (C.Op0.getValueType() == MVT::f128)
    return false;

  // Always keep a floating-point constant second, since comparisons with
  // zero can use LOAD TEST and comparisons with other constants make a
  // natural memory operand.
  if (isa<ConstantFPSDNode>(C.Op1))
    return false;

  // Never swap comparisons with zero since there are many ways to optimize
  // those later.
  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
  if (ConstOp1 && ConstOp1->getZExtValue() == 0)
    return false;

  // Also keep natural memory operands second if the loaded value is
  // only used here.  Several comparisons have memory forms.
  if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
    return false;

  // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
  // In that case we generally prefer the memory to be second.
  if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
    // The only exceptions are when the second operand is a constant and
    // we can use things like CHHSI.
    if (!ConstOp1)
      return true;
    // The unsigned memory-immediate instructions can handle 16-bit
    // unsigned integers.
    if (C.ICmpType != SystemZICMP::SignedOnly &&
        isUInt<16>(ConstOp1->getZExtValue()))
      return false;
    // The signed memory-immediate instructions can handle 16-bit
    // signed integers.
    if (C.ICmpType != SystemZICMP::UnsignedOnly &&
        isInt<16>(ConstOp1->getSExtValue()))
      return false;
    return true;
  }

  // Try to promote the use of CGFR and CLGFR.
  unsigned Opcode0 = C.Op0.getOpcode();
  if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
    return true;
  if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
    return true;
  if (C.ICmpType != SystemZICMP::SignedOnly &&
      Opcode0 == ISD::AND &&
      C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
    return true;

  return false;
}

// Return a version of comparison CC mask CCMask in which the LT and GT
// actions are swapped.
static unsigned reverseCCMask(unsigned CCMask) {
  return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
          (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
          (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
          (CCMask & SystemZ::CCMASK_CMP_UO));
}

// Check whether C tests for equality between X and Y and whether X - Y
// or Y - X is also computed.  In that case it's better to compare the
// result of the subtraction against zero.
static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
                                 Comparison &C) {
  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
      C.CCMask == SystemZ::CCMASK_CMP_NE) {
    for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
      SDNode *N = *I;
      if (N->getOpcode() == ISD::SUB &&
          ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
           (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
        C.Op0 = SDValue(N, 0);
        C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
        return;
      }
    }
  }
}

// Check whether C compares a floating-point value with zero and if that
// floating-point value is also negated.  In this case we can use the
// negation to set CC, so avoiding separate LOAD AND TEST and
// LOAD (NEGATIVE/COMPLEMENT) instructions.
static void adjustForFNeg(Comparison &C) {
  auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
  if (C1 && C1->isZero()) {
    for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
      SDNode *N = *I;
      if (N->getOpcode() == ISD::FNEG) {
        C.Op0 = SDValue(N, 0);
        C.CCMask = reverseCCMask(C.CCMask);
        return;
      }
    }
  }
}

// Check whether C compares (shl X, 32) with 0 and whether X is
// also sign-extended.  In that case it is better to test the result
// of the sign extension using LTGFR.
//
// This case is important because InstCombine transforms a comparison
// with (sext (trunc X)) into a comparison with (shl X, 32).
static void adjustForLTGFR(Comparison &C) {
  // Check for a comparison between (shl X, 32) and 0.
  if (C.Op0.getOpcode() == ISD::SHL &&
      C.Op0.getValueType() == MVT::i64 &&
      C.Op1.getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
    auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
    if (C1 && C1->getZExtValue() == 32) {
      SDValue ShlOp0 = C.Op0.getOperand(0);
      // See whether X has any SIGN_EXTEND_INREG uses.
      for (auto I = ShlOp0->use_begin(), E = ShlOp0->use_end(); I != E; ++I) {
        SDNode *N = *I;
        if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
            cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
          C.Op0 = SDValue(N, 0);
          return;
        }
      }
    }
  }
}

// If C compares the truncation of an extending load, try to compare
// the untruncated value instead.  This exposes more opportunities to
// reuse CC.
static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
                               Comparison &C) {
  if (C.Op0.getOpcode() == ISD::TRUNCATE &&
      C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
      C.Op1.getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
    auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
    if (L->getMemoryVT().getStoreSizeInBits() <= C.Op0.getValueSizeInBits()) {
      unsigned Type = L->getExtensionType();
      if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
          (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
        C.Op0 = C.Op0.getOperand(0);
        C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
      }
    }
  }
}

// Return true if shift operation N has an in-range constant shift value.
// Store it in ShiftVal if so.
static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
  auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!Shift)
    return false;

  uint64_t Amount = Shift->getZExtValue();
  if (Amount >= N.getValueSizeInBits())
    return false;

  ShiftVal = Amount;
  return true;
}

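// A comparison such as "(X & 0x8000) != 0" can be implemented as a single
// TEST UNDER MASK: the AND operand supplies the mask and the CC mask below
// selects the "some bits set" outcome (CCMASK_TM_SOME_1).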
// Check whether an AND with Mask is suitable for a TEST UNDER MASK
// instruction and whether the CC value is descriptive enough to handle
// a comparison of type Opcode between the AND result and CmpVal.
// CCMask says which comparison result is being tested and BitSize is
// the number of bits in the operands.  If TEST UNDER MASK can be used,
// return the corresponding CC mask, otherwise return 0.
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
                                     uint64_t Mask, uint64_t CmpVal,
                                     unsigned ICmpType) {
  assert(Mask != 0 && "ANDs with zero should have been removed by now");

  // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
  if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
      !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
    return 0;

  // Work out the masks for the lowest and highest bits.
  unsigned HighShift = 63 - countLeadingZeros(Mask);
  uint64_t High = uint64_t(1) << HighShift;
  uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);

  // Signed ordered comparisons are effectively unsigned if the sign
  // bit is clear.
  bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);

  // Check for equality comparisons with 0, or the equivalent.
  if (CmpVal == 0) {
    if (CCMask == SystemZ::CCMASK_CMP_EQ)
      return SystemZ::CCMASK_TM_ALL_0;
    if (CCMask == SystemZ::CCMASK_CMP_NE)
      return SystemZ::CCMASK_TM_SOME_1;
  }
  if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
    if (CCMask == SystemZ::CCMASK_CMP_LT)
      return SystemZ::CCMASK_TM_ALL_0;
    if (CCMask == SystemZ::CCMASK_CMP_GE)
      return SystemZ::CCMASK_TM_SOME_1;
  }
  if (EffectivelyUnsigned && CmpVal < Low) {
    if (CCMask == SystemZ::CCMASK_CMP_LE)
      return SystemZ::CCMASK_TM_ALL_0;
    if (CCMask == SystemZ::CCMASK_CMP_GT)
      return SystemZ::CCMASK_TM_SOME_1;
  }

  // Check for equality comparisons with the mask, or the equivalent.
  if (CmpVal == Mask) {
    if (CCMask == SystemZ::CCMASK_CMP_EQ)
      return SystemZ::CCMASK_TM_ALL_1;
    if (CCMask == SystemZ::CCMASK_CMP_NE)
      return SystemZ::CCMASK_TM_SOME_0;
  }
  if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
    if (CCMask == SystemZ::CCMASK_CMP_GT)
      return SystemZ::CCMASK_TM_ALL_1;
    if (CCMask == SystemZ::CCMASK_CMP_LE)
      return SystemZ::CCMASK_TM_SOME_0;
  }
  if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
    if (CCMask == SystemZ::CCMASK_CMP_GE)
      return SystemZ::CCMASK_TM_ALL_1;
    if (CCMask == SystemZ::CCMASK_CMP_LT)
      return SystemZ::CCMASK_TM_SOME_0;
  }

  // Check for ordered comparisons with the top bit.
  if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
    if (CCMask == SystemZ::CCMASK_CMP_LE)
      return SystemZ::CCMASK_TM_MSB_0;
    if (CCMask == SystemZ::CCMASK_CMP_GT)
      return SystemZ::CCMASK_TM_MSB_1;
  }
  if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
    if (CCMask == SystemZ::CCMASK_CMP_LT)
      return SystemZ::CCMASK_TM_MSB_0;
    if (CCMask == SystemZ::CCMASK_CMP_GE)
      return SystemZ::CCMASK_TM_MSB_1;
  }

  // If there are just two bits, we can do equality checks for Low and High
  // as well.
  if (Mask == Low + High) {
    if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
      return SystemZ::CCMASK_TM_MIXED_MSB_0;
    if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
      return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
    if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
      return SystemZ::CCMASK_TM_MIXED_MSB_1;
    if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
      return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
  }

  // Looks like we've exhausted our options.
  return 0;
}

// See whether C can be implemented as a TEST UNDER MASK instruction.
// Update the arguments with the TM version if so.
static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
                                   Comparison &C) {
  // Check that we have a comparison with a constant.
  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
  if (!ConstOp1)
    return;
  uint64_t CmpVal = ConstOp1->getZExtValue();

  // Check whether the nonconstant input is an AND with a constant mask.
  Comparison NewC(C);
  uint64_t MaskVal;
  ConstantSDNode *Mask = nullptr;
  if (C.Op0.getOpcode() == ISD::AND) {
    NewC.Op0 = C.Op0.getOperand(0);
    NewC.Op1 = C.Op0.getOperand(1);
    Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
    if (!Mask)
      return;
    MaskVal = Mask->getZExtValue();
  } else {
    // There is no instruction to compare with a 64-bit immediate
    // so use TMHH instead if possible.  We need an unsigned ordered
    // comparison with an i64 immediate.
    if (NewC.Op0.getValueType() != MVT::i64 ||
        NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
        NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
        NewC.ICmpType == SystemZICMP::SignedOnly)
      return;
    // Convert LE and GT comparisons into LT and GE.
    if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
        NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
      if (CmpVal == uint64_t(-1))
        return;
      CmpVal += 1;
      NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
    }
    // If the low N bits of Op1 are zero then the low N bits of Op0 can
    // be masked off without changing the result.
    MaskVal = -(CmpVal & -CmpVal);
    NewC.ICmpType = SystemZICMP::UnsignedOnly;
  }
  if (!MaskVal)
    return;

  // Check whether the combination of mask, comparison value and comparison
  // type are suitable.
  unsigned BitSize = NewC.Op0.getValueSizeInBits();
  unsigned NewCCMask, ShiftVal;
  if (NewC.ICmpType != SystemZICMP::SignedOnly &&
      NewC.Op0.getOpcode() == ISD::SHL &&
      isSimpleShift(NewC.Op0, ShiftVal) &&
      (MaskVal >> ShiftVal != 0) &&
      ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
      (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
                                        MaskVal >> ShiftVal,
                                        CmpVal >> ShiftVal,
                                        SystemZICMP::Any))) {
    NewC.Op0 = NewC.Op0.getOperand(0);
    MaskVal >>= ShiftVal;
  } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
             NewC.Op0.getOpcode() == ISD::SRL &&
             isSimpleShift(NewC.Op0, ShiftVal) &&
             (MaskVal << ShiftVal != 0) &&
             ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
             (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
                                               MaskVal << ShiftVal,
                                               CmpVal << ShiftVal,
                                               SystemZICMP::UnsignedOnly))) {
    NewC.Op0 = NewC.Op0.getOperand(0);
    MaskVal <<= ShiftVal;
  } else {
    NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
                                     NewC.ICmpType);
    if (!NewCCMask)
      return;
  }

  // Go ahead and make the change.
  C.Opcode = SystemZISD::TM;
  C.Op0 = NewC.Op0;
  if (Mask && Mask->getZExtValue() == MaskVal)
    C.Op1 = SDValue(Mask, 0);
  else
    C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
  C.CCValid = SystemZ::CCMASK_TM;
  C.CCMask = NewCCMask;
}

// See whether the comparison argument contains a redundant AND
// and remove it if so.  This sometimes happens due to the generic
// BRCOND expansion.
static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
                                  Comparison &C) {
  if (C.Op0.getOpcode() != ISD::AND)
    return;
  auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
  if (!Mask)
    return;
  KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
  if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
    return;

  C.Op0 = C.Op0.getOperand(0);
}

// Return a Comparison that tests the condition-code result of intrinsic
// node Call against constant integer CC using comparison code Cond.
// Opcode is the opcode of the SystemZISD operation for the intrinsic
// and CCValid is the set of possible condition-code results.
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
                                  SDValue Call, unsigned CCValid, uint64_t CC,
                                  ISD::CondCode Cond) {
  Comparison C(Call, SDValue());
  C.Opcode = Opcode;
  C.CCValid = CCValid;
  if (Cond == ISD::SETEQ)
    // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
    C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
  else if (Cond == ISD::SETNE)
    // ...and the inverse of that.
    C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
  else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
    // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
    // always true for CC>3.
    C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
  else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
    // ...and the inverse of that.
    C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
  else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
    // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
    // always true for CC>3.
    C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
  else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
    // ...and the inverse of that.
    C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
  else
    llvm_unreachable("Unexpected integer comparison type");
  C.CCMask &= CCValid;
  return C;
}

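// A CC mask is a 4-bit value in which bit 3 corresponds to CC 0 and bit 0
// to CC 3; the masks built by getIntrinsicCmp and CCMaskForCondCode follow
// this convention.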
// Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
                         ISD::CondCode Cond, const SDLoc &DL) {
  if (CmpOp1.getOpcode() == ISD::Constant) {
    uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
    unsigned Opcode, CCValid;
    if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
        isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
      return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
    if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
        isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
      return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
  }
  Comparison C(CmpOp0, CmpOp1);
  C.CCMask = CCMaskForCondCode(Cond);
  if (C.Op0.getValueType().isFloatingPoint()) {
    C.CCValid = SystemZ::CCMASK_FCMP;
    C.Opcode = SystemZISD::FCMP;
    adjustForFNeg(C);
  } else {
    C.CCValid = SystemZ::CCMASK_ICMP;
    C.Opcode = SystemZISD::ICMP;
    // Choose the type of comparison.  Equality and inequality tests can
    // use either signed or unsigned comparisons.  The choice also doesn't
    // matter if both sign bits are known to be clear.  In those cases we
    // want to give the main isel code the freedom to choose whichever
    // form fits best.
    if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
        C.CCMask == SystemZ::CCMASK_CMP_NE ||
        (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
      C.ICmpType = SystemZICMP::Any;
    else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
      C.ICmpType = SystemZICMP::UnsignedOnly;
    else
      C.ICmpType = SystemZICMP::SignedOnly;
    C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
    adjustForRedundantAnd(DAG, DL, C);
    adjustZeroCmp(DAG, DL, C);
    adjustSubwordCmp(DAG, DL, C);
    adjustForSubtraction(DAG, DL, C);
    adjustForLTGFR(C);
    adjustICmpTruncate(DAG, DL, C);
  }

  if (shouldSwapCmpOperands(C)) {
    std::swap(C.Op0, C.Op1);
    C.CCMask = reverseCCMask(C.CCMask);
  }

  adjustForTestUnderMask(DAG, DL, C);
  return C;
}

// Emit the comparison instruction described by C.
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
  if (!C.Op1.getNode()) {
    SDNode *Node;
    switch (C.Op0.getOpcode()) {
    case ISD::INTRINSIC_W_CHAIN:
      Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
      return SDValue(Node, 0);
    case ISD::INTRINSIC_WO_CHAIN:
      Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
      return SDValue(Node, Node->getNumValues() - 1);
    default:
      llvm_unreachable("Invalid comparison operands");
    }
  }
  if (C.Opcode == SystemZISD::ICMP)
    return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
                       DAG.getConstant(C.ICmpType, DL, MVT::i32));
  if (C.Opcode == SystemZISD::TM) {
    bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
                         bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
    return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
                       DAG.getConstant(RegisterOnly, DL, MVT::i32));
  }
  return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
}

// Implement a 32-bit *MUL_LOHI operation by extending both operands to
// 64 bits.  Extend is the extension type to use.  Store the high part
// in Hi and the low part in Lo.
static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
                            SDValue Op0, SDValue Op1, SDValue &Hi,
                            SDValue &Lo) {
  Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
  Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
  SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
  Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
                   DAG.getConstant(32, DL, MVT::i64));
  Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
  Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
}

// Lower a binary operation that produces two VT results, one in each
// half of a GR128 pair.  Op0 and Op1 are the VT operands to the operation,
// and Opcode performs the GR128 operation.  Store the even register result
// in Even and the odd register result in Odd.
static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                             unsigned Opcode, SDValue Op0, SDValue Op1,
                             SDValue &Even, SDValue &Odd) {
  SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
  bool Is32Bit = is32Bit(VT);
  Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
  Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
}

// Return an i32 value that is 1 if the CC value produced by CCReg is
// in the mask CCMask and 0 otherwise.  CC is known to have a value
// in CCValid, so other values can be ignored.
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
                         unsigned CCValid, unsigned CCMask) {
  SDValue Ops[] = { DAG.getConstant(1, DL, MVT::i32),
                    DAG.getConstant(0, DL, MVT::i32),
                    DAG.getConstant(CCValid, DL, MVT::i32),
                    DAG.getConstant(CCMask, DL, MVT::i32), CCReg };
  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
}

// Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
// be done directly.  IsFP is true if CC is for a floating-point rather than
// integer comparison.
static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) {
  switch (CC) {
  case ISD::SETOEQ:
  case ISD::SETEQ:
    return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE;

  case ISD::SETOGE:
  case ISD::SETGE:
    return IsFP ? SystemZISD::VFCMPHE : static_cast<SystemZISD::NodeType>(0);

  case ISD::SETOGT:
  case ISD::SETGT:
    return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH;

  case ISD::SETUGT:
    return IsFP ? static_cast<SystemZISD::NodeType>(0) : SystemZISD::VICMPHL;

  default:
    return 0;
  }
}

// Return the SystemZISD vector comparison operation for CC or its inverse,
// or 0 if neither can be done directly.  Indicate in Invert whether the
// result is for the inverse of CC.  IsFP is true if CC is for a
// floating-point rather than integer comparison.
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
                                            bool &Invert) {
  if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
    Invert = false;
    return Opcode;
  }

  CC = ISD::getSetCCInverse(CC, !IsFP);
  if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
    Invert = true;
    return Opcode;
  }

  return 0;
}

// Return a v2f64 that contains the extended form of elements Start and Start+1
// of v4f32 value Op.
static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
                                  SDValue Op) {
  int Mask[] = { Start, -1, Start + 1, -1 };
  Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
  return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
}

// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
// producing a result of type VT.
SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
                                            const SDLoc &DL, EVT VT,
                                            SDValue CmpOp0,
                                            SDValue CmpOp1) const {
  // There is no hardware support for v4f32 (unless we have the vector
  // enhancements facility 1), so extend the vector into two v2f64s
  // and compare those.
  if (CmpOp0.getValueType() == MVT::v4f32 &&
      !Subtarget.hasVectorEnhancements1()) {
    SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0);
    SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0);
    SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1);
    SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1);
    SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
    SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
    return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
  }
  return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
}

// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
// an integer mask of type VT.
SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
                                                const SDLoc &DL, EVT VT,
                                                ISD::CondCode CC,
                                                SDValue CmpOp0,
                                                SDValue CmpOp1) const {
  bool IsFP = CmpOp0.getValueType().isFloatingPoint();
  bool Invert = false;
  SDValue Cmp;
  switch (CC) {
    // Handle tests for order using (or (ogt y x) (oge x y)).
  case ISD::SETUO:
    Invert = true;
    LLVM_FALLTHROUGH;
  case ISD::SETO: {
    assert(IsFP && "Unexpected integer comparison");
    SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
    SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1);
    Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
    break;
  }

    // Handle <> tests using (or (ogt y x) (ogt x y)).
  case ISD::SETUEQ:
    Invert = true;
    LLVM_FALLTHROUGH;
  case ISD::SETONE: {
    assert(IsFP && "Unexpected integer comparison");
    SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
    SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1);
    Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
    break;
  }

    // Otherwise a single comparison is enough.  It doesn't really
    // matter whether we try the inversion or the swap first, since
    // there are no cases where both work.
  default:
    if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
      Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1);
    else {
      CC = ISD::getSetCCSwappedOperands(CC);
      if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
        Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0);
      else
        llvm_unreachable("Unhandled comparison");
    }
    break;
  }
  if (Invert) {
    SDValue Mask =
      DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
    Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
  }
  return Cmp;
}

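// Lower an ISD::SETCC node.  Vector comparisons go through lowerVectorSETCC;
// scalar comparisons are emitted as an explicit CC-producing comparison
// followed by a SELECT_CCMASK that materializes the 0/1 result.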
SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDValue CmpOp0 = Op.getOperand(0);
  SDValue CmpOp1 = Op.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  if (VT.isVector())
    return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);

  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
  SDValue CCReg = emitCmp(DAG, DL, C);
  return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
}

SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue CmpOp0 = Op.getOperand(2);
  SDValue CmpOp1 = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  SDLoc DL(Op);

  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
  SDValue CCReg = emitCmp(DAG, DL, C);
  return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
                     Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32),
                     DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
}

// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
// allowing Pos and Neg to be wider than CmpOp.
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
  return (Neg.getOpcode() == ISD::SUB &&
          Neg.getOperand(0).getOpcode() == ISD::Constant &&
          cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
          Neg.getOperand(1) == Pos &&
          (Pos == CmpOp ||
           (Pos.getOpcode() == ISD::SIGN_EXTEND &&
            Pos.getOperand(0) == CmpOp)));
}

// Return the absolute or negative absolute of Op; IsNegative decides which.
static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
                           bool IsNegative) {
  Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op);
  if (IsNegative)
    Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
                     DAG.getConstant(0, DL, Op.getValueType()), Op);
  return Op;
}

SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDValue CmpOp0 = Op.getOperand(0);
  SDValue CmpOp1 = Op.getOperand(1);
  SDValue TrueOp = Op.getOperand(2);
  SDValue FalseOp = Op.getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  SDLoc DL(Op);

  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));

  // Check for absolute and negative-absolute selections, including those
  // where the comparison value is sign-extended (for LPGFR and LNGFR).
  // This check supplements the one in DAGCombiner.
  if (C.Opcode == SystemZISD::ICMP &&
      C.CCMask != SystemZ::CCMASK_CMP_EQ &&
      C.CCMask != SystemZ::CCMASK_CMP_NE &&
      C.Op1.getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
    if (isAbsolute(C.Op0, TrueOp, FalseOp))
      return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
    if (isAbsolute(C.Op0, FalseOp, TrueOp))
      return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
  }

  SDValue CCReg = emitCmp(DAG, DL, C);
  SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32),
                   DAG.getConstant(C.CCMask, DL, MVT::i32), CCReg};

  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
}

SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Node);
  const GlobalValue *GV = Node->getGlobal();
  int64_t Offset = Node->getOffset();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  CodeModel::Model CM = DAG.getTarget().getCodeModel();

  SDValue Result;
  if (Subtarget.isPC32DBLSymbol(GV, CM)) {
    // Assign anchors at 1<<12 byte boundaries.
    uint64_t Anchor = Offset & ~uint64_t(0xfff);
    Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
    Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);

    // The offset can be folded into the address if it is aligned to a halfword.
    Offset -= Anchor;
    if (Offset != 0 && (Offset & 1) == 0) {
      SDValue Full = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
      Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
      Offset = 0;
    }
  } else {
    Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
    Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
    Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
                         MachinePointerInfo::getGOT(DAG.getMachineFunction()));
  }

  // If there was a non-zero offset that we didn't fold, create an explicit
  // addition for it.
  if (Offset != 0)
    Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
                         DAG.getConstant(Offset, DL, PtrVT));

  return Result;
}

SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
                                                 SelectionDAG &DAG,
                                                 unsigned Opcode,
                                                 SDValue GOTOffset) const {
  SDLoc DL(Node);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue Chain = DAG.getEntryNode();
  SDValue Glue;

  // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
  Glue = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
  Glue = Chain.getValue(1);

  // The first call operand is the chain and the second is the TLS symbol.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
                                           Node->getValueType(0),
                                           0, 0));

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
  Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Glue the call to the argument copies.
  Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Copy the return value from %r2.
  return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
}

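// Materialize the thread pointer, which is kept split across two access
// registers: %a0 holds the high 32 bits and %a1 the low 32 bits.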
SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
                                                  SelectionDAG &DAG) const {
  SDValue Chain = DAG.getEntryNode();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // The high part of the thread pointer is in access register 0.
  SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
  TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);

  // The low part of the thread pointer is in access register 1.
  SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
  TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);

  // Merge them into a single 64-bit address.
  SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
                                    DAG.getConstant(32, DL, PtrVT));
  return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
}

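// Lower a TLS global address by computing its offset from the thread
// pointer according to the selected TLS model and adding that offset to
// the thread pointer.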
SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
                                                     SelectionDAG &DAG) const {
  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(Node, DAG);
  SDLoc DL(Node);
  const GlobalValue *GV = Node->getGlobal();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  TLSModel::Model model = DAG.getTarget().getTLSModel(GV);

  SDValue TP = lowerThreadPointer(DL, DAG);

  // Get the offset of GA from the thread pointer, based on the TLS model.
  SDValue Offset;
  switch (model) {
  case TLSModel::GeneralDynamic: {
    // Load the GOT offset of the tls_index (module ID / per-symbol offset).
    SystemZConstantPoolValue *CPV =
      SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);

    Offset = DAG.getConstantPool(CPV, PtrVT, 8);
    Offset = DAG.getLoad(
        PtrVT, DL, DAG.getEntryNode(), Offset,
        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

    // Call __tls_get_offset to retrieve the offset.
    Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
    break;
  }

  case TLSModel::LocalDynamic: {
    // Load the GOT offset of the module ID.
    SystemZConstantPoolValue *CPV =
      SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);

    Offset = DAG.getConstantPool(CPV, PtrVT, 8);
    Offset = DAG.getLoad(
        PtrVT, DL, DAG.getEntryNode(), Offset,
        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

    // Call __tls_get_offset to retrieve the module base offset.
    Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);

    // Note: The SystemZLDCleanupPass will remove redundant computations
    // of the module base offset.  Count total number of local-dynamic
    // accesses to trigger execution of that pass.
    SystemZMachineFunctionInfo* MFI =
      DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
    MFI->incNumLocalDynamicTLSAccesses();

    // Add the per-symbol offset.
    CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);

    SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
    DTPOffset = DAG.getLoad(
        PtrVT, DL, DAG.getEntryNode(), DTPOffset,
        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

    Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
    break;
  }

  case TLSModel::InitialExec: {
    // Load the offset from the GOT.
    Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
                                        SystemZII::MO_INDNTPOFF);
    Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
    Offset =
        DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
                    MachinePointerInfo::getGOT(DAG.getMachineFunction()));
    break;
  }

  case TLSModel::LocalExec: {
    // Force the offset into the constant pool and load it from there.
    SystemZConstantPoolValue *CPV =
      SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);

    Offset = DAG.getConstantPool(CPV, PtrVT, 8);
    Offset = DAG.getLoad(
        PtrVT, DL, DAG.getEntryNode(), Offset,
        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
    break;
  }
  }

  // Add the base and offset together.
  return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
}

SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Node);
  const BlockAddress *BA = Node->getBlockAddress();
  int64_t Offset = Node->getOffset();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
  return Result;
}

SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
                                              SelectionDAG &DAG) const {
  SDLoc DL(JT);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);

  // Use LARL to load the address of the table.
  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
}

SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(CP);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  SDValue Result;
  if (CP->isMachineConstantPoolEntry())
    Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
                                       CP->getAlignment());
  else
    Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
                                       CP->getAlignment(), CP->getOffset());

  // Use LARL to load the address of the constant pool entry.
  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
}

SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);

  SDLoc DL(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // If the back chain frame index has not been allocated yet, do so.
  SystemZMachineFunctionInfo *FI = MF.getInfo<SystemZMachineFunctionInfo>();
  int BackChainIdx = FI->getFramePointerSaveIndex();
  if (!BackChainIdx) {
    // By definition, the frame address is the address of the back chain.
    BackChainIdx = MFI.CreateFixedObject(8, -SystemZMC::CallFrameSize, false);
    FI->setFramePointerSaveIndex(BackChainIdx);
  }
  SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);

  // FIXME The frontend should detect this case.
  if (Depth > 0) {
    report_fatal_error("Unsupported stack frame traversal count");
  }

  return BackChain;
}

SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
                                               SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDLoc DL(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // FIXME The frontend should detect this case.
  if (Depth > 0)
    report_fatal_error("Unsupported stack frame traversal count");

  // Return R14D, which has the return address. Mark it an implicit live-in.
  unsigned LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
}
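
// Lower a BITCAST between f32 and i32 (in either direction) by going
// through the high 32 bits of a 64-bit register, folding bitcasts of
// normal loads directly into loads of the new type.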
SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue In = Op.getOperand(0);
  EVT InVT = In.getValueType();
  EVT ResVT = Op.getValueType();

  // Convert loads directly.  This is normally done by DAGCombiner,
  // but we need this case for bitcasts that are created during lowering
  // and which are then lowered themselves.
  if (auto *LoadN = dyn_cast<LoadSDNode>(In))
    if (ISD::isNormalLoad(LoadN)) {
      SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
                                    LoadN->getBasePtr(), LoadN->getMemOperand());
      // Update the chain uses.
      DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
      return NewLoad;
    }

  if (InVT == MVT::i32 && ResVT == MVT::f32) {
    SDValue In64;
    if (Subtarget.hasHighWord()) {
      SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
                                       MVT::i64);
      In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
                                       MVT::i64, SDValue(U64, 0), In);
    } else {
      In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
      In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
                         DAG.getConstant(32, DL, MVT::i64));
    }
    SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
    return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
                                      DL, MVT::f32, Out64);
  }
  if (InVT == MVT::f32 && ResVT == MVT::i32) {
    SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
    SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
                                             MVT::f64, SDValue(U64, 0), In);
    SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
    if (Subtarget.hasHighWord())
      return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
                                        MVT::i32, Out64);
    SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
                                DAG.getConstant(32, DL, MVT::i64));
    return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
  }
  llvm_unreachable("Unexpected bitcast combination");
}
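
// Lower VASTART by storing the four fields of the SystemZ va_list
// structure: the first GPR and FPR argument numbers and the addresses
// of the overflow area and the register save area.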
SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
                                            SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  SystemZMachineFunctionInfo *FuncInfo =
    MF.getInfo<SystemZMachineFunctionInfo>();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  SDValue Chain   = Op.getOperand(0);
  SDValue Addr    = Op.getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  SDLoc DL(Op);

  // The initial values of each field.
  const unsigned NumFields = 4;
  SDValue Fields[NumFields] = {
    DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
    DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
    DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
    DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
  };

  // Store each field into its respective slot.
  SDValue MemOps[NumFields];
  unsigned Offset = 0;
  for (unsigned I = 0; I < NumFields; ++I) {
    SDValue FieldAddr = Addr;
    if (Offset != 0)
      FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
                             MachinePointerInfo(SV, Offset));
    Offset += 8;
  }
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDValue Chain      = Op.getOperand(0);
  SDValue DstPtr     = Op.getOperand(1);
  SDValue SrcPtr     = Op.getOperand(2);
  const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
  SDLoc DL(Op);

  return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
                       /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
                       /*isTailCall*/false,
                       MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
SDValue SystemZTargetLowering::
lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
  MachineFunction &MF = DAG.getMachineFunction();
  bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
  bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");

  SDValue Chain = Op.getOperand(0);
  SDValue Size  = Op.getOperand(1);
  SDValue Align = Op.getOperand(2);
  SDLoc DL(Op);

  // If user has set the no alignment function attribute, ignore
  // alloca alignments.
  uint64_t AlignVal = (RealignOpt ?
                       dyn_cast<ConstantSDNode>(Align)->getZExtValue() : 0);

  uint64_t StackAlign = TFI->getStackAlignment();
  uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
  uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;

  unsigned SPReg = getStackPointerRegisterToSaveRestore();
  SDValue NeededSpace = Size;

  // Get a reference to the stack pointer.
  SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);

  // If we need a backchain, save it now.
  SDValue Backchain;
  if (StoreBackchain)
    Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());

  // Add extra space for alignment if needed.
  if (ExtraAlignSpace)
    NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
                              DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));

  // Get the new stack pointer value.
  SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);

  // Copy the new stack pointer back.
  Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);

  // The allocated data lives above the 160 bytes allocated for the standard
  // frame, plus any outgoing stack arguments.  We don't know how much that
  // amounts to yet, so emit a special ADJDYNALLOC placeholder.
  SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
  SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);

  // Dynamically realign if needed.
  if (RequiredAlign > StackAlign) {
    Result =
      DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
                  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
    Result =
      DAG.getNode(ISD::AND, DL, MVT::i64, Result,
                  DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
  }

  if (StoreBackchain)
    Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());

  SDValue Ops[2] = { Result, Chain };
  return DAG.getMergeValues(Ops, DL);
}
SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
    SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);

  return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
}
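
// Lower SMUL_LOHI into a plain 64-bit multiplication for 32-bit types, or
// into a 128-bit multiplication (directly, or built from UMUL_LOHI) for
// 64-bit types.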
SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue Ops[2];
  if (is32Bit(VT))
    // Just do a normal 64-bit multiplication and extract the results.
    // We define this so that it can be used for constant division.
    lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
                    Op.getOperand(1), Ops[1], Ops[0]);
  else if (Subtarget.hasMiscellaneousExtensions2())
    // SystemZISD::SMUL_LOHI returns the low result in the odd register and
    // the high result in the even register.  ISD::SMUL_LOHI is defined to
    // return the low half first, so the results are in reverse order.
    lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
                     Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
  else {
    // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
    //
    //   (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
    //
    // but using the fact that the upper halves are either all zeros
    // or all ones:
    //
    //   (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
    //
    // and grouping the right terms together since they are quicker than the
    // multiplication:
    //
    //   (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
    SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
    SDValue LL = Op.getOperand(0);
    SDValue RL = Op.getOperand(1);
    SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
    SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
    // SystemZISD::UMUL_LOHI returns the low result in the odd register and
    // the high result in the even register.  ISD::SMUL_LOHI is defined to
    // return the low half first, so the results are in reverse order.
    lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
                     LL, RL, Ops[1], Ops[0]);
    SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
    SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
    SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
    Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
  }
  return DAG.getMergeValues(Ops, DL);
}
SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue Ops[2];
  if (is32Bit(VT))
    // Just do a normal 64-bit multiplication and extract the results.
    // We define this so that it can be used for constant division.
    lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
                    Op.getOperand(1), Ops[1], Ops[0]);
  else
    // SystemZISD::UMUL_LOHI returns the low result in the odd register and
    // the high result in the even register.  ISD::UMUL_LOHI is defined to
    // return the low half first, so the results are in reverse order.
    lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
                     Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, DL);
}
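
// Lower SDIVREM and UDIVREM using DSG(F)/DL(G), which produce the
// remainder and quotient in an even/odd register pair.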
SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  EVT VT = Op.getValueType();
  SDLoc DL(Op);

  // We use DSGF for 32-bit division.  This means the first operand must
  // always be 64-bit, and the second operand should be 32-bit whenever
  // that is possible, to improve performance.
  if (is32Bit(VT))
    Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
  else if (DAG.ComputeNumSignBits(Op1) > 32)
    Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);

  // DSG(F) returns the remainder in the even register and the
  // quotient in the odd register.
  SDValue Ops[2];
  lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, DL);
}

SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
                                            SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);

  // DL(G) returns the remainder in the even register and the
  // quotient in the odd register.
  SDValue Ops[2];
  lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
                   Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, DL);
}
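
// Lower an i64 OR in which one operand provides only the low 32 bits and
// the other only the high 32 bits, using a 32-bit subregister insertion
// where that is profitable.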
SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");

  // Get the known-zero masks for each operand.
  SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
  KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
                        DAG.computeKnownBits(Ops[1])};

  // See if the upper 32 bits of one operand and the lower 32 bits of the
  // other are known zero.  They are the low and high operands respectively.
  uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
                       Known[1].Zero.getZExtValue() };
  unsigned High, Low;
  if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
    High = 1, Low = 0;
  else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
    High = 0, Low = 1;
  else
    return Op;

  SDValue LowOp = Ops[Low];
  SDValue HighOp = Ops[High];

  // If the high part is a constant, we're better off using IILH.
  if (HighOp.getOpcode() == ISD::Constant)
    return Op;

  // If the low part is a constant that is outside the range of LHI,
  // then we're better off using IILF.
  if (LowOp.getOpcode() == ISD::Constant) {
    int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
    if (!isInt<16>(Value))
      return Op;
  }

  // Check whether the high part is an AND that doesn't change the
  // high 32 bits and just masks out low bits.  We can skip it if so.
  if (HighOp.getOpcode() == ISD::AND &&
      HighOp.getOperand(1).getOpcode() == ISD::Constant) {
    SDValue HighOp0 = HighOp.getOperand(0);
    uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
    if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
      HighOp = HighOp0;
  }

  // Take advantage of the fact that all GR32 operations only change the
  // low 32 bits by truncating Low to an i32 and inserting it directly
  // using a subreg.  The interesting cases are those where the truncation
  // can be folded.
  SDLoc DL(Op);
  SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
  return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
                                   MVT::i64, HighOp, Low32);
}
// Lower SADDO/SSUBO/UADDO/USUBO nodes.
SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDNode *N = Op.getNode();
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDLoc DL(N);
  unsigned BaseOp = 0;
  unsigned CCValid = 0;
  unsigned CCMask = 0;

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown instruction!");
  case ISD::SADDO:
    BaseOp = SystemZISD::SADDO;
    CCValid = SystemZ::CCMASK_ARITH;
    CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
    break;
  case ISD::SSUBO:
    BaseOp = SystemZISD::SSUBO;
    CCValid = SystemZ::CCMASK_ARITH;
    CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
    break;
  case ISD::UADDO:
    BaseOp = SystemZISD::UADDO;
    CCValid = SystemZ::CCMASK_LOGICAL;
    CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
    break;
  case ISD::USUBO:
    BaseOp = SystemZISD::USUBO;
    CCValid = SystemZ::CCMASK_LOGICAL;
    CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
    break;
  }

  SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);

  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
  if (N->getValueType(1) == MVT::i1)
    SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);

  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
}
static bool isAddCarryChain(SDValue Carry) {
  while (Carry.getOpcode() == ISD::ADDCARRY)
    Carry = Carry.getOperand(2);
  return Carry.getOpcode() == ISD::UADDO;
}

static bool isSubBorrowChain(SDValue Carry) {
  while (Carry.getOpcode() == ISD::SUBCARRY)
    Carry = Carry.getOperand(2);
  return Carry.getOpcode() == ISD::USUBO;
}
// Lower ADDCARRY/SUBCARRY nodes.
SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
                                                SelectionDAG &DAG) const {

  SDNode *N = Op.getNode();
  MVT VT = N->getSimpleValueType(0);

  // Let legalize expand this if it isn't a legal type yet.
  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Carry = Op.getOperand(2);
  SDLoc DL(N);
  unsigned BaseOp = 0;
  unsigned CCValid = 0;
  unsigned CCMask = 0;

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown instruction!");
  case ISD::ADDCARRY:
    if (!isAddCarryChain(Carry))
      return SDValue();
    BaseOp = SystemZISD::ADDCARRY;
    CCValid = SystemZ::CCMASK_LOGICAL;
    CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
    break;
  case ISD::SUBCARRY:
    if (!isSubBorrowChain(Carry))
      return SDValue();
    BaseOp = SystemZISD::SUBCARRY;
    CCValid = SystemZ::CCMASK_LOGICAL;
    CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
    break;
  }

  // Set the condition code from the carry flag.
  Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
                      DAG.getConstant(CCValid, DL, MVT::i32),
                      DAG.getConstant(CCMask, DL, MVT::i32));

  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);

  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
  if (N->getValueType(1) == MVT::i1)
    SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);

  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
}
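
// Lower CTPOP using VPOPCT for vector types and POPCNT plus a binary
// tree of additions for scalar types.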
SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
                                          SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  Op = Op.getOperand(0);

  // Handle vector types via VPOPCT.
  if (VT.isVector()) {
    Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
    Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
    switch (VT.getScalarSizeInBits()) {
    case 8:
      break;
    case 16: {
      Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
      SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
      SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
      Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
      Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
      break;
    }
    case 32: {
      SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
                                            DAG.getConstant(0, DL, MVT::i32));
      Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
      break;
    }
    case 64: {
      SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
                                            DAG.getConstant(0, DL, MVT::i32));
      Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
      Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
      break;
    }
    default:
      llvm_unreachable("Unexpected type");
    }
    return Op;
  }

  // Get the known-zero mask for the operand.
  KnownBits Known = DAG.computeKnownBits(Op);
  unsigned NumSignificantBits = (~Known.Zero).getActiveBits();
  if (NumSignificantBits == 0)
    return DAG.getConstant(0, DL, VT);

  // Skip known-zero high parts of the operand.
  int64_t OrigBitSize = VT.getSizeInBits();
  int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
  BitSize = std::min(BitSize, OrigBitSize);

  // The POPCNT instruction counts the number of bits in each byte.
  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
  Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);

  // Add up per-byte counts in a binary tree.  All bits of Op at
  // position larger than BitSize remain zero throughout.
  for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
    SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
    if (BitSize != OrigBitSize)
      Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
                        DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
    Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
  }

  // Extract overall result from high byte.
  if (BitSize > 8)
    Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                     DAG.getConstant(BitSize - 8, DL, VT));

  return Op;
}
SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
    cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
  SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
    cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());

  // The only fence that needs an instruction is a sequentially-consistent
  // cross-thread fence.
  if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
      FenceSSID == SyncScope::System) {
    return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
                                      Op.getOperand(0)),
                   0);
  }

  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
  return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
}
// Op is an atomic load.  Lower it into a normal volatile load.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
                                                SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
                        Node->getChain(), Node->getBasePtr(),
                        Node->getMemoryVT(), Node->getMemOperand());
}
// Op is an atomic store.  Lower it into a normal volatile store.
SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
                                    Node->getBasePtr(), Node->getMemoryVT(),
                                    Node->getMemOperand());
  // We have to enforce sequential consistency by performing a
  // serialization operation after the store.
  if (Node->getOrdering() == AtomicOrdering::SequentiallyConsistent)
    Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op),
                                       MVT::Other, Chain), 0);
  return Chain;
}
// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation.  Lower the first
// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
                                                   SelectionDAG &DAG,
                                                   unsigned Opcode) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());

  // 32-bit operations need no code outside the main loop.
  EVT NarrowVT = Node->getMemoryVT();
  EVT WideVT = MVT::i32;
  if (NarrowVT == WideVT)
    return Op;

  int64_t BitSize = NarrowVT.getSizeInBits();
  SDValue ChainIn = Node->getChain();
  SDValue Addr = Node->getBasePtr();
  SDValue Src2 = Node->getVal();
  MachineMemOperand *MMO = Node->getMemOperand();
  SDLoc DL(Node);
  EVT PtrVT = Addr.getValueType();

  // Convert atomic subtracts of constants into additions.
  if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
    if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
      Opcode = SystemZISD::ATOMIC_LOADW_ADD;
      Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
    }

  // Get the address of the containing word.
  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
                                    DAG.getConstant(-4, DL, PtrVT));

  // Get the number of bits that the word must be rotated left in order
  // to bring the field to the top bits of a GR32.
  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
                                 DAG.getConstant(3, DL, PtrVT));
  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);

  // Get the complementing shift amount, for rotating a field in the top
  // bits back to its proper position.
  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
                                    DAG.getConstant(0, DL, WideVT), BitShift);

  // Extend the source operand to 32 bits and prepare it for the inner loop.
  // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
  // operations require the source to be shifted in advance.  (This shift
  // can be folded if the source is constant.)  For AND and NAND, the lower
  // bits must be set, while for other opcodes they should be left clear.
  if (Opcode != SystemZISD::ATOMIC_SWAPW)
    Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
                       DAG.getConstant(32 - BitSize, DL, WideVT));
  if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
      Opcode == SystemZISD::ATOMIC_LOADW_NAND)
    Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
                       DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));

  // Construct the ATOMIC_LOADW_* node.
  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
  SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
                    DAG.getConstant(BitSize, DL, WideVT) };
  SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
                                             NarrowVT, MMO);

  // Rotate the result of the final CS so that the field is in the lower
  // bits of a GR32, then truncate it.
  SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
                                    DAG.getConstant(BitSize, DL, WideVT));
  SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);

  SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
  return DAG.getMergeValues(RetOps, DL);
}
// Op is an ATOMIC_LOAD_SUB operation.  Lower 8- and 16-bit operations
// into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit
// operations into additions.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
                                                    SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  EVT MemVT = Node->getMemoryVT();
  if (MemVT == MVT::i32 || MemVT == MVT::i64) {
    // A full-width operation.
    assert(Op.getValueType() == MemVT && "Mismatched VTs");
    SDValue Src2 = Node->getVal();
    SDValue NegSrc2;
    SDLoc DL(Src2);

    if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) {
      // Use an addition if the operand is constant and either LAA(G) is
      // available or the negative value is in the range of A(G)FHI.
      int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
      if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
        NegSrc2 = DAG.getConstant(Value, DL, MemVT);
    } else if (Subtarget.hasInterlockedAccess1())
      // Use LAA(G) if available.
      NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT),
                            Src2);

    if (NegSrc2.getNode())
      return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
                           Node->getChain(), Node->getBasePtr(), NegSrc2,
                           Node->getMemOperand());

    // Use the node as-is.
    return Op;
  }

  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
}
// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
                                                    SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  SDValue ChainIn = Node->getOperand(0);
  SDValue Addr = Node->getOperand(1);
  SDValue CmpVal = Node->getOperand(2);
  SDValue SwapVal = Node->getOperand(3);
  MachineMemOperand *MMO = Node->getMemOperand();
  SDLoc DL(Node);

  // We have native support for 32-bit and 64-bit compare and swap, but we
  // still need to expand extracting the "success" result from the CC.
  EVT NarrowVT = Node->getMemoryVT();
  EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
  if (NarrowVT == WideVT) {
    SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
    SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
    SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
                                               DL, Tys, Ops, NarrowVT, MMO);
    SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
                                SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);

    DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
    DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
    return SDValue();
  }

  // Convert 8-bit and 16-bit compare and swap to a loop, implemented
  // via a fullword ATOMIC_CMP_SWAPW operation.
  int64_t BitSize = NarrowVT.getSizeInBits();
  EVT PtrVT = Addr.getValueType();

  // Get the address of the containing word.
  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
                                    DAG.getConstant(-4, DL, PtrVT));

  // Get the number of bits that the word must be rotated left in order
  // to bring the field to the top bits of a GR32.
  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
                                 DAG.getConstant(3, DL, PtrVT));
  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);

  // Get the complementing shift amount, for rotating a field in the top
  // bits back to its proper position.
  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
                                    DAG.getConstant(0, DL, WideVT), BitShift);

  // Construct the ATOMIC_CMP_SWAPW node.
  SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
  SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
                    NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
  SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
                                             VTList, Ops, NarrowVT, MMO);
  SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
                              SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);

  DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
  return SDValue();
}
MachineMemOperand::Flags
SystemZTargetLowering::getMMOFlags(const Instruction &I) const {
  // Because of how we convert atomic_load and atomic_store to normal loads and
  // stores in the DAG, we need to ensure that the MMOs are marked volatile
  // since DAGCombine hasn't been updated to account for atomic, but non
  // volatile loads.  (See D57601)
  if (auto *SI = dyn_cast<StoreInst>(&I))
    if (SI->isAtomic())
      return MachineMemOperand::MOVolatile;
  if (auto *LI = dyn_cast<LoadInst>(&I))
    if (LI->isAtomic())
      return MachineMemOperand::MOVolatile;
  if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
    if (AI->isAtomic())
      return MachineMemOperand::MOVolatile;
  if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
    if (AI->isAtomic())
      return MachineMemOperand::MOVolatile;
  return MachineMemOperand::MONone;
}
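
// Lower STACKSAVE by reading the current stack pointer (R15).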
SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
  return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
                            SystemZ::R15D, Op.getValueType());
}
SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
  bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");

  SDValue Chain = Op.getOperand(0);
  SDValue NewSP = Op.getOperand(1);
  SDValue Backchain;
  SDLoc DL(Op);

  if (StoreBackchain) {
    SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, MVT::i64);
    Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
  }

  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R15D, NewSP);

  if (StoreBackchain)
    Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());

  return Chain;
}
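
// Lower PREFETCH to the PFD instruction for data prefetches; instruction
// prefetches only need the chain preserved.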
SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
                                             SelectionDAG &DAG) const {
  bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
  if (!IsData)
    // Just preserve the chain.
    return Op.getOperand(0);

  SDLoc DL(Op);
  bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
  unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
  auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
  SDValue Ops[] = {
    Op.getOperand(0),
    DAG.getConstant(Code, DL, MVT::i32),
    Op.getOperand(1)
  };
  return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
                                 Node->getVTList(), Ops,
                                 Node->getMemoryVT(), Node->getMemOperand());
}
// Convert condition code in CCReg to an i32 value.
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
  SDLoc DL(CCReg);
  SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
  return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
                     DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
}
SDValue
SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
                                              SelectionDAG &DAG) const {
  unsigned Opcode, CCValid;
  if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
    assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
    SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
    SDValue CC = getCCResult(DAG, SDValue(Node, 0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
    return SDValue(Node, 1);
  }

  return SDValue();
}
SDValue
SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                               SelectionDAG &DAG) const {
  unsigned Opcode, CCValid;
  if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
    SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
    if (Op->getNumValues() == 1)
      return getCCResult(DAG, SDValue(Node, 0));
    assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
    return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
                       SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
  }

  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  switch (Id) {
  case Intrinsic::thread_pointer:
    return lowerThreadPointer(SDLoc(Op), DAG);

  case Intrinsic::s390_vpdi:
    return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));

  case Intrinsic::s390_vperm:
    return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));

  case Intrinsic::s390_vuphb:
  case Intrinsic::s390_vuphh:
  case Intrinsic::s390_vuphf:
    return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));

  case Intrinsic::s390_vuplhb:
  case Intrinsic::s390_vuplhh:
  case Intrinsic::s390_vuplhf:
    return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));

  case Intrinsic::s390_vuplb:
  case Intrinsic::s390_vuplhw:
  case Intrinsic::s390_vuplf:
    return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));

  case Intrinsic::s390_vupllb:
  case Intrinsic::s390_vupllh:
  case Intrinsic::s390_vupllf:
    return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));

  case Intrinsic::s390_vsumb:
  case Intrinsic::s390_vsumh:
  case Intrinsic::s390_vsumgh:
  case Intrinsic::s390_vsumgf:
  case Intrinsic::s390_vsumqf:
  case Intrinsic::s390_vsumqg:
    return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  }

  return SDValue();
}
namespace {
// Says that SystemZISD operation Opcode can be used to perform the equivalent
// of a VPERM with permute vector Bytes.  If Opcode takes three operands,
// Operand is the constant third operand, otherwise it is the number of
// bytes in each element of the result.
struct Permute {
  unsigned Opcode;
  unsigned Operand;
  unsigned char Bytes[SystemZ::VectorBytes];
};
} // end anonymous namespace

static const Permute PermuteForms[] = {
  // VMRHG
  { SystemZISD::MERGE_HIGH, 8,
    { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
  // VMRHF
  { SystemZISD::MERGE_HIGH, 4,
    { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
  // VMRHH
  { SystemZISD::MERGE_HIGH, 2,
    { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
  // VMRHB
  { SystemZISD::MERGE_HIGH, 1,
    { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
  // VMRLG
  { SystemZISD::MERGE_LOW, 8,
    { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
  // VMRLF
  { SystemZISD::MERGE_LOW, 4,
    { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
  // VMRLH
  { SystemZISD::MERGE_LOW, 2,
    { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
  // VMRLB
  { SystemZISD::MERGE_LOW, 1,
    { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
  // VPKG
  { SystemZISD::PACK, 4,
    { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
  // VPKF
  { SystemZISD::PACK, 2,
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
  // VPKH
  { SystemZISD::PACK, 1,
    { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
  // VPDI V1, V2, 4  (low half of V1, high half of V2)
  { SystemZISD::PERMUTE_DWORDS, 4,
    { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
  // VPDI V1, V2, 1  (high half of V1, low half of V2)
  { SystemZISD::PERMUTE_DWORDS, 1,
    { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
};
// Called after matching a vector shuffle against a particular pattern.
// Both the original shuffle and the pattern have two vector operands.
// OpNos[0] is the operand of the original shuffle that should be used for
// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
// OpNos[1] is the same for operand 1 of the pattern.  Resolve these -1s and
// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
// for operands 0 and 1 of the pattern.
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
  if (OpNos[0] < 0) {
    if (OpNos[1] < 0)
      return false;
    OpNo0 = OpNo1 = OpNos[1];
  } else if (OpNos[1] < 0) {
    OpNo0 = OpNo1 = OpNos[0];
  } else {
    OpNo0 = OpNos[0];
    OpNo1 = OpNos[1];
  }
  return true;
}
// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes.  Return true if the VPERM can be implemented using P.
// When returning true set OpNo0 to the VPERM operand that should be
// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
//
// For example, if swapping the VPERM operands allows P to match, OpNo0
// will be 1 and OpNo1 will be 0.  If instead Bytes only refers to one
// operand, but rewriting it to use two duplicated operands allows it to
// match P, then OpNo0 and OpNo1 will be the same.
static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
                         unsigned &OpNo0, unsigned &OpNo1) {
  int OpNos[] = { -1, -1 };
  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
    int Elt = Bytes[I];
    if (Elt >= 0) {
      // Make sure that the two permute vectors use the same suboperand
      // byte number.  Only the operand numbers (the high bits) are
      // allowed to differ.
      if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
        return false;
      int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
      int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
      // Make sure that the operand mappings are consistent with previous
      // elements.
      if (OpNos[ModelOpNo] == 1 - RealOpNo)
        return false;
      OpNos[ModelOpNo] = RealOpNo;
    }
  }
  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
}

// As above, but search for a matching permute.
static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
                                   unsigned &OpNo0, unsigned &OpNo1) {
  for (auto &P : PermuteForms)
    if (matchPermute(Bytes, P, OpNo0, OpNo1))
      return &P;
  return nullptr;
}
// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes.  This permute is an operand of an outer permute.
// See whether redistributing the -1 bytes gives a shuffle that can be
// implemented using P.  If so, set Transform to a VPERM-like permute vector
// that, when applied to the result of P, gives the original permute in Bytes.
static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
                               const Permute &P,
                               SmallVectorImpl<int> &Transform) {
  unsigned To = 0;
  for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
    int Elt = Bytes[From];
    if (Elt < 0)
      // Byte number From of the result is undefined.
      Transform[From] = -1;
    else {
      while (P.Bytes[To] != Elt) {
        To += 1;
        if (To == SystemZ::VectorBytes)
          return false;
      }
      Transform[From] = To;
    }
  }
  return true;
}

// As above, but search for a matching permute.
static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
                                         SmallVectorImpl<int> &Transform) {
  for (auto &P : PermuteForms)
    if (matchDoublePermute(Bytes, P, Transform))
      return &P;
  return nullptr;
}
// Convert the mask of the given shuffle op into a byte-level mask,
// as if it had type vNi8.
static bool getVPermMask(SDValue ShuffleOp,
                         SmallVectorImpl<int> &Bytes) {
  EVT VT = ShuffleOp.getValueType();
  unsigned NumElements = VT.getVectorNumElements();
  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();

  if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
    Bytes.resize(NumElements * BytesPerElement, -1);
    for (unsigned I = 0; I < NumElements; ++I) {
      int Index = VSN->getMaskElt(I);
      if (Index >= 0)
        for (unsigned J = 0; J < BytesPerElement; ++J)
          Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
    }
    return true;
  }
  if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
      isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
    unsigned Index = ShuffleOp.getConstantOperandVal(1);
    Bytes.resize(NumElements * BytesPerElement, -1);
    for (unsigned I = 0; I < NumElements; ++I)
      for (unsigned J = 0; J < BytesPerElement; ++J)
        Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
    return true;
  }
  return false;
}
// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes.  See whether bytes [Start, Start + BytesPerElement) of
// the result come from a contiguous sequence of bytes from one input.
// Set Base to the selector for the first byte if so.
static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
                            unsigned BytesPerElement, int &Base) {
  Base = -1;
  for (unsigned I = 0; I < BytesPerElement; ++I) {
    if (Bytes[Start + I] >= 0) {
      unsigned Elem = Bytes[Start + I];
      if (Base < 0) {
        Base = Elem - I;
        // Make sure the bytes would come from one input operand.
        if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
          return false;
      } else if (unsigned(Base) != Elem - I)
        return false;
    }
  }
  return true;
}
// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes.  Return true if it can be performed using VSLDI.
// When returning true, set StartIndex to the shift amount and OpNo0
// and OpNo1 to the VPERM operands that should be used as the first
// and second shift operand respectively.
static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
                               unsigned &StartIndex, unsigned &OpNo0,
                               unsigned &OpNo1) {
  int OpNos[] = { -1, -1 };
  int Shift = -1;
  for (unsigned I = 0; I < 16; ++I) {
    int Index = Bytes[I];
    if (Index >= 0) {
      int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
      int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
      int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
      if (Shift < 0)
        Shift = ExpectedShift;
      else if (Shift != ExpectedShift)
        return false;
      // Make sure that the operand mappings are consistent with previous
      // elements.
      if (OpNos[ModelOpNo] == 1 - RealOpNo)
        return false;
      OpNos[ModelOpNo] = RealOpNo;
    }
  }
  StartIndex = Shift;
  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
}
// Create a node that performs P on operands Op0 and Op1, casting the
// operands to the appropriate type.  The type of the result is determined by P.
static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
                              const Permute &P, SDValue Op0, SDValue Op1) {
  // VPDI (PERMUTE_DWORDS) always operates on v2i64s.  The input
  // elements of a PACK are twice as wide as the outputs.
  unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
                      P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
                      P.Operand);
  // Cast both operands to the appropriate type.
  MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
                              SystemZ::VectorBytes / InBytes);
  Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
  Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
  SDValue Op;
  if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
    SDValue Op2 = DAG.getConstant(P.Operand, DL, MVT::i32);
    Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
  } else if (P.Opcode == SystemZISD::PACK) {
    MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
                                 SystemZ::VectorBytes / P.Operand);
    Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
  } else {
    Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
  }
  return Op;
}
// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes.  Implement it on operands Ops[0] and Ops[1] using
// VSLDI or VPERM.
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
                                     SDValue *Ops,
                                     const SmallVectorImpl<int> &Bytes) {
  for (unsigned I = 0; I < 2; ++I)
    Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);

  // First see whether VSLDI can be used.
  unsigned StartIndex, OpNo0, OpNo1;
  if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
    return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
                       Ops[OpNo1], DAG.getConstant(StartIndex, DL, MVT::i32));

  // Fall back on VPERM.  Construct an SDNode for the permute vector.
  SDValue IndexNodes[SystemZ::VectorBytes];
  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
    if (Bytes[I] >= 0)
      IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
    else
      IndexNodes[I] = DAG.getUNDEF(MVT::i32);
  SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
  return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2);
}
namespace {
// Describes a general N-operand vector shuffle.
struct GeneralShuffle {
  GeneralShuffle(EVT vt) : VT(vt) {}
  void addUndef();
  bool add(SDValue, unsigned);
  SDValue getNode(SelectionDAG &, const SDLoc &);

  // The operands of the shuffle.
  SmallVector<SDValue, SystemZ::VectorBytes> Ops;

  // Index I is -1 if byte I of the result is undefined.  Otherwise the
  // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
  // Bytes[I] / SystemZ::VectorBytes.
  SmallVector<int, SystemZ::VectorBytes> Bytes;

  // The type of the shuffle result.
  EVT VT;
};
} // end anonymous namespace

// Add an extra undefined element to the shuffle.
void GeneralShuffle::addUndef() {
  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
  for (unsigned I = 0; I < BytesPerElement; ++I)
    Bytes.push_back(-1);
}
// Add an extra element to the shuffle, taking it from element Elem of Op.
// A null Op indicates a vector input whose value will be calculated later;
// there is at most one such input per shuffle and it always has the same
// type as the result.  Aborts and returns false if the source vector elements
// of an EXTRACT_VECTOR_ELT are smaller than the destination elements.  Per
// LLVM they become implicitly extended, but this is rare and not optimized.
bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();

  // The source vector can have wider elements than the result,
  // either through an explicit TRUNCATE or because of type legalization.
  // We want the least significant part.
  EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
  unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();

  // Return false if the source elements are smaller than their destination
  // elements.
  if (FromBytesPerElement < BytesPerElement)
    return false;

  unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
                   (FromBytesPerElement - BytesPerElement));

  // Look through things like shuffles and bitcasts.
  while (Op.getNode()) {
    if (Op.getOpcode() == ISD::BITCAST)
      Op = Op.getOperand(0);
    else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
      // See whether the bytes we need come from a contiguous part of one
      // operand.
      SmallVector<int, SystemZ::VectorBytes> OpBytes;
      if (!getVPermMask(Op, OpBytes))
        break;
      int NewByte;
      if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
        break;
      if (NewByte < 0) {
        addUndef();
        return true;
      }
      Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
      Byte = unsigned(NewByte) % SystemZ::VectorBytes;
    } else if (Op.isUndef()) {
      addUndef();
      return true;
    } else
      break;
  }

  // Make sure that the source of the extraction is in Ops.
  unsigned OpNo = 0;
  for (; OpNo < Ops.size(); ++OpNo)
    if (Ops[OpNo] == Op)
      break;
  if (OpNo == Ops.size())
    Ops.push_back(Op);

  // Add the element to Bytes.
  unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
  for (unsigned I = 0; I < BytesPerElement; ++I)
    Bytes.push_back(Base + I);

  return true;
}
// Return SDNodes for the completed shuffle.
SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
  assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");

  if (Ops.size() == 0)
    return DAG.getUNDEF(VT);

  // Make sure that there are at least two shuffle operands.
  if (Ops.size() == 1)
    Ops.push_back(DAG.getUNDEF(MVT::v16i8));

  // Create a tree of shuffles, deferring root node until after the loop.
  // Try to redistribute the undefined elements of non-root nodes so that
  // the non-root shuffles match something like a pack or merge, then adjust
  // the parent node's permute vector to compensate for the new order.
  // Among other things, this copes with vectors like <2 x i16> that were
  // padded with undefined elements during type legalization.
  //
  // In the best case this redistribution will lead to the whole tree
  // using packs and merges.  It should rarely be a loss in other cases.
  unsigned Stride = 1;
  for (; Stride * 2 < Ops.size(); Stride *= 2) {
    for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
      SDValue SubOps[] = { Ops[I], Ops[I + Stride] };

      // Create a mask for just these two operands.
      SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
      for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
        unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
        unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
        if (OpNo == I)
          NewBytes[J] = Byte;
        else if (OpNo == I + Stride)
          NewBytes[J] = SystemZ::VectorBytes + Byte;
        else
          NewBytes[J] = -1;
      }
      // See if it would be better to reorganize NewMask to avoid using VPERM.
      SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
      if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
        Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
        // Applying NewBytesMap to Ops[I] gets back to NewBytes.
        for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
          if (NewBytes[J] >= 0) {
            assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
                   "Invalid double permute");
            Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
          } else
            assert(NewBytesMap[J] < 0 && "Invalid double permute");
        }
      } else {
        // Just use NewBytes on the operands.
        Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
        for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
          if (NewBytes[J] >= 0)
            Bytes[J] = I * SystemZ::VectorBytes + J;
      }
    }
  }

  // Now we just have 2 inputs.  Put the second operand in Ops[1].
  if (Stride > 1) {
    Ops[1] = Ops[Stride];
    for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
      if (Bytes[I] >= int(SystemZ::VectorBytes))
        Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
  }

  // Look for an instruction that can do the permute without resorting
  // to VPERM.
  unsigned OpNo0, OpNo1;
  SDValue Op;
  if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
    Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
  else
    Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
}
// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
static bool isScalarToVector(SDValue Op) {
  for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
    if (!Op.getOperand(I).isUndef())
      return false;
  return true;
}

// Return a vector of type VT that contains Value in the first element.
// The other elements don't matter.
static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                   SDValue Value) {
  // If we have a constant, replicate it to all elements and let the
  // BUILD_VECTOR lowering take care of it.
  if (Value.getOpcode() == ISD::Constant ||
      Value.getOpcode() == ISD::ConstantFP) {
    SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
    return DAG.getBuildVector(VT, DL, Ops);
  }
  if (Value.isUndef())
    return DAG.getUNDEF(VT);
  return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
}
// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
// element 1.  Used for cases in which replication is cheap.
static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                 SDValue Op0, SDValue Op1) {
  if (Op0.isUndef()) {
    if (Op1.isUndef())
      return DAG.getUNDEF(VT);
    return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
  }
  if (Op1.isUndef())
    return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
  return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
                     buildScalarToVector(DAG, DL, VT, Op0),
                     buildScalarToVector(DAG, DL, VT, Op1));
}

// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
// vector for them.
static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
                          SDValue Op1) {
  if (Op0.isUndef() && Op1.isUndef())
    return DAG.getUNDEF(MVT::v2i64);
  // If one of the two inputs is undefined then replicate the other one,
  // in order to avoid using another register unnecessarily.
  if (Op0.isUndef())
    Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
  else if (Op1.isUndef())
    Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
  else {
    Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
    Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
  }
  return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
}
// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
// the non-EXTRACT_VECTOR_ELT elements.  See if the given BUILD_VECTOR
// would benefit from this representation and return it if so.
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
                                     BuildVectorSDNode *BVN) {
  EVT VT = BVN->getValueType(0);
  unsigned NumElements = VT.getVectorNumElements();

  // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
  // on byte vectors.  If there are non-EXTRACT_VECTOR_ELT elements that still
  // need a BUILD_VECTOR, add an additional placeholder operand for that
  // BUILD_VECTOR and store its operands in ResidueOps.
  GeneralShuffle GS(VT);
  SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
  bool FoundOne = false;
  for (unsigned I = 0; I < NumElements; ++I) {
    SDValue Op = BVN->getOperand(I);
    if (Op.getOpcode() == ISD::TRUNCATE)
      Op = Op.getOperand(0);
    if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
        Op.getOperand(1).getOpcode() == ISD::Constant) {
      unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      if (!GS.add(Op.getOperand(0), Elem))
        return SDValue();
      FoundOne = true;
    } else if (Op.isUndef()) {
      GS.addUndef();
    } else {
      if (!GS.add(SDValue(), ResidueOps.size()))
        return SDValue();
      ResidueOps.push_back(BVN->getOperand(I));
    }
  }

  // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
  if (!FoundOne)
    return SDValue();

  // Create the BUILD_VECTOR for the remaining elements, if any.
  if (!ResidueOps.empty()) {
    while (ResidueOps.size() < NumElements)
      ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
    for (auto &Op : GS.Ops) {
      if (!Op.getNode()) {
        Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
        break;
      }
    }
  }
  return GS.getNode(DAG, SDLoc(BVN));
}
bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
  if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
    return true;
  if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
    return true;
  return false;
}
// Combine GPR scalar values Elems into a vector of type VT.
SDValue
SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                   SmallVectorImpl<SDValue> &Elems) const {
  // See whether there is a single replicated value.
  SDValue Single;
  unsigned int NumElements = Elems.size();
  unsigned int Count = 0;
  for (auto Elem : Elems) {
    if (!Elem.isUndef()) {
      if (!Single.getNode())
        Single = Elem;
      else if (Elem != Single) {
        Single = SDValue();
        break;
      }
      Count += 1;
    }
  }
  // There are three cases here:
  //
  // - if the only defined element is a loaded one, the best sequence
  //   is a replicating load.
  //
  // - otherwise, if the only defined element is an i64 value, we will
  //   end up with the same VLVGP sequence regardless of whether we short-cut
  //   for replication or fall through to the later code.
  //
  // - otherwise, if the only defined element is an i32 or smaller value,
  //   we would need 2 instructions to replicate it: VLVGP followed by VREPx.
  //   This is only a win if the single defined element is used more than once.
  //   In other cases we're better off using a single VLVGx.
  if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
    return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);

  // If all elements are loads, use VLREP/VLEs (below).
  bool AllLoads = true;
  for (auto Elem : Elems)
    if (!isVectorElementLoad(Elem)) {
      AllLoads = false;
      break;
    }

  // The best way of building a v2i64 from two i64s is to use VLVGP.
  if (VT == MVT::v2i64 && !AllLoads)
    return joinDwords(DAG, DL, Elems[0], Elems[1]);

  // Use a 64-bit merge high to combine two doubles.
  if (VT == MVT::v2f64 && !AllLoads)
    return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);

  // Build v4f32 values directly from the FPRs:
  //
  //   <Axxx> <Bxxx> <Cxxxx> <Dxxx>
  //         V              V         VMRHF
  //      <ABxx>         <CDxx>
  //                V                 VMRHG
  //              <ABCD>
  if (VT == MVT::v4f32 && !AllLoads) {
    SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
    SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
    // Avoid unnecessary undefs by reusing the other operand.
    if (Op01.isUndef())
      Op01 = Op23;
    else if (Op23.isUndef())
      Op23 = Op01;
    // Merging identical replications is a no-op.
    if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
      return Op01;
    Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
    Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
    SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
                             DL, MVT::v2i64, Op01, Op23);
    return DAG.getNode(ISD::BITCAST, DL, VT, Op);
  }

  // Collect the constant terms.
  SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
  SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);

  unsigned NumConstants = 0;
  for (unsigned I = 0; I < NumElements; ++I) {
    SDValue Elem = Elems[I];
    if (Elem.getOpcode() == ISD::Constant ||
        Elem.getOpcode() == ISD::ConstantFP) {
      NumConstants += 1;
      Constants[I] = Elem;
      Done[I] = true;
    }
  }
  // If there was at least one constant, fill in the other elements of
  // Constants with undefs to get a full vector constant and use that
  // as the starting point.
  SDValue Result;
  SDValue ReplicatedVal;
  if (NumConstants > 0) {
    for (unsigned I = 0; I < NumElements; ++I)
      if (!Constants[I].getNode())
        Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
    Result = DAG.getBuildVector(VT, DL, Constants);
  } else {
    // Otherwise try to use VLREP or VLVGP to start the sequence in order to
    // avoid a false dependency on any previous contents of the vector
    // register.

    // Use a VLREP if at least one element is a load. Make sure to replicate
    // the load with the most elements having its value.
    std::map<const SDNode*, unsigned> UseCounts;
    SDNode *LoadMaxUses = nullptr;
    for (unsigned I = 0; I < NumElements; ++I)
      if (isVectorElementLoad(Elems[I])) {
        SDNode *Ld = Elems[I].getNode();
        UseCounts[Ld]++;
        if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
          LoadMaxUses = Ld;
      }
    if (LoadMaxUses != nullptr) {
      ReplicatedVal = SDValue(LoadMaxUses, 0);
      Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
    } else {
      // Try to use VLVGP.
      unsigned I1 = NumElements / 2 - 1;
      unsigned I2 = NumElements - 1;
      bool Def1 = !Elems[I1].isUndef();
      bool Def2 = !Elems[I2].isUndef();
      if (Def1 || Def2) {
        SDValue Elem1 = Elems[Def1 ? I1 : I2];
        SDValue Elem2 = Elems[Def2 ? I2 : I1];
        Result = DAG.getNode(ISD::BITCAST, DL, VT,
                             joinDwords(DAG, DL, Elem1, Elem2));
        Done[I1] = true;
        Done[I2] = true;
      } else
        Result = DAG.getUNDEF(VT);
    }
  }

  // Use VLVGx to insert the other elements.
  for (unsigned I = 0; I < NumElements; ++I)
    if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
                           DAG.getConstant(I, DL, MVT::i32));

  return Result;
}
SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  if (BVN->isConstant()) {
    if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
      return Op;

    // Fall back to loading it from memory.
    return SDValue();
  }

  // See if we should use shuffles to construct the vector from other vectors.
  if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
    return Res;

  // Detect SCALAR_TO_VECTOR conversions.
  if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
    return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));

  // Otherwise use buildVector to build the vector up from GPRs.
  unsigned NumElements = Op.getNumOperands();
  SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
  for (unsigned I = 0; I < NumElements; ++I)
    Ops[I] = Op.getOperand(I);
  return buildVector(DAG, DL, VT, Ops);
}

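// Illustrative example (an assumption about a typical input, not taken from
// the original source): a splat shuffle such as
//   shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer
// is handled by the splat path below, either as a REPLICATE of the splatted
// scalar when it is directly available, or as a SPLAT of vector element 0.
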
SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                   SelectionDAG &DAG) const {
  auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  unsigned NumElements = VT.getVectorNumElements();

  if (VSN->isSplat()) {
    SDValue Op0 = Op.getOperand(0);
    unsigned Index = VSN->getSplatIndex();
    assert(Index < VT.getVectorNumElements() &&
           "Splat index should be defined and in first operand");
    // See whether the value we're splatting is directly available as a scalar.
    if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
        Op0.getOpcode() == ISD::BUILD_VECTOR)
      return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
    // Otherwise keep it as a vector-to-vector operation.
    return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
                       DAG.getConstant(Index, DL, MVT::i32));
  }

  GeneralShuffle GS(VT);
  for (unsigned I = 0; I < NumElements; ++I) {
    int Elt = VSN->getMaskElt(I);
    if (Elt < 0)
      GS.addUndef();
    else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
                     unsigned(Elt) % NumElements))
      return SDValue();
  }
  return GS.getNode(DAG, SDLoc(VSN));
}

SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // Just insert the scalar into element 0 of an undefined vector.
  return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
                     Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
                     Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
}

SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                      SelectionDAG &DAG) const {
  // Handle insertions of floating-point values.
  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDValue Op2 = Op.getOperand(2);
  EVT VT = Op.getValueType();

  // Insertions into constant indices of a v2f64 can be done using VPDI.
  // However, if the inserted value is a bitcast or a constant then it's
  // better to use GPRs, as below.
  if (VT == MVT::v2f64 &&
      Op1.getOpcode() != ISD::BITCAST &&
      Op1.getOpcode() != ISD::ConstantFP &&
      Op2.getOpcode() == ISD::Constant) {
    uint64_t Index = cast<ConstantSDNode>(Op2)->getZExtValue();
    unsigned Mask = VT.getVectorNumElements() - 1;
    if (Index <= Mask)
      return Op;
  }

  // Otherwise bitcast to the equivalent integer form and insert via a GPR.
  MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
  MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
  SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
                            DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
                            DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
  return DAG.getNode(ISD::BITCAST, DL, VT, Res);
}

SDValue
SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                               SelectionDAG &DAG) const {
  // Handle extractions of floating-point values.
  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  EVT VT = Op.getValueType();
  EVT VecVT = Op0.getValueType();

  // Extractions of constant indices can be done directly.
  if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
    uint64_t Index = CIndexN->getZExtValue();
    unsigned Mask = VecVT.getVectorNumElements() - 1;
    if (Index <= Mask)
      return Op;
  }

  // Otherwise bitcast to the equivalent integer form and extract via a GPR.
  MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
  MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
  SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
                            DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
  return DAG.getNode(ISD::BITCAST, DL, VT, Res);
}

SDValue
SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
                                              unsigned UnpackHigh) const {
  SDValue PackedOp = Op.getOperand(0);
  EVT OutVT = Op.getValueType();
  EVT InVT = PackedOp.getValueType();
  unsigned ToBits = OutVT.getScalarSizeInBits();
  unsigned FromBits = InVT.getScalarSizeInBits();
  do {
    FromBits *= 2;
    EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
                                 SystemZ::VectorBits / FromBits);
    PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), OutVT, PackedOp);
  } while (FromBits != ToBits);
  return PackedOp;
}

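// Illustrative example (not from the original source): a vector shift whose
// amount is a constant splat, e.g. shl <4 x i32> %x, <5,5,5,5>, is expected
// to take the *_BY_SCALAR path below so that a single element-shift
// instruction can be selected, instead of a shift with a full vector of
// per-element amounts; non-splat shift vectors keep the generic form.
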
SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
                                          unsigned ByScalar) const {
  // Look for cases where a vector shift can use the *_BY_SCALAR form.
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  unsigned ElemBitSize = VT.getScalarSizeInBits();

  // See whether the shift vector is a splat represented as BUILD_VECTOR.
  if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
    APInt SplatBits, SplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    // Check for constant splats.  Use ElemBitSize as the minimum element
    // width and reject splats that need wider elements.
    if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
                             ElemBitSize, true) &&
        SplatBitSize == ElemBitSize) {
      SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
                                      DL, MVT::i32);
      return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
    }
    // Check for variable splats.
    BitVector UndefElements;
    SDValue Splat = BVN->getSplatValue(&UndefElements);
    if (Splat) {
      // Since i32 is the smallest legal type, we either need a no-op
      // or a truncation.
      SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
      return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
    }
  }

  // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
  // and the shift amount is directly available in a GPR.
  if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
    if (VSN->isSplat()) {
      SDValue VSNOp0 = VSN->getOperand(0);
      unsigned Index = VSN->getSplatIndex();
      assert(Index < VT.getVectorNumElements() &&
             "Splat index should be defined and in first operand");
      if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
          VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
        // Since i32 is the smallest legal type, we either need a no-op
        // or a truncation.
        SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
                                    VSNOp0.getOperand(Index));
        return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
      }
    }
  }

  // Otherwise just treat the current form as legal.
  return Op;
}

SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
                                              SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::BR_CC:
    return lowerBR_CC(Op, DAG);
  case ISD::SELECT_CC:
    return lowerSELECT_CC(Op, DAG);
  case ISD::SETCC:
    return lowerSETCC(Op, DAG);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
  case ISD::JumpTable:
    return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::VACOPY:
    return lowerVACOPY(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    return lowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::GET_DYNAMIC_AREA_OFFSET:
    return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
  case ISD::SMUL_LOHI:
    return lowerSMUL_LOHI(Op, DAG);
  case ISD::UMUL_LOHI:
    return lowerUMUL_LOHI(Op, DAG);
  case ISD::SDIVREM:
    return lowerSDIVREM(Op, DAG);
  case ISD::UDIVREM:
    return lowerUDIVREM(Op, DAG);
  case ISD::SADDO:
  case ISD::SSUBO:
  case ISD::UADDO:
  case ISD::USUBO:
    return lowerXALUO(Op, DAG);
  case ISD::ADDCARRY:
  case ISD::SUBCARRY:
    return lowerADDSUBCARRY(Op, DAG);
  case ISD::OR:
    return lowerOR(Op, DAG);
  case ISD::CTPOP:
    return lowerCTPOP(Op, DAG);
  case ISD::ATOMIC_FENCE:
    return lowerATOMIC_FENCE(Op, DAG);
  case ISD::ATOMIC_SWAP:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
  case ISD::ATOMIC_STORE:
    return lowerATOMIC_STORE(Op, DAG);
  case ISD::ATOMIC_LOAD:
    return lowerATOMIC_LOAD(Op, DAG);
  case ISD::ATOMIC_LOAD_ADD:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
  case ISD::ATOMIC_LOAD_SUB:
    return lowerATOMIC_LOAD_SUB(Op, DAG);
  case ISD::ATOMIC_LOAD_AND:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
  case ISD::ATOMIC_LOAD_OR:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
  case ISD::ATOMIC_LOAD_XOR:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
  case ISD::ATOMIC_LOAD_NAND:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
  case ISD::ATOMIC_LOAD_MIN:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
  case ISD::ATOMIC_LOAD_MAX:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
  case ISD::ATOMIC_LOAD_UMIN:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
  case ISD::ATOMIC_LOAD_UMAX:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
    return lowerATOMIC_CMP_SWAP(Op, DAG);
  case ISD::STACKSAVE:
    return lowerSTACKSAVE(Op, DAG);
  case ISD::STACKRESTORE:
    return lowerSTACKRESTORE(Op, DAG);
  case ISD::PREFETCH:
    return lowerPREFETCH(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return lowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH);
  case ISD::ZERO_EXTEND_VECTOR_INREG:
    return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH);
  case ISD::SHL:
    return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
  case ISD::SRL:
    return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
  case ISD::SRA:
    return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
  default:
    llvm_unreachable("Unexpected node to lower");
  }
}

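// Illustrative note (an interpretation, not original text): the helpers that
// follow model an i128 value as an untyped 128-bit GPR pair.  For example,
// an i128 atomic load becomes an ATOMIC_LOAD_128 node producing MVT::Untyped,
// which is then split back into two i64 halves via subreg_h64 / subreg_l64.
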
// Lower operations with invalid operand or result types (currently used
// only for 128-bit integer types).

static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
  SDLoc DL(In);
  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
                           DAG.getIntPtrConstant(0, DL));
  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
                           DAG.getIntPtrConstant(1, DL));
  SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
                                    MVT::Untyped, Hi, Lo);
  return SDValue(Pair, 0);
}

static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) {
  SDLoc DL(In);
  SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
                                          DL, MVT::i64, In);
  SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
                                          DL, MVT::i64, In);
  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
}

void
SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  case ISD::ATOMIC_LOAD: {
    SDLoc DL(N);
    SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
    MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
    SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
                                          DL, Tys, Ops, MVT::i128, MMO);
    Results.push_back(lowerGR128ToI128(DAG, Res));
    Results.push_back(Res.getValue(1));
    break;
  }
  case ISD::ATOMIC_STORE: {
    SDLoc DL(N);
    SDVTList Tys = DAG.getVTList(MVT::Other);
    SDValue Ops[] = { N->getOperand(0),
                      lowerI128ToGR128(DAG, N->getOperand(2)),
                      N->getOperand(1) };
    MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
    SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
                                          DL, Tys, Ops, MVT::i128, MMO);
    // We have to enforce sequential consistency by performing a
    // serialization operation after the store.
    if (cast<AtomicSDNode>(N)->getOrdering() ==
        AtomicOrdering::SequentiallyConsistent)
      Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
                                       MVT::Other, Res), 0);
    Results.push_back(Res);
    break;
  }
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
    SDLoc DL(N);
    SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                      lowerI128ToGR128(DAG, N->getOperand(2)),
                      lowerI128ToGR128(DAG, N->getOperand(3)) };
    MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
    SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
                                          DL, Tys, Ops, MVT::i128, MMO);
    SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
                                SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
    Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
    Results.push_back(lowerGR128ToI128(DAG, Res));
    Results.push_back(Success);
    Results.push_back(Res.getValue(2));
    break;
  }
  default:
    llvm_unreachable("Unexpected node to lower");
  }
}

void
SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
                                          SmallVectorImpl<SDValue> &Results,
                                          SelectionDAG &DAG) const {
  return LowerOperationWrapper(N, Results, DAG);
}

const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
  switch ((SystemZISD::NodeType)Opcode) {
    case SystemZISD::FIRST_NUMBER: break;
    OPCODE(PCREL_WRAPPER);
    OPCODE(PCREL_OFFSET);
    OPCODE(SELECT_CCMASK);
    OPCODE(ADJDYNALLOC);
    OPCODE(SEARCH_STRING);
    OPCODE(TBEGIN_NOFLOAT);
    OPCODE(ROTATE_MASK);
    OPCODE(JOIN_DWORDS);
    OPCODE(PERMUTE_DWORDS);
    OPCODE(UNPACK_HIGH);
    OPCODE(UNPACKL_HIGH);
    OPCODE(UNPACKL_LOW);
    OPCODE(VSHL_BY_SCALAR);
    OPCODE(VSRL_BY_SCALAR);
    OPCODE(VSRA_BY_SCALAR);
    OPCODE(ATOMIC_SWAPW);
    OPCODE(ATOMIC_LOADW_ADD);
    OPCODE(ATOMIC_LOADW_SUB);
    OPCODE(ATOMIC_LOADW_AND);
    OPCODE(ATOMIC_LOADW_OR);
    OPCODE(ATOMIC_LOADW_XOR);
    OPCODE(ATOMIC_LOADW_NAND);
    OPCODE(ATOMIC_LOADW_MIN);
    OPCODE(ATOMIC_LOADW_MAX);
    OPCODE(ATOMIC_LOADW_UMIN);
    OPCODE(ATOMIC_LOADW_UMAX);
    OPCODE(ATOMIC_CMP_SWAPW);
    OPCODE(ATOMIC_CMP_SWAP);
    OPCODE(ATOMIC_LOAD_128);
    OPCODE(ATOMIC_STORE_128);
    OPCODE(ATOMIC_CMP_SWAP_128);
  }
  return nullptr;
#undef OPCODE
}

// Return true if VT is a vector whose elements are a whole number of bytes
// in width. Also check for presence of vector support.
bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
  if (!Subtarget.hasVector())
    return false;

  return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
}

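// Illustrative example (not from the original source): extracting element 3
// of (vector_shuffle <16 x i8> %a, %b, <mask>) can often be rewritten by the
// routine below as an extraction directly from %a or %b, provided the bytes
// covered by that element form one contiguous run in the VPERM-style mask.
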
// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
// producing a result of type ResVT.  Op is a possibly bitcast version
// of the input vector and Index is the index (based on type VecVT) that
// should be extracted.  Return the new extraction if a simplification
// was possible or if Force is true.
SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
                                              EVT VecVT, SDValue Op,
                                              unsigned Index,
                                              DAGCombinerInfo &DCI,
                                              bool Force) const {
  SelectionDAG &DAG = DCI.DAG;

  // The number of bytes being extracted.
  unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();

  for (;;) {
    unsigned Opcode = Op.getOpcode();
    if (Opcode == ISD::BITCAST)
      // Look through bitcasts.
      Op = Op.getOperand(0);
    else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
             canTreatAsByteVector(Op.getValueType())) {
      // Get a VPERM-like permute mask and see whether the bytes covered
      // by the extracted element are a contiguous sequence from one
      // input operand.
      SmallVector<int, SystemZ::VectorBytes> Bytes;
      if (!getVPermMask(Op, Bytes))
        break;
      int First;
      if (!getShuffleInput(Bytes, Index * BytesPerElement,
                           BytesPerElement, First))
        break;
      if (First < 0)
        return DAG.getUNDEF(ResVT);
      // Make sure the contiguous sequence starts at a multiple of the
      // original element size.
      unsigned Byte = unsigned(First) % Bytes.size();
      if (Byte % BytesPerElement != 0)
        break;
      // We can get the extracted value directly from an input.
      Index = Byte / BytesPerElement;
      Op = Op.getOperand(unsigned(First) / Bytes.size());
      Force = true;
    } else if (Opcode == ISD::BUILD_VECTOR &&
               canTreatAsByteVector(Op.getValueType())) {
      // We can only optimize this case if the BUILD_VECTOR elements are
      // at least as wide as the extracted value.
      EVT OpVT = Op.getValueType();
      unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
      if (OpBytesPerElement < BytesPerElement)
        break;
      // Make sure that the least-significant bit of the extracted value
      // is the least significant bit of an input.
      unsigned End = (Index + 1) * BytesPerElement;
      if (End % OpBytesPerElement != 0)
        break;
      // We're extracting the low part of one operand of the BUILD_VECTOR.
      Op = Op.getOperand(End / OpBytesPerElement - 1);
      if (!Op.getValueType().isInteger()) {
        EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
        Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
        DCI.AddToWorklist(Op.getNode());
      }
      EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
      Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
      if (VT != ResVT) {
        DCI.AddToWorklist(Op.getNode());
        Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
      }
      return Op;
    } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
                Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
                Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
               canTreatAsByteVector(Op.getValueType()) &&
               canTreatAsByteVector(Op.getOperand(0).getValueType())) {
      // Make sure that only the unextended bits are significant.
      EVT ExtVT = Op.getValueType();
      EVT OpVT = Op.getOperand(0).getValueType();
      unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
      unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
      unsigned Byte = Index * BytesPerElement;
      unsigned SubByte = Byte % ExtBytesPerElement;
      unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
      if (SubByte < MinSubByte ||
          SubByte + BytesPerElement > ExtBytesPerElement)
        break;
      // Get the byte offset of the unextended element
      Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
      // ...then add the byte offset relative to that element.
      Byte += SubByte - MinSubByte;
      if (Byte % BytesPerElement != 0)
        break;
      Op = Op.getOperand(0);
      Index = Byte / BytesPerElement;
      Force = true;
    } else
      break;
  }
  if (Force) {
    if (Op.getValueType() != VecVT) {
      Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
      DCI.AddToWorklist(Op.getNode());
    }
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
                       DAG.getConstant(Index, DL, MVT::i32));
  }
  return SDValue();
}

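// Illustrative example (an assumption about a typical input, not original
// text): (trunc i32 (extract_vector_elt <2 x i64> %v, 1)) becomes
// (extract_vector_elt (bitcast %v to <4 x i32>), 3), i.e. the last i32
// piece of the original i64 element, which is what the index arithmetic
// below computes.
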
// Optimize vector operations in scalar value Op on the basis that Op
// is truncated to TruncVT.
SDValue SystemZTargetLowering::combineTruncateExtract(
    const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
  // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
  // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
  // of type TruncVT.
  if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      TruncVT.getSizeInBits() % 8 == 0) {
    SDValue Vec = Op.getOperand(0);
    EVT VecVT = Vec.getValueType();
    if (canTreatAsByteVector(VecVT)) {
      if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
        unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
        unsigned TruncBytes = TruncVT.getStoreSize();
        if (BytesPerElement % TruncBytes == 0) {
          // Calculate the value of Y' in the above description.  We are
          // splitting the original elements into Scale equal-sized pieces
          // and for truncation purposes want the last (least-significant)
          // of these pieces for IndexN.  This is easiest to do by calculating
          // the start index of the following element and then subtracting 1.
          unsigned Scale = BytesPerElement / TruncBytes;
          unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;

          // Defer the creation of the bitcast from X to combineExtract,
          // which might be able to optimize the extraction.
          VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
                                   VecVT.getStoreSize() / TruncBytes);
          EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
          return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
        }
      }
    }
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineZERO_EXTEND(
    SDNode *N, DAGCombinerInfo &DCI) const {
  // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
  SelectionDAG &DAG = DCI.DAG;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
    auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
    auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (TrueOp && FalseOp) {
      SDLoc DL(N0);
      SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
                        DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
                        N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
      SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
      // If N0 has multiple uses, change other uses as well.
      if (!N0.hasOneUse()) {
        SDValue TruncSelect =
          DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
        DCI.CombineTo(N0.getNode(), TruncSelect);
      }
      return NewSelect;
    }
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
    SDNode *N, DAGCombinerInfo &DCI) const {
  // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
  // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
  // into (select_cc LHS, RHS, -1, 0, COND)
  SelectionDAG &DAG = DCI.DAG;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
    N0 = N0.getOperand(0);
  if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
    SDLoc DL(N0);
    SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
                      DAG.getConstant(-1, DL, VT), DAG.getConstant(0, DL, VT),
                      N0.getOperand(2) };
    return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineSIGN_EXTEND(
    SDNode *N, DAGCombinerInfo &DCI) const {
  // Convert (sext (ashr (shl X, C1), C2)) to
  // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
  // cheap as narrower ones.
  SelectionDAG &DAG = DCI.DAG;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
    auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    SDValue Inner = N0.getOperand(0);
    if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
      if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
        unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
        unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
        unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
        EVT ShiftVT = N0.getOperand(1).getValueType();
        SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
                                  Inner.getOperand(0));
        SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
                                  DAG.getConstant(NewShlAmt, SDLoc(Inner),
                                                  ShiftVT));
        return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
                           DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
      }
    }
  }
  return SDValue();
}

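// Illustrative example (not from the original source): for small element
// types, (z_merge_high (build_vector 0, ..., 0), X) is rewritten below as a
// zero-extending unpack of X on a vector with elements twice as wide, so
// instruction selection can use a single logical-unpack instruction instead
// of a merge with an explicit zero vector.
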
SDValue SystemZTargetLowering::combineMERGE(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  unsigned Opcode = N->getOpcode();
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  if (Op0.getOpcode() == ISD::BITCAST)
    Op0 = Op0.getOperand(0);
  if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
    // (z_merge_* 0, 0) -> 0.  This is mostly useful for using VLLEZF
    // for v4f32.
    if (Op1 == N->getOperand(0))
      return Op1;
    // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
    EVT VT = Op1.getValueType();
    unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
    if (ElemBytes <= 4) {
      Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
                SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
      EVT InVT = VT.changeVectorElementTypeToInteger();
      EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
                                   SystemZ::VectorBytes / ElemBytes / 2);
      if (VT != InVT) {
        Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
        DCI.AddToWorklist(Op1.getNode());
      }
      SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
      DCI.AddToWorklist(Op.getNode());
      return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
    }
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineLOAD(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  EVT LdVT = N->getValueType(0);
  if (LdVT.isVector() || LdVT.isInteger())
    return SDValue();
  // Transform a scalar load that is REPLICATEd as well as having other
  // use(s) to the form where the other use(s) use the first element of the
  // REPLICATE instead of the load. Otherwise instruction selection will not
  // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
  // point loads.

  SDValue Replicate;
  SmallVector<SDNode*, 8> OtherUses;
  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
       UI != UE; ++UI) {
    if (UI->getOpcode() == SystemZISD::REPLICATE) {
      if (Replicate)
        return SDValue(); // Should never happen
      Replicate = SDValue(*UI, 0);
    }
    else if (UI.getUse().getResNo() == 0)
      OtherUses.push_back(*UI);
  }
  if (!Replicate || OtherUses.empty())
    return SDValue();

  SDLoc DL(N);
  SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
                              Replicate, DAG.getConstant(0, DL, MVT::i32));
  // Update uses of the loaded Value while preserving old chains.
  for (SDNode *U : OtherUses) {
    SmallVector<SDValue, 8> Ops;
    for (SDValue Op : U->ops())
      Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
    DAG.UpdateNodeOperands(U, Ops);
  }
  return SDValue(N, 0);
}

bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
  if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
    return true;
  if (Subtarget.hasVectorEnhancements2())
    if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64)
      return true;
  return false;
}

static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
  if (!VT.isVector() || !VT.isSimple() ||
      VT.getSizeInBits() != 128 ||
      VT.getScalarSizeInBits() % 8 != 0)
    return false;

  unsigned NumElts = VT.getVectorNumElements();
  for (unsigned i = 0; i < NumElts; ++i) {
    if (M[i] < 0) continue; // ignore UNDEF indices
    if ((unsigned) M[i] != NumElts - 1 - i)
      return false;
  }

  return true;
}

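// Illustrative example (not from the original source): store i32 (bswap %x)
// is folded below into a single byte-reversing store (SystemZISD::STRV), and
// a store of an element-swapping shuffle of a 128-bit vector becomes VSTER
// when vector-enhancements-2 is available.
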
SDValue SystemZTargetLowering::combineSTORE(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  auto *SN = cast<StoreSDNode>(N);
  auto &Op1 = N->getOperand(1);
  EVT MemVT = SN->getMemoryVT();
  // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
  // for the extraction to be done on a vMiN value, so that we can use VSTE.
  // If X has wider elements then convert it to:
  // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
  if (MemVT.isInteger() && SN->isTruncatingStore()) {
    if (SDValue Value =
            combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
      DCI.AddToWorklist(Value.getNode());

      // Rewrite the store with the new form of stored value.
      return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
                               SN->getBasePtr(), SN->getMemoryVT(),
                               SN->getMemOperand());
    }
  }
  // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
  if (!SN->isTruncatingStore() &&
      Op1.getOpcode() == ISD::BSWAP &&
      Op1.getNode()->hasOneUse() &&
      canLoadStoreByteSwapped(Op1.getValueType())) {

    SDValue BSwapOp = Op1.getOperand(0);

    if (BSwapOp.getValueType() == MVT::i16)
      BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);

    SDValue Ops[] = {
      N->getOperand(0), BSwapOp, N->getOperand(2)
    };

    return
      DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
                              Ops, MemVT, SN->getMemOperand());
  }
  // Combine STORE (element-swap) into VSTER
  if (!SN->isTruncatingStore() &&
      Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
      Op1.getNode()->hasOneUse() &&
      Subtarget.hasVectorEnhancements2()) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
    ArrayRef<int> ShuffleMask = SVN->getMask();
    if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
      SDValue Ops[] = {
        N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
      };

      return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
                                     DAG.getVTList(MVT::Other),
                                     Ops, MemVT, SN->getMemOperand());
    }
  }

  return SDValue();
}

SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  // Combine element-swap (LOAD) into VLER
  if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
      N->getOperand(0).hasOneUse() &&
      Subtarget.hasVectorEnhancements2()) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
    ArrayRef<int> ShuffleMask = SVN->getMask();
    if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
      SDValue Load = N->getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(Load);

      // Create the element-swapping load.
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        LD->getBasePtr()   // Ptr
      };
      SDValue ESLoad =
        DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
                                DAG.getVTList(LD->getValueType(0), MVT::Other),
                                Ops, LD->getMemoryVT(), LD->getMemOperand());

      // First, combine the VECTOR_SHUFFLE away.  This makes the value produced
      // by the load dead.
      DCI.CombineTo(N, ESLoad);

      // Next, combine the load away, we give it a bogus result value but a real
      // chain result.  The result value is dead because the shuffle is dead.
      DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));

      // Return N so it doesn't get rechecked!
      return SDValue(N, 0);
    }
  }

  return SDValue();
}

SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  if (!Subtarget.hasVector())
    return SDValue();

  // Look through bitcasts that retain the number of vector elements.
  SDValue Op = N->getOperand(0);
  if (Op.getOpcode() == ISD::BITCAST &&
      Op.getValueType().isVector() &&
      Op.getOperand(0).getValueType().isVector() &&
      Op.getValueType().getVectorNumElements() ==
      Op.getOperand(0).getValueType().getVectorNumElements())
    Op = Op.getOperand(0);

  // Pull BSWAP out of a vector extraction.
  if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
    EVT VecVT = Op.getValueType();
    EVT EltVT = VecVT.getVectorElementType();
    Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
                     Op.getOperand(0), N->getOperand(1));
    DCI.AddToWorklist(Op.getNode());
    Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
    if (EltVT != N->getValueType(0)) {
      DCI.AddToWorklist(Op.getNode());
      Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
    }
    return Op;
  }

  // Try to simplify a vector extraction.
  if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
    SDValue Op0 = N->getOperand(0);
    EVT VecVT = Op0.getValueType();
    return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
                          IndexN->getZExtValue(), DCI, false);
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineJOIN_DWORDS(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  // (join_dwords X, X) == (replicate X)
  if (N->getOperand(0) == N->getOperand(1))
    return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
                       N->getOperand(0));
  return SDValue();
}

SDValue SystemZTargetLowering::combineFP_ROUND(
    SDNode *N, DAGCombinerInfo &DCI) const {

  if (!Subtarget.hasVector())
    return SDValue();

  // (fpround (extract_vector_elt X 0))
  // (fpround (extract_vector_elt X 1)) ->
  // (extract_vector_elt (VROUND X) 0)
  // (extract_vector_elt (VROUND X) 2)
  //
  // This is a special case since the target doesn't really support v2f32s.
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(0);
  if (N->getValueType(0) == MVT::f32 &&
      Op0.hasOneUse() &&
      Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      Op0.getOperand(0).getValueType() == MVT::v2f64 &&
      Op0.getOperand(1).getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
    SDValue Vec = Op0.getOperand(0);
    for (auto *U : Vec->uses()) {
      if (U != Op0.getNode() &&
          U->hasOneUse() &&
          U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          U->getOperand(0) == Vec &&
          U->getOperand(1).getOpcode() == ISD::Constant &&
          cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
        SDValue OtherRound = SDValue(*U->use_begin(), 0);
        if (OtherRound.getOpcode() == ISD::FP_ROUND &&
            OtherRound.getOperand(0) == SDValue(U, 0) &&
            OtherRound.getValueType() == MVT::f32) {
          SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
                                       MVT::v4f32, Vec);
          DCI.AddToWorklist(VRound.getNode());
          SDValue Extract1 =
            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
                        VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
          DCI.AddToWorklist(Extract1.getNode());
          DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
          SDValue Extract0 =
            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
                        VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
          DCI.AddToWorklist(Extract0.getNode());
          return Extract0;
        }
      }
    }
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineFP_EXTEND(
    SDNode *N, DAGCombinerInfo &DCI) const {

  if (!Subtarget.hasVector())
    return SDValue();

  // (fpextend (extract_vector_elt X 0))
  // (fpextend (extract_vector_elt X 2)) ->
  // (extract_vector_elt (VEXTEND X) 0)
  // (extract_vector_elt (VEXTEND X) 1)
  //
  // This is a special case since the target doesn't really support v2f32s.
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(0);
  if (N->getValueType(0) == MVT::f64 &&
      Op0.hasOneUse() &&
      Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      Op0.getOperand(0).getValueType() == MVT::v4f32 &&
      Op0.getOperand(1).getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
    SDValue Vec = Op0.getOperand(0);
    for (auto *U : Vec->uses()) {
      if (U != Op0.getNode() &&
          U->hasOneUse() &&
          U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          U->getOperand(0) == Vec &&
          U->getOperand(1).getOpcode() == ISD::Constant &&
          cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 2) {
        SDValue OtherExtend = SDValue(*U->use_begin(), 0);
        if (OtherExtend.getOpcode() == ISD::FP_EXTEND &&
            OtherExtend.getOperand(0) == SDValue(U, 0) &&
            OtherExtend.getValueType() == MVT::f64) {
          SDValue VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
                                        MVT::v2f64, Vec);
          DCI.AddToWorklist(VExtend.getNode());
          SDValue Extract1 =
            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
                        VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
          DCI.AddToWorklist(Extract1.getNode());
          DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
          SDValue Extract0 =
            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
                        VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
          DCI.AddToWorklist(Extract0.getNode());
          return Extract0;
        }
      }
    }
  }
  return SDValue();
}

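// Illustrative example (an assumption, not original text): bswap (load i32 %p)
// with no other users of the loaded value is folded below into a
// byte-reversing load (SystemZISD::LRV, or VLBR for vectors), so no separate
// byte-swap instruction is needed.
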
SDValue SystemZTargetLowering::combineBSWAP(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
  if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
      N->getOperand(0).hasOneUse() &&
      canLoadStoreByteSwapped(N->getValueType(0))) {
    SDValue Load = N->getOperand(0);
    LoadSDNode *LD = cast<LoadSDNode>(Load);

    // Create the byte-swapping load.
    SDValue Ops[] = {
      LD->getChain(),    // Chain
      LD->getBasePtr()   // Ptr
    };
    EVT LoadVT = N->getValueType(0);
    if (LoadVT == MVT::i16)
      LoadVT = MVT::i32;
    SDValue BSLoad =
      DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
                              DAG.getVTList(LoadVT, MVT::Other),
                              Ops, LD->getMemoryVT(), LD->getMemOperand());

    // If this is an i16 load, insert the truncate.
    SDValue ResVal = BSLoad;
    if (N->getValueType(0) == MVT::i16)
      ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);

    // First, combine the bswap away.  This makes the value produced by the
    // load dead.
    DCI.CombineTo(N, ResVal);

    // Next, combine the load away, we give it a bogus result value but a real
    // chain result.  The result value is dead because the bswap is dead.
    DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));

    // Return N so it doesn't get rechecked!
    return SDValue(N, 0);
  }

  // Look through bitcasts that retain the number of vector elements.
  SDValue Op = N->getOperand(0);
  if (Op.getOpcode() == ISD::BITCAST &&
      Op.getValueType().isVector() &&
      Op.getOperand(0).getValueType().isVector() &&
      Op.getValueType().getVectorNumElements() ==
      Op.getOperand(0).getValueType().getVectorNumElements())
    Op = Op.getOperand(0);

  // Push BSWAP into a vector insertion if at least one side then simplifies.
  if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
    SDValue Vec = Op.getOperand(0);
    SDValue Elt = Op.getOperand(1);
    SDValue Idx = Op.getOperand(2);

    if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
        Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
        DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
        Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
        (canLoadStoreByteSwapped(N->getValueType(0)) &&
         ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
      EVT VecVT = N->getValueType(0);
      EVT EltVT = N->getValueType(0).getVectorElementType();
      if (VecVT != Vec.getValueType()) {
        Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
        DCI.AddToWorklist(Vec.getNode());
      }
      if (EltVT != Elt.getValueType()) {
        Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
        DCI.AddToWorklist(Elt.getNode());
      }
      Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
      DCI.AddToWorklist(Vec.getNode());
      Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
      DCI.AddToWorklist(Elt.getNode());
      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
                         Vec, Elt, Idx);
    }
  }

  // Push BSWAP into a vector shuffle if at least one side then simplifies.
  ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
  if (SV && Op.hasOneUse()) {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
        Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
        DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
        Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
      EVT VecVT = N->getValueType(0);
      if (VecVT != Op0.getValueType()) {
        Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
        DCI.AddToWorklist(Op0.getNode());
      }
      if (VecVT != Op1.getValueType()) {
        Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
        DCI.AddToWorklist(Op1.getNode());
      }
      Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
      DCI.AddToWorklist(Op0.getNode());
      Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
      DCI.AddToWorklist(Op1.getNode());
      return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
    }
  }

  return SDValue();
}

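// Illustrative example (not from the original source): when a BR_CCMASK tests
// (icmp (select_ccmask 1, 0, valid, mask, cc), 0, ne), the helper below lets
// the branch test the original CC producer directly using the select's
// valid/mask values, removing the intermediate materialization of 0/1 in a
// GPR.
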
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
  // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
  // set by the CCReg instruction using the CCValid / CCMask masks.
  // If the CCReg instruction is itself an ICMP testing the condition
  // code set by some other instruction, see whether we can directly
  // use that condition code.

  // Verify that we have an ICMP against some constant.
  if (CCValid != SystemZ::CCMASK_ICMP)
    return false;
  auto *ICmp = CCReg.getNode();
  if (ICmp->getOpcode() != SystemZISD::ICMP)
    return false;
  auto *CompareLHS = ICmp->getOperand(0).getNode();
  auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
  if (!CompareRHS)
    return false;

  // Optimize the case where CompareLHS is a SELECT_CCMASK.
  if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
    // Verify that we have an appropriate mask for a EQ or NE comparison.
    bool Invert = false;
    if (CCMask == SystemZ::CCMASK_CMP_NE)
      Invert = !Invert;
    else if (CCMask != SystemZ::CCMASK_CMP_EQ)
      return false;

    // Verify that the ICMP compares against one of select values.
    auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
    if (!TrueVal)
      return false;
    auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
    if (!FalseVal)
      return false;
    if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
      Invert = !Invert;
    else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
      return false;

    // Compute the effective CC mask for the new branch or select.
    auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
    auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
    if (!NewCCValid || !NewCCMask)
      return false;
    CCValid = NewCCValid->getZExtValue();
    CCMask = NewCCMask->getZExtValue();
    if (Invert)
      CCMask ^= CCValid;

    // Return the updated CCReg link.
    CCReg = CompareLHS->getOperand(4);
    return true;
  }

  // Optimize the case where CompareRHS is (SRA (SHL (IPM))).
  if (CompareLHS->getOpcode() == ISD::SRA) {
    auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
    if (!SRACount || SRACount->getZExtValue() != 30)
      return false;
    auto *SHL = CompareLHS->getOperand(0).getNode();
    if (SHL->getOpcode() != ISD::SHL)
      return false;
    auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
    if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
      return false;
    auto *IPM = SHL->getOperand(0).getNode();
    if (IPM->getOpcode() != SystemZISD::IPM)
      return false;

    // Avoid introducing CC spills (because SRA would clobber CC).
    if (!CompareLHS->hasOneUse())
      return false;
    // Verify that the ICMP compares against zero.
    if (CompareRHS->getZExtValue() != 0)
      return false;

    // Compute the effective CC mask for the new branch or select.
    switch (CCMask) {
    case SystemZ::CCMASK_CMP_EQ: break;
    case SystemZ::CCMASK_CMP_NE: break;
    case SystemZ::CCMASK_CMP_LT: CCMask = SystemZ::CCMASK_CMP_GT; break;
    case SystemZ::CCMASK_CMP_GT: CCMask = SystemZ::CCMASK_CMP_LT; break;
    case SystemZ::CCMASK_CMP_LE: CCMask = SystemZ::CCMASK_CMP_GE; break;
    case SystemZ::CCMASK_CMP_GE: CCMask = SystemZ::CCMASK_CMP_LE; break;
    default: return false;
    }

    // Return the updated CCReg link.
    CCReg = IPM->getOperand(0);
    return true;
  }

  return false;
}

SDValue SystemZTargetLowering::combineBR_CCMASK(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
  auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
  auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
  if (!CCValid || !CCMask)
    return SDValue();

  int CCValidVal = CCValid->getZExtValue();
  int CCMaskVal = CCMask->getZExtValue();
  SDValue Chain = N->getOperand(0);
  SDValue CCReg = N->getOperand(4);

  if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
    return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
                       Chain,
                       DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32),
                       DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32),
                       N->getOperand(3), CCReg);
  return SDValue();
}

SDValue SystemZTargetLowering::combineSELECT_CCMASK(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
  auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
  auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
  if (!CCValid || !CCMask)
    return SDValue();

  int CCValidVal = CCValid->getZExtValue();
  int CCMaskVal = CCMask->getZExtValue();
  SDValue CCReg = N->getOperand(4);

  if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
    return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
                       N->getOperand(0), N->getOperand(1),
                       DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32),
                       DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32),
                       CCReg);
  return SDValue();
}

SDValue SystemZTargetLowering::combineGET_CCMASK(
    SDNode *N, DAGCombinerInfo &DCI) const {

  // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
  auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
  auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
  if (!CCValid || !CCMask)
    return SDValue();
  int CCValidVal = CCValid->getZExtValue();
  int CCMaskVal = CCMask->getZExtValue();

  SDValue Select = N->getOperand(0);
  if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
    return SDValue();

  auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
  auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
  if (!SelectCCValid || !SelectCCMask)
    return SDValue();
  int SelectCCValidVal = SelectCCValid->getZExtValue();
  int SelectCCMaskVal = SelectCCMask->getZExtValue();

  auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
  auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
  if (!TrueVal || !FalseVal)
    return SDValue();
  if (TrueVal->getZExtValue() != 0 && FalseVal->getZExtValue() == 0)
    ;
  else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() != 0)
    SelectCCMaskVal ^= SelectCCValidVal;
  else
    return SDValue();

  if (SelectCCValidVal & ~CCValidVal)
    return SDValue();
  if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
    return SDValue();

  return Select->getOperand(4);
}

SDValue SystemZTargetLowering::combineIntDIVREM(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  EVT VT = N->getValueType(0);
  // In the case where the divisor is a vector of constants a cheaper
  // sequence of instructions can replace the divide. BuildSDIV is called to
  // do this during DAG combining, but it only succeeds when it can build a
  // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
  // since it is not Legal but Custom it can only happen before
  // legalization. Therefore we must scalarize this early before Combine
  // 1. For widened vectors, this is already the result of type legalization.
  if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
      DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
    return DAG.UnrollVectorOp(N);
  return SDValue();
}

SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
  if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
    return N->getOperand(0);
  return N;
}

SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
  switch(N->getOpcode()) {
  default: break;
  case ISD::ZERO_EXTEND:        return combineZERO_EXTEND(N, DCI);
  case ISD::SIGN_EXTEND:        return combineSIGN_EXTEND(N, DCI);
  case ISD::SIGN_EXTEND_INREG:  return combineSIGN_EXTEND_INREG(N, DCI);
  case SystemZISD::MERGE_HIGH:
  case SystemZISD::MERGE_LOW:   return combineMERGE(N, DCI);
  case ISD::LOAD:               return combineLOAD(N, DCI);
  case ISD::STORE:              return combineSTORE(N, DCI);
  case ISD::VECTOR_SHUFFLE:     return combineVECTOR_SHUFFLE(N, DCI);
  case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
  case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
  case ISD::FP_ROUND:           return combineFP_ROUND(N, DCI);
  case ISD::FP_EXTEND:          return combineFP_EXTEND(N, DCI);
  case ISD::BSWAP:              return combineBSWAP(N, DCI);
  case SystemZISD::BR_CCMASK:   return combineBR_CCMASK(N, DCI);
  case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
  case SystemZISD::GET_CCMASK:  return combineGET_CCMASK(N, DCI);
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::SREM:
  case ISD::UREM:               return combineIntDIVREM(N, DCI);
  }

  return SDValue();
}

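// Illustrative example (not from the original source): for a VECTOR PACK
// intrinsic such as s390_vpksh, which narrows two <8 x i16> sources into one
// <16 x i8> result, the helper below maps the demanded result elements back
// to the sources: operand 1 receives the demanded-element bits for result
// elements 0..7 and operand 2 those for elements 8..15, each truncated to an
// 8-element mask.
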
// Return the demanded elements for the OpNo source operand of Op. DemandedElts
// has been adjusted to the vector type of Op.
static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
                                    unsigned OpNo) {
  EVT VT = Op.getValueType();
  unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
  APInt SrcDemE;
  unsigned Opcode = Op.getOpcode();
  if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
    unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    switch (Id) {
    case Intrinsic::s390_vpksh:   // PACKS
    case Intrinsic::s390_vpksf:
    case Intrinsic::s390_vpksg:
    case Intrinsic::s390_vpkshs:  // PACKS_CC
    case Intrinsic::s390_vpksfs:
    case Intrinsic::s390_vpksgs:
    case Intrinsic::s390_vpklsh:  // PACKLS
    case Intrinsic::s390_vpklsf:
    case Intrinsic::s390_vpklsg:
    case Intrinsic::s390_vpklshs: // PACKLS_CC
    case Intrinsic::s390_vpklsfs:
    case Intrinsic::s390_vpklsgs:
      // VECTOR PACK truncates the elements of two source vectors into one.
      SrcDemE = DemandedElts;
      if (OpNo == 2)
        SrcDemE.lshrInPlace(NumElts / 2);
      SrcDemE = SrcDemE.trunc(NumElts / 2);
      break;
      // VECTOR UNPACK extends half the elements of the source vector.
    case Intrinsic::s390_vuphb:  // VECTOR UNPACK HIGH
    case Intrinsic::s390_vuphh:
    case Intrinsic::s390_vuphf:
    case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
    case Intrinsic::s390_vuplhh:
    case Intrinsic::s390_vuplhf:
      SrcDemE = APInt(NumElts * 2, 0);
      SrcDemE.insertBits(DemandedElts, 0);
      break;
    case Intrinsic::s390_vuplb:  // VECTOR UNPACK LOW
    case Intrinsic::s390_vuplhw:
    case Intrinsic::s390_vuplf:
    case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
    case Intrinsic::s390_vupllh:
    case Intrinsic::s390_vupllf:
      SrcDemE = APInt(NumElts * 2, 0);
      SrcDemE.insertBits(DemandedElts, NumElts);
      break;
    case Intrinsic::s390_vpdi: {
      // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
      SrcDemE = APInt(NumElts, 0);
      if (!DemandedElts[OpNo - 1])
        break;
      unsigned Mask = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
      // Demand input element 0 or 1, given by the mask bit value.
      SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
      break;
    }
    case Intrinsic::s390_vsldb: {
      // VECTOR SHIFT LEFT DOUBLE BY BYTE
      assert(VT == MVT::v16i8 && "Unexpected type.");
      unsigned FirstIdx = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
      unsigned NumSrc0Els = 16 - FirstIdx;
      SrcDemE = APInt(NumElts, 0);
      if (OpNo == 1) {
        APInt DemEls = DemandedElts.trunc(NumSrc0Els);
        SrcDemE.insertBits(DemEls, FirstIdx);
      } else {
        APInt DemEls = DemandedElts.lshr(NumSrc0Els);
        SrcDemE.insertBits(DemEls, 0);
      }
      break;
    }
    case Intrinsic::s390_vperm:
      SrcDemE = APInt(NumElts, 1);
      break;
    default:
      llvm_unreachable("Unhandled intrinsic.");
      break;
    }
  } else {
    switch (Opcode) {
    case SystemZISD::JOIN_DWORDS:
      // Scalar operand.
      SrcDemE = APInt(1, 1);
      break;
    case SystemZISD::SELECT_CCMASK:
      SrcDemE = DemandedElts;
      break;
    default:
      llvm_unreachable("Unhandled opcode.");
      break;
    }
  }
  return SrcDemE;
}

static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
                                  const APInt &DemandedElts,
                                  const SelectionDAG &DAG, unsigned Depth,
                                  unsigned OpNo) {
  APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
  APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
  KnownBits LHSKnown =
      DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
  KnownBits RHSKnown =
      DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
  Known.Zero = LHSKnown.Zero & RHSKnown.Zero;
  Known.One = LHSKnown.One & RHSKnown.One;
}

void
SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                     KnownBits &Known,
                                                     const APInt &DemandedElts,
                                                     const SelectionDAG &DAG,
                                                     unsigned Depth) const {
  Known.resetAll();

  // Intrinsic CC result is returned in the two low bits.
  unsigned tmp0, tmp1; // not used
  if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) {
    Known.Zero.setBitsFrom(2);
    return;
  }
  EVT VT = Op.getValueType();
  if (Op.getResNo() != 0 || VT == MVT::Untyped)
    return;
  assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
          "KnownBits does not match VT in bitwidth");
  assert ((!VT.isVector() ||
           (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
          "DemandedElts does not match VT number of elements");
  unsigned BitWidth = Known.getBitWidth();
  unsigned Opcode = Op.getOpcode();
  if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
    bool IsLogical = false;
    unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    switch (Id) {
    case Intrinsic::s390_vpksh:   // PACKS
    case Intrinsic::s390_vpksf:
    case Intrinsic::s390_vpksg:
    case Intrinsic::s390_vpkshs:  // PACKS_CC
    case Intrinsic::s390_vpksfs:
    case Intrinsic::s390_vpksgs:
    case Intrinsic::s390_vpklsh:  // PACKLS
    case Intrinsic::s390_vpklsf:
    case Intrinsic::s390_vpklsg:
    case Intrinsic::s390_vpklshs: // PACKLS_CC
    case Intrinsic::s390_vpklsfs:
    case Intrinsic::s390_vpklsgs:
    case Intrinsic::s390_vpdi:
    case Intrinsic::s390_vsldb:
    case Intrinsic::s390_vperm:
      computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
      break;
    case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
    case Intrinsic::s390_vuplhh:
    case Intrinsic::s390_vuplhf:
    case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
    case Intrinsic::s390_vupllh:
    case Intrinsic::s390_vupllf:
      IsLogical = true;
      LLVM_FALLTHROUGH;
    case Intrinsic::s390_vuphb:  // VECTOR UNPACK HIGH
    case Intrinsic::s390_vuphh:
    case Intrinsic::s390_vuphf:
    case Intrinsic::s390_vuplb:  // VECTOR UNPACK LOW
    case Intrinsic::s390_vuplhw:
    case Intrinsic::s390_vuplf: {
      SDValue SrcOp = Op.getOperand(1);
      APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
      Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
      if (IsLogical) {
        Known = Known.zext(BitWidth, true);
      } else
        Known = Known.sext(BitWidth);
      break;
    }
    default:
      break;
    }
  } else {
    switch (Opcode) {
    case SystemZISD::JOIN_DWORDS:
    case SystemZISD::SELECT_CCMASK:
      computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
      break;
    case SystemZISD::REPLICATE: {
      SDValue SrcOp = Op.getOperand(0);
      Known = DAG.computeKnownBits(SrcOp, Depth + 1);
      if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
        Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
      break;
    }
    default:
      break;
    }
  }

  // Known has the width of the source operand(s). Adjust if needed to match
  // the passed bitwidth.
  if (Known.getBitWidth() != BitWidth)
    Known = Known.zextOrTrunc(BitWidth, false);
}

static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
                                        const SelectionDAG &DAG, unsigned Depth,
                                        unsigned OpNo) {
  APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
  unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
  if (LHS == 1) return 1; // Early out.
  APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
  unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
  if (RHS == 1) return 1; // Early out.
  unsigned Common = std::min(LHS, RHS);
  unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
  EVT VT = Op.getValueType();
  unsigned VTBits = VT.getScalarSizeInBits();
  if (SrcBitWidth > VTBits) { // PACK
    unsigned SrcExtraBits = SrcBitWidth - VTBits;
    if (Common > SrcExtraBits)
      return (Common - SrcExtraBits);
    return 1;
  }
  assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
  return Common;
}
unsigned
SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
  if (Op.getResNo() != 0)
    return 1;
  unsigned Opcode = Op.getOpcode();
  if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
    unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    switch (Id) {
    case Intrinsic::s390_vpksh:   // PACKS
    case Intrinsic::s390_vpksf:
    case Intrinsic::s390_vpksg:
    case Intrinsic::s390_vpkshs:  // PACKS_CC
    case Intrinsic::s390_vpksfs:
    case Intrinsic::s390_vpksgs:
    case Intrinsic::s390_vpklsh:  // PACKLS
    case Intrinsic::s390_vpklsf:
    case Intrinsic::s390_vpklsg:
    case Intrinsic::s390_vpklshs: // PACKLS_CC
    case Intrinsic::s390_vpklsfs:
    case Intrinsic::s390_vpklsgs:
    case Intrinsic::s390_vpdi:
    case Intrinsic::s390_vsldb:
    case Intrinsic::s390_vperm:
      return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
    case Intrinsic::s390_vuphb:  // VECTOR UNPACK HIGH
    case Intrinsic::s390_vuphh:
    case Intrinsic::s390_vuphf:
    case Intrinsic::s390_vuplb:  // VECTOR UNPACK LOW
    case Intrinsic::s390_vuplhw:
    case Intrinsic::s390_vuplf: {
      SDValue PackedOp = Op.getOperand(1);
      APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
      unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
      EVT VT = Op.getValueType();
      unsigned VTBits = VT.getScalarSizeInBits();
      Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
      return Tmp;
    }
    default:
      break;
    }
  } else {
    switch (Opcode) {
    case SystemZISD::SELECT_CCMASK:
      return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
    default:
      break;
    }
  }

  return 1;
}
//===----------------------------------------------------------------------===//
// Custom insertion
//===----------------------------------------------------------------------===//

// Create a new basic block after MBB.
static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) {
  MachineFunction &MF = *MBB->getParent();
  MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
  MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
  return NewMBB;
}
// Split MBB after MI and return the new block (the one that contains
// instructions after MI).
static MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI,
                                          MachineBasicBlock *MBB) {
  MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
  NewMBB->splice(NewMBB->begin(), MBB,
                 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
  return NewMBB;
}
// Split MBB before MI and return the new block (the one that contains MI).
static MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
                                           MachineBasicBlock *MBB) {
  MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
  NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
  NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
  return NewMBB;
}
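// splitBlockAfter() and splitBlockBefore() differ only in whether MI itself
// stays in the original block or moves into the newly created one; both rely
// on emitBlockAfter() to link the new block into the function and then let
// transferSuccessorsAndUpdatePHIs() rewire the CFG edges.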
// Force base value Base into a register before MI.  Return the register.
static Register forceReg(MachineInstr &MI, MachineOperand &Base,
                         const SystemZInstrInfo *TII) {
  if (Base.isReg())
    return Base.getReg();

  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction &MF = *MBB->getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
  BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
      .add(Base)
      .addImm(0)
      .addReg(0);
  return Reg;
}
// The CC operand of MI might be missing a kill marker because there
// were multiple uses of CC, and ISel didn't know which to mark.
// Figure out whether MI should have had a kill marker.
static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
  // Scan forward through BB for a use/def of CC.
  MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
  for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
    const MachineInstr& mi = *miI;
    if (mi.readsRegister(SystemZ::CC))
      return false;
    if (mi.definesRegister(SystemZ::CC))
      break; // Should have kill-flag - update below.
  }

  // If we hit the end of the block, check whether CC is live into a
  // successor.
  if (miI == MBB->end()) {
    for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI)
      if ((*SI)->isLiveIn(SystemZ::CC))
        return false;
  }

  return true;
}
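// Returning true here means that no later instruction in MBB reads CC before
// the next def and that CC is not live into any successor, i.e. the Select
// (or CondStore) being expanded was effectively the last user, so the callers
// below do not need to add CC as a live-in to the new blocks.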
// Return true if it is OK for this Select pseudo-opcode to be cascaded
// together with other Select pseudo-opcodes into a single basic-block with
// a conditional jump around it.
static bool isSelectPseudo(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case SystemZ::Select32:
  case SystemZ::Select64:
  case SystemZ::SelectF32:
  case SystemZ::SelectF64:
  case SystemZ::SelectF128:
  case SystemZ::SelectVR32:
  case SystemZ::SelectVR64:
  case SystemZ::SelectVR128:
    return true;

  default:
    return false;
  }
}
// Helper function, which inserts PHI functions into SinkMBB:
//   %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
// where %FalseValue(i) and %TrueValue(i) are taken from the consequent Selects
// in the [MIItBegin, MIItEnd) range.
static void createPHIsForSelects(MachineBasicBlock::iterator MIItBegin,
                                 MachineBasicBlock::iterator MIItEnd,
                                 MachineBasicBlock *TrueMBB,
                                 MachineBasicBlock *FalseMBB,
                                 MachineBasicBlock *SinkMBB) {
  MachineFunction *MF = TrueMBB->getParent();
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();

  unsigned CCValid = MIItBegin->getOperand(3).getImm();
  unsigned CCMask = MIItBegin->getOperand(4).getImm();
  DebugLoc DL = MIItBegin->getDebugLoc();

  MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();

  // As we are creating the PHIs, we have to be careful if there is more than
  // one.  Later Selects may reference the results of earlier Selects, but later
  // PHIs have to reference the individual true/false inputs from earlier PHIs.
  // That also means that PHI construction must work forward from earlier to
  // later, and that the code must maintain a mapping from each earlier PHI's
  // destination register to the registers that went into the PHI.
  DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;

  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd;
       MIIt = skipDebugInstructionsForward(++MIIt, MIItEnd)) {
    Register DestReg = MIIt->getOperand(0).getReg();
    Register TrueReg = MIIt->getOperand(1).getReg();
    Register FalseReg = MIIt->getOperand(2).getReg();

    // If this Select we are generating is the opposite condition from
    // the jump we generated, then we have to swap the operands for the
    // PHI that is going to be generated.
    if (MIIt->getOperand(4).getImm() == (CCValid ^ CCMask))
      std::swap(TrueReg, FalseReg);

    if (RegRewriteTable.find(TrueReg) != RegRewriteTable.end())
      TrueReg = RegRewriteTable[TrueReg].first;

    if (RegRewriteTable.find(FalseReg) != RegRewriteTable.end())
      FalseReg = RegRewriteTable[FalseReg].second;

    BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
      .addReg(TrueReg).addMBB(TrueMBB)
      .addReg(FalseReg).addMBB(FalseMBB);

    // Add this PHI to the rewrite table.
    RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
  }

  MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
}
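// For example, two cascaded Selects with the same condition
//   %a = Select32 %t1, %f1, CCValid, CCMask
//   %b = Select32 %t2, %a,  CCValid, CCMask
// become, in SinkMBB, roughly
//   %a = phi [ %t1, TrueMBB ], [ %f1, FalseMBB ]
//   %b = phi [ %t2, TrueMBB ], [ %f1, FalseMBB ]
// The second PHI's false input is rewritten through RegRewriteTable to the
// value %a takes on the false path (%f1), since %a itself is only defined by
// the first PHI in SinkMBB.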
// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
MachineBasicBlock *
SystemZTargetLowering::emitSelect(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const {
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());

  unsigned CCValid = MI.getOperand(3).getImm();
  unsigned CCMask = MI.getOperand(4).getImm();
  DebugLoc DL = MI.getDebugLoc();

  // If we have a sequence of Select* pseudo instructions using the
  // same condition code value, we want to expand all of them into
  // a single pair of basic blocks using the same condition.
  MachineInstr *LastMI = &MI;
  MachineBasicBlock::iterator NextMIIt = skipDebugInstructionsForward(
      std::next(MachineBasicBlock::iterator(MI)), MBB->end());

  if (isSelectPseudo(MI))
    while (NextMIIt != MBB->end() && isSelectPseudo(*NextMIIt) &&
           NextMIIt->getOperand(3).getImm() == CCValid &&
           (NextMIIt->getOperand(4).getImm() == CCMask ||
            NextMIIt->getOperand(4).getImm() == (CCValid ^ CCMask))) {
      LastMI = &*NextMIIt;
      NextMIIt = skipDebugInstructionsForward(++NextMIIt, MBB->end());
    }

  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *JoinMBB  = splitBlockBefore(MI, MBB);
  MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);

  // Unless CC was killed in the last Select instruction, mark it as
  // live-in to both FalseMBB and JoinMBB.
  if (!LastMI->killsRegister(SystemZ::CC) && !checkCCKill(*LastMI, JoinMBB)) {
    FalseMBB->addLiveIn(SystemZ::CC);
    JoinMBB->addLiveIn(SystemZ::CC);
  }

  //  StartMBB:
  //   BRC CCMask, JoinMBB
  //   # fallthrough to FalseMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
  MBB->addSuccessor(JoinMBB);
  MBB->addSuccessor(FalseMBB);

  //  FalseMBB:
  //   # fallthrough to JoinMBB
  MBB = FalseMBB;
  MBB->addSuccessor(JoinMBB);

  //  JoinMBB:
  //   %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
  //  ...
  MBB = JoinMBB;
  MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI);
  MachineBasicBlock::iterator MIItEnd = skipDebugInstructionsForward(
      std::next(MachineBasicBlock::iterator(LastMI)), MBB->end());
  createPHIsForSelects(MIItBegin, MIItEnd, StartMBB, FalseMBB, MBB);

  StartMBB->erase(MIItBegin, MIItEnd);
  return JoinMBB;
}
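// The expansion above produces a triangle with an empty false arm, e.g. for a
// single Select32:
//
//  StartMBB:
//    BRC CCValid, CCMask, JoinMBB
//  FalseMBB:
//    # fall through
//  JoinMBB:
//    %dst = phi [ %trueval, StartMBB ], [ %falseval, FalseMBB ]
//
// Cascaded Selects that share the CC value reuse the same pair of blocks and
// simply contribute additional PHIs to JoinMBB.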
// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
// StoreOpcode is the store to use and Invert says whether the store should
// happen when the condition is false rather than true.  If a STORE ON
// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
                                                        MachineBasicBlock *MBB,
                                                        unsigned StoreOpcode,
                                                        unsigned STOCOpcode,
                                                        bool Invert) const {
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());

  Register SrcReg = MI.getOperand(0).getReg();
  MachineOperand Base = MI.getOperand(1);
  int64_t Disp = MI.getOperand(2).getImm();
  Register IndexReg = MI.getOperand(3).getReg();
  unsigned CCValid = MI.getOperand(4).getImm();
  unsigned CCMask = MI.getOperand(5).getImm();
  DebugLoc DL = MI.getDebugLoc();

  StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);

  // Use STOCOpcode if possible.  We could use different store patterns in
  // order to avoid matching the index register, but the performance trade-offs
  // might be more complicated in that case.
  if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
    if (Invert)
      CCMask ^= CCValid;

    // ISel pattern matching also adds a load memory operand of the same
    // address, so take special care to find the storing memory operand.
    MachineMemOperand *MMO = nullptr;
    for (auto *I : MI.memoperands())
      if (I->isStore()) {
        MMO = I;
        break;
      }

    BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
      .addReg(SrcReg)
      .add(Base)
      .addImm(Disp)
      .addImm(CCValid)
      .addImm(CCMask)
      .addMemOperand(MMO);

    MI.eraseFromParent();
    return MBB;
  }

  // Get the condition needed to branch around the store.
  if (!Invert)
    CCMask ^= CCValid;

  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *JoinMBB  = splitBlockBefore(MI, MBB);
  MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);

  // Unless CC was killed in the CondStore instruction, mark it as
  // live-in to both FalseMBB and JoinMBB.
  if (!MI.killsRegister(SystemZ::CC) && !checkCCKill(MI, JoinMBB)) {
    FalseMBB->addLiveIn(SystemZ::CC);
    JoinMBB->addLiveIn(SystemZ::CC);
  }

  //  StartMBB:
  //   BRC CCMask, JoinMBB
  //   # fallthrough to FalseMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
  MBB->addSuccessor(JoinMBB);
  MBB->addSuccessor(FalseMBB);

  //  FalseMBB:
  //   store %SrcReg, %Disp(%Index,%Base)
  //   # fallthrough to JoinMBB
  MBB = FalseMBB;
  BuildMI(MBB, DL, TII->get(StoreOpcode))
    .addReg(SrcReg)
    .add(Base)
    .addImm(Disp)
    .addReg(IndexReg);
  MBB->addSuccessor(JoinMBB);

  MI.eraseFromParent();
  return JoinMBB;
}
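// Without a usable STORE ON CONDITION, the expansion above is the same
// triangle as in emitSelect: the BRC uses the complemented mask and branches
// around FalseMBB (which holds the plain store) whenever the condition for
// storing does not hold, so JoinMBB needs no PHIs.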
// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_*
// or ATOMIC_SWAP{,W} instruction MI.  BinOpcode is the instruction that
// performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}.
// BitSize is the width of the field in bits, or 0 if this is a partword
// ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize
// is one of the operands.  Invert says whether the field should be
// inverted after performing BinOpcode (e.g. for NAND).
MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
    unsigned BitSize, bool Invert) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  bool IsSubWord = (BitSize < 32);

  // Extract the operands.  Base can be a register or a frame index.
  // Src2 can be a register or immediate.
  Register Dest = MI.getOperand(0).getReg();
  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
  int64_t Disp = MI.getOperand(2).getImm();
  MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
  Register BitShift = IsSubWord ? MI.getOperand(4).getReg() : Register();
  Register NegBitShift = IsSubWord ? MI.getOperand(5).getReg() : Register();
  DebugLoc DL = MI.getDebugLoc();
  if (IsSubWord)
    BitSize = MI.getOperand(6).getImm();

  // Subword operations use 32-bit registers.
  const TargetRegisterClass *RC = (BitSize <= 32 ?
                                   &SystemZ::GR32BitRegClass :
                                   &SystemZ::GR64BitRegClass);
  unsigned LOpcode  = BitSize <= 32 ? SystemZ::L  : SystemZ::LG;
  unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;

  // Get the right opcodes for the displacement.
  LOpcode  = TII->getOpcodeForOffset(LOpcode,  Disp);
  CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.
  Register OrigVal       = MRI.createVirtualRegister(RC);
  Register OldVal        = MRI.createVirtualRegister(RC);
  Register NewVal        = (BinOpcode || IsSubWord ?
                            MRI.createVirtualRegister(RC) : Src2.getReg());
  Register RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
  Register RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);

  // Insert a basic block for the main loop.
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB  = splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB  = emitBlockAfter(StartMBB);

  //  StartMBB:
  //   ...
  //   %OrigVal = L Disp(%Base)
  //   # fall through to LoopMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
  MBB->addSuccessor(LoopMBB);

  //  LoopMBB:
  //   %OldVal        = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
  //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
  //   %RotatedNewVal = OP %RotatedOldVal, %Src2
  //   %NewVal        = RLL %RotatedNewVal, 0(%NegBitShift)
  //   %Dest          = CS %OldVal, %NewVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to DoneMBB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigVal).addMBB(StartMBB)
    .addReg(Dest).addMBB(LoopMBB);
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
      .addReg(OldVal).addReg(BitShift).addImm(0);
  if (Invert) {
    // Perform the operation normally and then invert every bit of the field.
    Register Tmp = MRI.createVirtualRegister(RC);
    BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
    if (BitSize <= 32)
      // XILF with the upper BitSize bits set.
      BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
        .addReg(Tmp).addImm(-1U << (32 - BitSize));
    else {
      // Use LCGR and add -1 to the result, which is more compact than
      // an XILF, XILH pair.
      Register Tmp2 = MRI.createVirtualRegister(RC);
      BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp);
      BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal)
        .addReg(Tmp2).addImm(-1);
    }
  } else if (BinOpcode)
    // A simple binary operation.
    BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
      .addReg(RotatedOldVal)
      .add(Src2);
  else if (IsSubWord)
    // Use RISBG to rotate Src2 into position and use it to replace the
    // field in RotatedOldVal.
    BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
      .addReg(RotatedOldVal).addReg(Src2.getReg())
      .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
      .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
    .addReg(OldVal)
    .addReg(NewVal)
    .add(Base)
    .addImm(Disp);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();
  return DoneMBB;
}
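// For a full-word operation such as ATOMIC_LOAD_NR the loop above degenerates
// to roughly
//
//  LoopMBB:
//    %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
//    %NewVal = NR %OldVal, %Src2
//    %Dest   = CS %OldVal, %NewVal, Disp(%Base)
//    JNE LoopMBB
//
// since the RLL rotations and the RISBG field insertion are only emitted for
// the partword (IsSubWord) forms.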
// Implement EmitInstrWithCustomInserter for pseudo
// ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI.  CompareOpcode is the
// instruction that should be used to compare the current field with the
// minimum or maximum value.  KeepOldMask is the BRC condition-code mask
// for when the current field should be kept.  BitSize is the width of
// the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction.
MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
    unsigned KeepOldMask, unsigned BitSize) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  bool IsSubWord = (BitSize < 32);

  // Extract the operands.  Base can be a register or a frame index.
  Register Dest = MI.getOperand(0).getReg();
  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
  int64_t Disp = MI.getOperand(2).getImm();
  Register Src2 = MI.getOperand(3).getReg();
  Register BitShift = (IsSubWord ? MI.getOperand(4).getReg() : Register());
  Register NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : Register());
  DebugLoc DL = MI.getDebugLoc();
  if (IsSubWord)
    BitSize = MI.getOperand(6).getImm();

  // Subword operations use 32-bit registers.
  const TargetRegisterClass *RC = (BitSize <= 32 ?
                                   &SystemZ::GR32BitRegClass :
                                   &SystemZ::GR64BitRegClass);
  unsigned LOpcode  = BitSize <= 32 ? SystemZ::L  : SystemZ::LG;
  unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;

  // Get the right opcodes for the displacement.
  LOpcode  = TII->getOpcodeForOffset(LOpcode,  Disp);
  CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.
  Register OrigVal       = MRI.createVirtualRegister(RC);
  Register OldVal        = MRI.createVirtualRegister(RC);
  Register NewVal        = MRI.createVirtualRegister(RC);
  Register RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
  Register RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2);
  Register RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);

  // Insert 3 basic blocks for the loop.
  MachineBasicBlock *StartMBB  = MBB;
  MachineBasicBlock *DoneMBB   = splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB   = emitBlockAfter(StartMBB);
  MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB);
  MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB);

  //  StartMBB:
  //   ...
  //   %OrigVal = L Disp(%Base)
  //   # fall through to LoopMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
  MBB->addSuccessor(LoopMBB);

  //  LoopMBB:
  //   %OldVal        = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
  //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
  //   CompareOpcode %RotatedOldVal, %Src2
  //   BRC KeepOldMask, UpdateMBB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigVal).addMBB(StartMBB)
    .addReg(Dest).addMBB(UpdateMBB);
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
      .addReg(OldVal).addReg(BitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CompareOpcode))
    .addReg(RotatedOldVal).addReg(Src2);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
  MBB->addSuccessor(UpdateMBB);
  MBB->addSuccessor(UseAltMBB);

  //  UseAltMBB:
  //   %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
  //   # fall through to UpdateMBB
  MBB = UseAltMBB;
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
      .addReg(RotatedOldVal).addReg(Src2)
      .addImm(32).addImm(31 + BitSize).addImm(0);
  MBB->addSuccessor(UpdateMBB);

  //  UpdateMBB:
  //   %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
  //                        [ %RotatedAltVal, UseAltMBB ]
  //   %NewVal        = RLL %RotatedNewVal, 0(%NegBitShift)
  //   %Dest          = CS %OldVal, %NewVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to DoneMBB
  MBB = UpdateMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
    .addReg(RotatedOldVal).addMBB(LoopMBB)
    .addReg(RotatedAltVal).addMBB(UseAltMBB);
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
      .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
    .addReg(OldVal)
    .addReg(NewVal)
    .add(Base)
    .addImm(Disp);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();
  return DoneMBB;
}
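// The compare-and-branch in LoopMBB decides which candidate value is stored
// back: if KeepOldMask is satisfied, the current field already holds the
// minimum (or maximum) and RotatedOldVal flows into the PHI in UpdateMBB
// unchanged; otherwise UseAltMBB inserts %Src2 into the field first.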
// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW
// instruction MI.
MachineBasicBlock *
SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
                                          MachineBasicBlock *MBB) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Extract the operands.  Base can be a register or a frame index.
  Register Dest = MI.getOperand(0).getReg();
  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
  int64_t Disp = MI.getOperand(2).getImm();
  Register OrigCmpVal = MI.getOperand(3).getReg();
  Register OrigSwapVal = MI.getOperand(4).getReg();
  Register BitShift = MI.getOperand(5).getReg();
  Register NegBitShift = MI.getOperand(6).getReg();
  int64_t BitSize = MI.getOperand(7).getImm();
  DebugLoc DL = MI.getDebugLoc();

  const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;

  // Get the right opcodes for the displacement.
  unsigned LOpcode  = TII->getOpcodeForOffset(SystemZ::L,  Disp);
  unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.
  Register OrigOldVal = MRI.createVirtualRegister(RC);
  Register OldVal = MRI.createVirtualRegister(RC);
  Register CmpVal = MRI.createVirtualRegister(RC);
  Register SwapVal = MRI.createVirtualRegister(RC);
  Register StoreVal = MRI.createVirtualRegister(RC);
  Register RetryOldVal = MRI.createVirtualRegister(RC);
  Register RetryCmpVal = MRI.createVirtualRegister(RC);
  Register RetrySwapVal = MRI.createVirtualRegister(RC);

  // Insert 2 basic blocks for the loop.
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB  = splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB  = emitBlockAfter(StartMBB);
  MachineBasicBlock *SetMBB   = emitBlockAfter(LoopMBB);

  //  StartMBB:
  //   ...
  //   %OrigOldVal = L Disp(%Base)
  //   # fall through to LoopMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
    .add(Base)
    .addImm(Disp)
    .addReg(0);
  MBB->addSuccessor(LoopMBB);

  //  LoopMBB:
  //   %OldVal      = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
  //   %CmpVal      = phi [ %OrigCmpVal, EntryBB ], [ %RetryCmpVal, SetMBB ]
  //   %SwapVal     = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
  //   %Dest        = RLL %OldVal, BitSize(%BitShift)
  //                    ^^ The low BitSize bits contain the field
  //                       of interest.
  //   %RetryCmpVal = RISBG32 %CmpVal, %Dest, 32, 63-BitSize, 0
  //                    ^^ Replace the upper 32-BitSize bits of the
  //                       comparison value with those that we loaded,
  //                       so that we can use a full word comparison.
  //   CR %Dest, %RetryCmpVal
  //   JNE DoneMBB
  //   # Fall through to SetMBB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigOldVal).addMBB(StartMBB)
    .addReg(RetryOldVal).addMBB(SetMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), CmpVal)
    .addReg(OrigCmpVal).addMBB(StartMBB)
    .addReg(RetryCmpVal).addMBB(SetMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
    .addReg(OrigSwapVal).addMBB(StartMBB)
    .addReg(RetrySwapVal).addMBB(SetMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::RLL), Dest)
    .addReg(OldVal).addReg(BitShift).addImm(BitSize);
  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetryCmpVal)
    .addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
  BuildMI(MBB, DL, TII->get(SystemZ::CR))
    .addReg(Dest).addReg(RetryCmpVal);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ICMP)
    .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB);
  MBB->addSuccessor(DoneMBB);
  MBB->addSuccessor(SetMBB);

  //  SetMBB:
  //   %RetrySwapVal = RISBG32 %SwapVal, %Dest, 32, 63-BitSize, 0
  //                     ^^ Replace the upper 32-BitSize bits of the new
  //                        value with those that we loaded.
  //   %StoreVal     = RLL %RetrySwapVal, -BitSize(%NegBitShift)
  //                     ^^ Rotate the new field to its proper position.
  //   %RetryOldVal  = CS %Dest, %StoreVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to DoneMBB
  MBB = SetMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
    .addReg(SwapVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
  BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
    .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
  BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
    .addReg(OldVal)
    .addReg(StoreVal)
    .add(Base)
    .addImm(Disp);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
  // to the block after the loop.  At this point, CC may have been defined
  // either by the CR in LoopMBB or by the CS in SetMBB.
  if (!MI.registerDefIsDead(SystemZ::CC))
    DoneMBB->addLiveIn(SystemZ::CC);

  MI.eraseFromParent();
  return DoneMBB;
}
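// The RISBG32 step above is what lets a partword compare-and-swap use the
// full-word CR/CS instructions: the bits outside the field are copied from the
// value just loaded, so a compare mismatch can only come from the field
// itself.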
// Emit a move from two GR64s to a GR128.
MachineBasicBlock *
SystemZTargetLowering::emitPair128(MachineInstr &MI,
                                   MachineBasicBlock *MBB) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI.getDebugLoc();

  Register Dest = MI.getOperand(0).getReg();
  Register Hi = MI.getOperand(1).getReg();
  Register Lo = MI.getOperand(2).getReg();
  Register Tmp1 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
  Register Tmp2 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);

  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Tmp1);
  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Tmp2)
    .addReg(Tmp1).addReg(Hi).addImm(SystemZ::subreg_h64);
  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
    .addReg(Tmp2).addReg(Lo).addImm(SystemZ::subreg_l64);

  MI.eraseFromParent();
  return MBB;
}
// Emit an extension from a GR64 to a GR128.  ClearEven is true
// if the high register of the GR128 value must be cleared or false if
// it's "don't care".
MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
                                                     MachineBasicBlock *MBB,
                                                     bool ClearEven) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI.getDebugLoc();

  Register Dest = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);

  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
  if (ClearEven) {
    Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
    Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);

    BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
      .addImm(0);
    BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
      .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
    In128 = NewIn128;
  }
  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
    .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);

  MI.eraseFromParent();
  return MBB;
}
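// With ClearEven the high half is explicitly zeroed via LLILL 0, which is what
// ZEXT128 needs; AEXT128 skips that step and leaves the high 64 bits undefined
// apart from the IMPLICIT_DEF.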
MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI.getDebugLoc();

  MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
  uint64_t DestDisp = MI.getOperand(1).getImm();
  MachineOperand SrcBase = earlyUseOperand(MI.getOperand(2));
  uint64_t SrcDisp = MI.getOperand(3).getImm();
  uint64_t Length = MI.getOperand(4).getImm();

  // When generating more than one CLC, all but the last will need to
  // branch to the end when a difference is found.
  MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ?
                               splitBlockAfter(MI, MBB) : nullptr);

  // Check for the loop form, in which operand 5 is the trip count.
  if (MI.getNumExplicitOperands() > 5) {
    bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);

    Register StartCountReg = MI.getOperand(5).getReg();
    Register StartSrcReg   = forceReg(MI, SrcBase, TII);
    Register StartDestReg  = (HaveSingleBase ? StartSrcReg :
                              forceReg(MI, DestBase, TII));

    const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
    Register ThisSrcReg  = MRI.createVirtualRegister(RC);
    Register ThisDestReg = (HaveSingleBase ? ThisSrcReg :
                            MRI.createVirtualRegister(RC));
    Register NextSrcReg  = MRI.createVirtualRegister(RC);
    Register NextDestReg = (HaveSingleBase ? NextSrcReg :
                            MRI.createVirtualRegister(RC));

    RC = &SystemZ::GR64BitRegClass;
    Register ThisCountReg = MRI.createVirtualRegister(RC);
    Register NextCountReg = MRI.createVirtualRegister(RC);

    MachineBasicBlock *StartMBB = MBB;
    MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
    MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
    MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB);

    //  StartMBB:
    //   # fall through to LoopMBB
    MBB->addSuccessor(LoopMBB);

    //  LoopMBB:
    //   %ThisDestReg = phi [ %StartDestReg, StartMBB ],
    //                      [ %NextDestReg, NextMBB ]
    //   %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
    //                     [ %NextSrcReg, NextMBB ]
    //   %ThisCountReg = phi [ %StartCountReg, StartMBB ],
    //                       [ %NextCountReg, NextMBB ]
    //   ( PFD 2, 768+DestDisp(%ThisDestReg) )
    //   Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
    //   ( JLH EndMBB )
    //
    // The prefetch is used only for MVC.  The JLH is used only for CLC.
    MBB = LoopMBB;

    BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
      .addReg(StartDestReg).addMBB(StartMBB)
      .addReg(NextDestReg).addMBB(NextMBB);
    if (!HaveSingleBase)
      BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
        .addReg(StartSrcReg).addMBB(StartMBB)
        .addReg(NextSrcReg).addMBB(NextMBB);
    BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
      .addReg(StartCountReg).addMBB(StartMBB)
      .addReg(NextCountReg).addMBB(NextMBB);
    if (Opcode == SystemZ::MVC)
      BuildMI(MBB, DL, TII->get(SystemZ::PFD))
        .addImm(SystemZ::PFD_WRITE)
        .addReg(ThisDestReg).addImm(DestDisp + 768).addReg(0);
    BuildMI(MBB, DL, TII->get(Opcode))
      .addReg(ThisDestReg).addImm(DestDisp).addImm(256)
      .addReg(ThisSrcReg).addImm(SrcDisp);
    if (EndMBB) {
      BuildMI(MBB, DL, TII->get(SystemZ::BRC))
        .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
        .addMBB(EndMBB);
      MBB->addSuccessor(EndMBB);
      MBB->addSuccessor(NextMBB);
    }

    //  NextMBB:
    //   %NextDestReg = LA 256(%ThisDestReg)
    //   %NextSrcReg = LA 256(%ThisSrcReg)
    //   %NextCountReg = AGHI %ThisCountReg, -1
    //   CGHI %NextCountReg, 0
    //   JLH LoopMBB
    //   # fall through to DoneMBB
    //
    // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
    MBB = NextMBB;

    BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
      .addReg(ThisDestReg).addImm(256).addReg(0);
    if (!HaveSingleBase)
      BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
        .addReg(ThisSrcReg).addImm(256).addReg(0);
    BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
      .addReg(ThisCountReg).addImm(-1);
    BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
      .addReg(NextCountReg).addImm(0);
    BuildMI(MBB, DL, TII->get(SystemZ::BRC))
      .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
      .addMBB(LoopMBB);
    MBB->addSuccessor(LoopMBB);
    MBB->addSuccessor(DoneMBB);

    DestBase = MachineOperand::CreateReg(NextDestReg, false);
    SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
    Length &= 255;
    if (EndMBB && !Length)
      // If the loop handled the whole CLC range, DoneMBB will be empty with
      // CC live-through into EndMBB, so add it as live-in.
      DoneMBB->addLiveIn(SystemZ::CC);
    MBB = DoneMBB;
  }
  // Handle any remaining bytes with straight-line code.
  while (Length > 0) {
    uint64_t ThisLength = std::min(Length, uint64_t(256));
    // The previous iteration might have created out-of-range displacements.
    // Apply them using LAY if so.
    if (!isUInt<12>(DestDisp)) {
      Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
      BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
          .add(DestBase)
          .addImm(DestDisp)
          .addReg(0);
      DestBase = MachineOperand::CreateReg(Reg, false);
      DestDisp = 0;
    }
    if (!isUInt<12>(SrcDisp)) {
      Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
      BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
          .add(SrcBase)
          .addImm(SrcDisp)
          .addReg(0);
      SrcBase = MachineOperand::CreateReg(Reg, false);
      SrcDisp = 0;
    }
    BuildMI(*MBB, MI, DL, TII->get(Opcode))
        .add(DestBase)
        .addImm(DestDisp)
        .addImm(ThisLength)
        .add(SrcBase)
        .addImm(SrcDisp)
        .setMemRefs(MI.memoperands());
    DestDisp += ThisLength;
    SrcDisp += ThisLength;
    Length -= ThisLength;
    // If there's another CLC to go, branch to the end if a difference
    // was found.
    if (EndMBB && Length > 0) {
      MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB);
      BuildMI(MBB, DL, TII->get(SystemZ::BRC))
        .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
        .addMBB(EndMBB);
      MBB->addSuccessor(EndMBB);
      MBB->addSuccessor(NextMBB);
      MBB = NextMBB;
    }
  }
  if (EndMBB) {
    MBB->addSuccessor(EndMBB);
    MBB = EndMBB;
    MBB->addLiveIn(SystemZ::CC);
  }

  MI.eraseFromParent();
  return MBB;
}
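// For example, an MVCLoop with Length 700 and a trip count of 2 (the trip
// count operand is assumed to have been set up by the DAG lowering) runs the
// 256-byte loop twice and then falls into the straight-line tail, which emits
// a single MVC for the remaining 700 & 255 = 188 bytes.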
// Decompose string pseudo-instruction MI into a loop that continually performs
// Opcode until CC != 3.
MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI.getDebugLoc();

  uint64_t End1Reg = MI.getOperand(0).getReg();
  uint64_t Start1Reg = MI.getOperand(1).getReg();
  uint64_t Start2Reg = MI.getOperand(2).getReg();
  uint64_t CharReg = MI.getOperand(3).getReg();

  const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
  uint64_t This1Reg = MRI.createVirtualRegister(RC);
  uint64_t This2Reg = MRI.createVirtualRegister(RC);
  uint64_t End2Reg  = MRI.createVirtualRegister(RC);

  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);

  //  StartMBB:
  //   # fall through to LoopMBB
  MBB->addSuccessor(LoopMBB);

  //  LoopMBB:
  //   %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
  //   %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
  //   R0L = %CharReg
  //   %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
  //   JO LoopMBB
  //   # fall through to DoneMBB
  //
  // The load of R0L can be hoisted by post-RA LICM.
  MBB = LoopMBB;

  BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
    .addReg(Start1Reg).addMBB(StartMBB)
    .addReg(End1Reg).addMBB(LoopMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
    .addReg(Start2Reg).addMBB(StartMBB)
    .addReg(End2Reg).addMBB(LoopMBB);
  BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
  BuildMI(MBB, DL, TII->get(Opcode))
    .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
    .addReg(This1Reg).addReg(This2Reg);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  DoneMBB->addLiveIn(SystemZ::CC);

  MI.eraseFromParent();
  return DoneMBB;
}
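// CC == 3 means the string instruction stopped after processing a
// CPU-determined number of bytes without reaching a definitive result, so the
// BRC above simply re-executes it with the updated addresses until it reports
// CC != 3.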
// Update TBEGIN instruction with final opcode and register clobbers.
MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
    bool NoFloat) const {
  MachineFunction &MF = *MBB->getParent();
  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();

  // Update opcode.
  MI.setDesc(TII->get(Opcode));

  // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
  // Make sure to add the corresponding GRSM bits if they are missing.
  uint64_t Control = MI.getOperand(2).getImm();
  static const unsigned GPRControlBit[16] = {
    0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
    0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
  };
  Control |= GPRControlBit[15];
  if (TFI->hasFP(MF))
    Control |= GPRControlBit[11];
  MI.getOperand(2).setImm(Control);

  // Add GPR clobbers.
  for (int I = 0; I < 16; I++) {
    if ((Control & GPRControlBit[I]) == 0) {
      unsigned Reg = SystemZMC::GR64Regs[I];
      MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
    }
  }

  // Add FPR/VR clobbers.
  if (!NoFloat && (Control & 4) != 0) {
    if (Subtarget.hasVector()) {
      for (int I = 0; I < 32; I++) {
        unsigned Reg = SystemZMC::VR128Regs[I];
        MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
      }
    } else {
      for (int I = 0; I < 16; I++) {
        unsigned Reg = SystemZMC::FP64Regs[I];
        MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
      }
    }
  }

  return MBB;
}
MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
  MachineFunction &MF = *MBB->getParent();
  MachineRegisterInfo *MRI = &MF.getRegInfo();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  DebugLoc DL = MI.getDebugLoc();

  Register SrcReg = MI.getOperand(0).getReg();

  // Create new virtual register of the same class as source.
  const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
  Register DstReg = MRI->createVirtualRegister(RC);

  // Replace pseudo with a normal load-and-test that models the def as
  // well.
  BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
    .addReg(SrcReg);
  MI.eraseFromParent();

  return MBB;
}
MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
    MachineInstr &MI, MachineBasicBlock *MBB) const {
  switch (MI.getOpcode()) {
  case SystemZ::Select32:
  case SystemZ::Select64:
  case SystemZ::SelectF32:
  case SystemZ::SelectF64:
  case SystemZ::SelectF128:
  case SystemZ::SelectVR32:
  case SystemZ::SelectVR64:
  case SystemZ::SelectVR128:
    return emitSelect(MI, MBB);

  case SystemZ::CondStore8Mux:
    return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
  case SystemZ::CondStore8MuxInv:
    return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
  case SystemZ::CondStore16Mux:
    return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
  case SystemZ::CondStore16MuxInv:
    return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
  case SystemZ::CondStore32Mux:
    return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
  case SystemZ::CondStore32MuxInv:
    return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
  case SystemZ::CondStore8:
    return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
  case SystemZ::CondStore8Inv:
    return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
  case SystemZ::CondStore16:
    return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
  case SystemZ::CondStore16Inv:
    return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
  case SystemZ::CondStore32:
    return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
  case SystemZ::CondStore32Inv:
    return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
  case SystemZ::CondStore64:
    return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
  case SystemZ::CondStore64Inv:
    return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
  case SystemZ::CondStoreF32:
    return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
  case SystemZ::CondStoreF32Inv:
    return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
  case SystemZ::CondStoreF64:
    return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
  case SystemZ::CondStoreF64Inv:
    return emitCondStore(MI, MBB, SystemZ::STD, 0, true);

  case SystemZ::PAIR128:
    return emitPair128(MI, MBB);
  case SystemZ::AEXT128:
    return emitExt128(MI, MBB, false);
  case SystemZ::ZEXT128:
    return emitExt128(MI, MBB, true);

  case SystemZ::ATOMIC_SWAPW:
    return emitAtomicLoadBinary(MI, MBB, 0, 0);
  case SystemZ::ATOMIC_SWAP_32:
    return emitAtomicLoadBinary(MI, MBB, 0, 32);
  case SystemZ::ATOMIC_SWAP_64:
    return emitAtomicLoadBinary(MI, MBB, 0, 64);

  case SystemZ::ATOMIC_LOADW_AR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 0);
  case SystemZ::ATOMIC_LOADW_AFI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 0);
  case SystemZ::ATOMIC_LOAD_AR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 32);
  case SystemZ::ATOMIC_LOAD_AHI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AHI, 32);
  case SystemZ::ATOMIC_LOAD_AFI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 32);
  case SystemZ::ATOMIC_LOAD_AGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AGR, 64);
  case SystemZ::ATOMIC_LOAD_AGHI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AGHI, 64);
  case SystemZ::ATOMIC_LOAD_AGFI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AGFI, 64);

  case SystemZ::ATOMIC_LOADW_SR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 0);
  case SystemZ::ATOMIC_LOAD_SR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 32);
  case SystemZ::ATOMIC_LOAD_SGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::SGR, 64);

  case SystemZ::ATOMIC_LOADW_NR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0);
  case SystemZ::ATOMIC_LOADW_NILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0);
  case SystemZ::ATOMIC_LOAD_NR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32);
  case SystemZ::ATOMIC_LOAD_NILL:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32);
  case SystemZ::ATOMIC_LOAD_NILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32);
  case SystemZ::ATOMIC_LOAD_NILF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32);
  case SystemZ::ATOMIC_LOAD_NGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64);
  case SystemZ::ATOMIC_LOAD_NILL64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64);
  case SystemZ::ATOMIC_LOAD_NILH64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64);
  case SystemZ::ATOMIC_LOAD_NIHL64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64);
  case SystemZ::ATOMIC_LOAD_NIHH64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64);
  case SystemZ::ATOMIC_LOAD_NILF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64);
  case SystemZ::ATOMIC_LOAD_NIHF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64);

  case SystemZ::ATOMIC_LOADW_OR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0);
  case SystemZ::ATOMIC_LOADW_OILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 0);
  case SystemZ::ATOMIC_LOAD_OR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32);
  case SystemZ::ATOMIC_LOAD_OILL:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 32);
  case SystemZ::ATOMIC_LOAD_OILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 32);
  case SystemZ::ATOMIC_LOAD_OILF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 32);
  case SystemZ::ATOMIC_LOAD_OGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64);
  case SystemZ::ATOMIC_LOAD_OILL64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL64, 64);
  case SystemZ::ATOMIC_LOAD_OILH64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH64, 64);
  case SystemZ::ATOMIC_LOAD_OIHL64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL64, 64);
  case SystemZ::ATOMIC_LOAD_OIHH64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH64, 64);
  case SystemZ::ATOMIC_LOAD_OILF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF64, 64);
  case SystemZ::ATOMIC_LOAD_OIHF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF64, 64);

  case SystemZ::ATOMIC_LOADW_XR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0);
  case SystemZ::ATOMIC_LOADW_XILF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 0);
  case SystemZ::ATOMIC_LOAD_XR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32);
  case SystemZ::ATOMIC_LOAD_XILF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 32);
  case SystemZ::ATOMIC_LOAD_XGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64);
  case SystemZ::ATOMIC_LOAD_XILF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF64, 64);
  case SystemZ::ATOMIC_LOAD_XIHF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF64, 64);

  case SystemZ::ATOMIC_LOADW_NRi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true);
  case SystemZ::ATOMIC_LOADW_NILHi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0, true);
  case SystemZ::ATOMIC_LOAD_NRi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true);
  case SystemZ::ATOMIC_LOAD_NILLi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32, true);
  case SystemZ::ATOMIC_LOAD_NILHi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32, true);
  case SystemZ::ATOMIC_LOAD_NILFi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32, true);
  case SystemZ::ATOMIC_LOAD_NGRi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true);
  case SystemZ::ATOMIC_LOAD_NILL64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64, true);
  case SystemZ::ATOMIC_LOAD_NILH64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64, true);
  case SystemZ::ATOMIC_LOAD_NIHL64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64, true);
  case SystemZ::ATOMIC_LOAD_NIHH64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64, true);
  case SystemZ::ATOMIC_LOAD_NILF64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64, true);
  case SystemZ::ATOMIC_LOAD_NIHF64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64, true);

  case SystemZ::ATOMIC_LOADW_MIN:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
                                SystemZ::CCMASK_CMP_LE, 0);
  case SystemZ::ATOMIC_LOAD_MIN_32:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
                                SystemZ::CCMASK_CMP_LE, 32);
  case SystemZ::ATOMIC_LOAD_MIN_64:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
                                SystemZ::CCMASK_CMP_LE, 64);

  case SystemZ::ATOMIC_LOADW_MAX:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
                                SystemZ::CCMASK_CMP_GE, 0);
  case SystemZ::ATOMIC_LOAD_MAX_32:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
                                SystemZ::CCMASK_CMP_GE, 32);
  case SystemZ::ATOMIC_LOAD_MAX_64:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
                                SystemZ::CCMASK_CMP_GE, 64);

  case SystemZ::ATOMIC_LOADW_UMIN:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
                                SystemZ::CCMASK_CMP_LE, 0);
  case SystemZ::ATOMIC_LOAD_UMIN_32:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
                                SystemZ::CCMASK_CMP_LE, 32);
  case SystemZ::ATOMIC_LOAD_UMIN_64:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
                                SystemZ::CCMASK_CMP_LE, 64);

  case SystemZ::ATOMIC_LOADW_UMAX:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
                                SystemZ::CCMASK_CMP_GE, 0);
  case SystemZ::ATOMIC_LOAD_UMAX_32:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
                                SystemZ::CCMASK_CMP_GE, 32);
  case SystemZ::ATOMIC_LOAD_UMAX_64:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
                                SystemZ::CCMASK_CMP_GE, 64);

  case SystemZ::ATOMIC_CMP_SWAPW:
    return emitAtomicCmpSwapW(MI, MBB);
  case SystemZ::MVCSequence:
  case SystemZ::MVCLoop:
    return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
  case SystemZ::NCSequence:
  case SystemZ::NCLoop:
    return emitMemMemWrapper(MI, MBB, SystemZ::NC);
  case SystemZ::OCSequence:
  case SystemZ::OCLoop:
    return emitMemMemWrapper(MI, MBB, SystemZ::OC);
  case SystemZ::XCSequence:
  case SystemZ::XCLoop:
    return emitMemMemWrapper(MI, MBB, SystemZ::XC);
  case SystemZ::CLCSequence:
  case SystemZ::CLCLoop:
    return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
  case SystemZ::CLSTLoop:
    return emitStringWrapper(MI, MBB, SystemZ::CLST);
  case SystemZ::MVSTLoop:
    return emitStringWrapper(MI, MBB, SystemZ::MVST);
  case SystemZ::SRSTLoop:
    return emitStringWrapper(MI, MBB, SystemZ::SRST);
  case SystemZ::TBEGIN:
    return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
  case SystemZ::TBEGIN_nofloat:
    return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
  case SystemZ::TBEGINC:
    return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
  case SystemZ::LTEBRCompare_VecPseudo:
    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
  case SystemZ::LTDBRCompare_VecPseudo:
    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
  case SystemZ::LTXBRCompare_VecPseudo:
    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);

  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
    return emitPatchPoint(MI, MBB);

  default:
    llvm_unreachable("Unexpected instr type to insert");
  }
}
// This is only used by the isel schedulers, and is needed only to prevent
// compiler from crashing when list-ilp is used.
const TargetRegisterClass *
SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
  if (VT == MVT::Untyped)
    return &SystemZ::ADDR128BitRegClass;
  return TargetLowering::getRepRegClassFor(VT);
}