1 //===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file describes the X86 AVX512 instruction set, defining the
10 // instructions, and properties of the instructions which are needed for code
11 // generation, machine code emission, and analysis.
13 //===----------------------------------------------------------------------===//
15 // Group template arguments that can be derived from the vector type (EltNum x
16 // EltVT). These are things like the register class for the writemask, etc.
17 // The idea is to pass one of these as the template argument rather than the
18 // individual arguments.
19 // The template is also used for scalar types, in this case numelts is 1.
20 class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
22 RegisterClass RC = rc;
23 ValueType EltVT = eltvt;
24 int NumElts = numelts;
26 // Corresponding mask register class.
27 RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
29 // Corresponding mask register pair class.
30 RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
31 !cast<RegisterOperand>("VK" # NumElts # "Pair"));
33 // Corresponding write-mask register class.
34 RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
37 ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
39 // Suffix used in the instruction mnemonic.
40 string Suffix = suffix;
42 // VTName is a string name for vector VT. For vector types it will be
43 // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
44 // It is a little bit complex for scalar types, where NumElts = 1.
45 // In this case we build v4f32 or v2f64
46 string VTName = "v" # !if (!eq (NumElts, 1),
47 !if (!eq (EltVT.Size, 16), 8,
48 !if (!eq (EltVT.Size, 32), 4,
49 !if (!eq (EltVT.Size, 64), 2, NumElts))), NumElts) # EltVT;
52 ValueType VT = !cast<ValueType>(VTName);
54 string EltTypeName = !cast<string>(EltVT);
55 // Size of the element type in bits, e.g. 32 for v16i32.
56 string EltSizeName = !subst("i", "", !subst("f", "", !subst("b", "", EltTypeName)));
57 int EltSize = EltVT.Size;
59 // "i" for integer types and "f" for floating-point types
60 string TypeVariantName = !subst("b", "", !subst(EltSizeName, "", EltTypeName));
62 // Size of RC in bits, e.g. 512 for VR512.
65 // The corresponding memory operand, e.g. i512mem for VR512.
66 X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
67 X86MemOperand ScalarMemOp = !cast<X86MemOperand>(!subst("b", "", EltTypeName) # "mem");
68 // FP scalar memory operand for intrinsics - ssmem/sdmem.
69 Operand IntScalarMemOp = !if (!eq (EltTypeName, "f16"), !cast<Operand>("shmem"),
70 !if (!eq (EltTypeName, "bf16"), !cast<Operand>("shmem"),
71 !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
72 !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?))));
75 PatFrag LdFrag = !cast<PatFrag>("load" # VTName);
77 PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);
79 PatFrag ScalarLdFrag = !cast<PatFrag>("load" # !subst("b", "", EltTypeName));
80 PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);
82 PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f16"), !cast<PatFrags>("sse_load_f16"),
83 !if (!eq (EltTypeName, "bf16"), !cast<PatFrags>("sse_load_f16"),
84 !if (!eq (EltTypeName, "f32"), !cast<PatFrags>("sse_load_f32"),
85 !if (!eq (EltTypeName, "f64"), !cast<PatFrags>("sse_load_f64"), ?))));
87 // The string to specify embedded broadcast in assembly.
88 string BroadcastStr = "{1to" # NumElts # "}";
90 // 8-bit compressed displacement tuple/subvector format. This is only
91 // defined for NumElts <= 8.
92 CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
93 !cast<CD8VForm>("CD8VT" # NumElts), ?);
95 SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
96 !if (!eq (Size, 256), sub_ymm, ?));
98 Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
99 !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
100 !if (!eq (EltTypeName, "f16"), SSEPackedSingle, // FIXME?
101 !if (!eq (EltTypeName, "bf16"), SSEPackedSingle, // FIXME?
104 RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X,
105 !if (!eq (EltTypeName, "f16"), FR16X,
106 !if (!eq (EltTypeName, "bf16"), FR16X,
109 dag ImmAllZerosV = (VT immAllZerosV);
111 string ZSuffix = !if (!eq (Size, 128), "Z128",
112 !if (!eq (Size, 256), "Z256", "Z"));
115 def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
116 def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
117 def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
118 def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
119 def v32f16_info : X86VectorVTInfo<32, f16, VR512, "ph">;
120 def v32bf16_info: X86VectorVTInfo<32, bf16, VR512, "pbf">;
121 def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
122 def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;
124 // "x" in v32i8x_info means RC = VR256X
125 def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
126 def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
127 def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
128 def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
129 def v16f16x_info : X86VectorVTInfo<16, f16, VR256X, "ph">;
130 def v16bf16x_info: X86VectorVTInfo<16, bf16, VR256X, "pbf">;
131 def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
132 def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;
134 def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
135 def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
136 def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
137 def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
138 def v8f16x_info : X86VectorVTInfo<8, f16, VR128X, "ph">;
139 def v8bf16x_info : X86VectorVTInfo<8, bf16, VR128X, "pbf">;
140 def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
141 def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
143 // We map scalar types to the smallest (128-bit) vector type
144 // with the appropriate element type. This allows to use the same masking logic.
145 def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
146 def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
147 def f16x_info : X86VectorVTInfo<1, f16, VR128X, "sh">;
148 def bf16x_info : X86VectorVTInfo<1, bf16, VR128X, "sbf">;
149 def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
150 def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
152 class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
153 X86VectorVTInfo i128> {
154 X86VectorVTInfo info512 = i512;
155 X86VectorVTInfo info256 = i256;
156 X86VectorVTInfo info128 = i128;
159 def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
161 def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
163 def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
165 def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
167 def avx512vl_f16_info : AVX512VLVectorVTInfo<v32f16_info, v16f16x_info,
169 def avx512vl_bf16_info : AVX512VLVectorVTInfo<v32bf16_info, v16bf16x_info,
171 def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
173 def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
176 class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
178 RegisterClass KRC = _krc;
179 RegisterClass KRCWM = _krcwm;
183 def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
184 def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
185 def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
186 def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
187 def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
188 def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
189 def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
191 // Used for matching masked operations. Ensures the operation part only has a
193 def vselect_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
194 (vselect node:$mask, node:$src1, node:$src2), [{
195 return isProfitableToFormMaskedOp(N);
198 def X86selects_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
199 (X86selects node:$mask, node:$src1, node:$src2), [{
200 return isProfitableToFormMaskedOp(N);
203 // This multiclass generates the masking variants from the non-masking
204 // variant. It only provides the assembly pieces for the masking variants.
205 // It assumes custom ISel patterns for masking which can be provided as
206 // template arguments.
207 multiclass AVX512_maskable_custom<bits<8> O, Format F,
209 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
211 string AttSrcAsm, string IntelSrcAsm,
213 list<dag> MaskingPattern,
214 list<dag> ZeroMaskingPattern,
215 string MaskingConstraint = "",
216 bit IsCommutable = 0,
217 bit IsKCommutable = 0,
218 bit IsKZCommutable = IsCommutable,
219 string ClobberConstraint = ""> {
220 let isCommutable = IsCommutable, Constraints = ClobberConstraint in
221 def NAME: AVX512<O, F, Outs, Ins,
222 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
223 "$dst, "#IntelSrcAsm#"}",
226 // Prefer over VMOV*rrk Pat<>
227 let isCommutable = IsKCommutable in
228 def NAME#k: AVX512<O, F, Outs, MaskingIns,
229 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
230 "$dst {${mask}}, "#IntelSrcAsm#"}",
233 // In case of the 3src subclass this is overridden with a let.
234 string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint,
235 !if(!eq(MaskingConstraint, ""), ClobberConstraint,
236 !strconcat(ClobberConstraint, ", ", MaskingConstraint)));
239 // Zero mask does not add any restrictions to commute operands transformation.
240 // So, it is Ok to use IsCommutable instead of IsKCommutable.
241 let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<>
242 Constraints = ClobberConstraint in
243 def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
244 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
245 "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
251 // Common base class of AVX512_maskable and AVX512_maskable_3src.
252 multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
254 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
256 string AttSrcAsm, string IntelSrcAsm,
257 dag RHS, dag MaskingRHS,
258 SDPatternOperator Select = vselect_mask,
259 string MaskingConstraint = "",
260 bit IsCommutable = 0,
261 bit IsKCommutable = 0,
262 bit IsKZCommutable = IsCommutable,
263 string ClobberConstraint = ""> :
264 AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
265 AttSrcAsm, IntelSrcAsm,
266 [(set _.RC:$dst, RHS)],
267 [(set _.RC:$dst, MaskingRHS)],
269 (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
270 MaskingConstraint, IsCommutable,
271 IsKCommutable, IsKZCommutable, ClobberConstraint>;
273 // This multiclass generates the unconditional/non-masking, the masking and
274 // the zero-masking variant of the vector instruction. In the masking case, the
275 // preserved vector elements come from a new dummy input operand tied to $dst.
276 // This version uses a separate dag for non-masking and masking.
277 multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
278 dag Outs, dag Ins, string OpcodeStr,
279 string AttSrcAsm, string IntelSrcAsm,
280 dag RHS, dag MaskRHS,
281 string ClobberConstraint = "",
282 bit IsCommutable = 0, bit IsKCommutable = 0,
283 bit IsKZCommutable = IsCommutable> :
284 AVX512_maskable_custom<O, F, Outs, Ins,
285 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
286 !con((ins _.KRCWM:$mask), Ins),
287 OpcodeStr, AttSrcAsm, IntelSrcAsm,
288 [(set _.RC:$dst, RHS)],
290 (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
292 (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
293 "$src0 = $dst", IsCommutable, IsKCommutable,
294 IsKZCommutable, ClobberConstraint>;
296 // This multiclass generates the unconditional/non-masking, the masking and
297 // the zero-masking variant of the vector instruction. In the masking case, the
298 // preserved vector elements come from a new dummy input operand tied to $dst.
299 multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
300 dag Outs, dag Ins, string OpcodeStr,
301 string AttSrcAsm, string IntelSrcAsm,
303 bit IsCommutable = 0, bit IsKCommutable = 0,
304 bit IsKZCommutable = IsCommutable,
305 SDPatternOperator Select = vselect_mask,
306 string ClobberConstraint = ""> :
307 AVX512_maskable_common<O, F, _, Outs, Ins,
308 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
309 !con((ins _.KRCWM:$mask), Ins),
310 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
311 (Select _.KRCWM:$mask, RHS, _.RC:$src0),
312 Select, "$src0 = $dst", IsCommutable, IsKCommutable,
313 IsKZCommutable, ClobberConstraint>;
315 // This multiclass generates the unconditional/non-masking, the masking and
316 // the zero-masking variant of the scalar instruction.
317 multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
318 dag Outs, dag Ins, string OpcodeStr,
319 string AttSrcAsm, string IntelSrcAsm,
321 AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
322 RHS, 0, 0, 0, X86selects_mask>;
324 // Similar to AVX512_maskable but in this case one of the source operands
325 // ($src1) is already tied to $dst so we just use that for the preserved
326 // vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
328 multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
329 dag Outs, dag NonTiedIns, string OpcodeStr,
330 string AttSrcAsm, string IntelSrcAsm,
332 bit IsCommutable = 0,
333 bit IsKCommutable = 0,
334 SDPatternOperator Select = vselect_mask,
336 AVX512_maskable_common<O, F, _, Outs,
337 !con((ins _.RC:$src1), NonTiedIns),
338 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
339 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
340 OpcodeStr, AttSrcAsm, IntelSrcAsm,
341 !if(MaskOnly, (null_frag), RHS),
342 (Select _.KRCWM:$mask, RHS, _.RC:$src1),
343 Select, "", IsCommutable, IsKCommutable>;
345 // Similar to AVX512_maskable_3src but in this case the input VT for the tied
346 // operand differs from the output VT. This requires a bitconvert on
347 // the preserved vector going into the vselect.
348 // NOTE: The unmasked pattern is disabled.
349 multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
350 X86VectorVTInfo InVT,
351 dag Outs, dag NonTiedIns, string OpcodeStr,
352 string AttSrcAsm, string IntelSrcAsm,
353 dag RHS, bit IsCommutable = 0> :
354 AVX512_maskable_common<O, F, OutVT, Outs,
355 !con((ins InVT.RC:$src1), NonTiedIns),
356 !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
357 !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
358 OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
359 (vselect_mask InVT.KRCWM:$mask, RHS,
360 (bitconvert InVT.RC:$src1)),
361 vselect_mask, "", IsCommutable>;
363 multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
364 dag Outs, dag NonTiedIns, string OpcodeStr,
365 string AttSrcAsm, string IntelSrcAsm,
367 bit IsCommutable = 0,
368 bit IsKCommutable = 0,
370 AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
371 IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
372 X86selects_mask, MaskOnly>;
374 multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
377 string AttSrcAsm, string IntelSrcAsm,
379 AVX512_maskable_custom<O, F, Outs, Ins,
380 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
381 !con((ins _.KRCWM:$mask), Ins),
382 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
385 multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
386 dag Outs, dag NonTiedIns,
388 string AttSrcAsm, string IntelSrcAsm,
390 AVX512_maskable_custom<O, F, Outs,
391 !con((ins _.RC:$src1), NonTiedIns),
392 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
393 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
394 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
397 // Instruction with mask that puts result in mask register,
398 // like "compare" and "vptest"
399 multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
401 dag Ins, dag MaskingIns,
403 string AttSrcAsm, string IntelSrcAsm,
405 list<dag> MaskingPattern,
406 bit IsCommutable = 0> {
407 let isCommutable = IsCommutable in {
408 def NAME: AVX512<O, F, Outs, Ins,
409 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
410 "$dst, "#IntelSrcAsm#"}",
413 def NAME#k: AVX512<O, F, Outs, MaskingIns,
414 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
415 "$dst {${mask}}, "#IntelSrcAsm#"}",
416 MaskingPattern>, EVEX_K;
420 multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
422 dag Ins, dag MaskingIns,
424 string AttSrcAsm, string IntelSrcAsm,
425 dag RHS, dag MaskingRHS,
426 bit IsCommutable = 0> :
427 AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
428 AttSrcAsm, IntelSrcAsm,
429 [(set _.KRC:$dst, RHS)],
430 [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
432 multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
433 dag Outs, dag Ins, string OpcodeStr,
434 string AttSrcAsm, string IntelSrcAsm,
435 dag RHS, dag RHS_su, bit IsCommutable = 0> :
436 AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
437 !con((ins _.KRCWM:$mask), Ins),
438 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
439 (and _.KRCWM:$mask, RHS_su), IsCommutable>;
441 // Used by conversion instructions.
442 multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
444 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
446 string AttSrcAsm, string IntelSrcAsm,
447 dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
448 AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
449 AttSrcAsm, IntelSrcAsm,
450 [(set _.RC:$dst, RHS)],
451 [(set _.RC:$dst, MaskingRHS)],
452 [(set _.RC:$dst, ZeroMaskingRHS)],
455 multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
456 dag Outs, dag NonTiedIns, string OpcodeStr,
457 string AttSrcAsm, string IntelSrcAsm,
458 dag RHS, dag MaskingRHS, bit IsCommutable,
460 AVX512_maskable_custom<O, F, Outs,
461 !con((ins _.RC:$src1), NonTiedIns),
462 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
463 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
464 OpcodeStr, AttSrcAsm, IntelSrcAsm,
465 [(set _.RC:$dst, RHS)],
467 (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
469 (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
470 "", IsCommutable, IsKCommutable>;
472 // Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
473 // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
474 // swizzled by ExecutionDomainFix to pxor.
475 // We set canFoldAsLoad because this can be converted to a constant-pool
476 // load of an all-zeros value if folding it would be beneficial.
477 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
478 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
479 def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
480 [(set VR512:$dst, (v16i32 immAllZerosV))]>;
481 def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
482 [(set VR512:$dst, (v16i32 immAllOnesV))]>;
485 let Predicates = [HasAVX512] in {
486 def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
487 def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
488 def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
489 def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
490 def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
491 def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
494 // Alias instructions that allow VPTERNLOG to be used with a mask to create
495 // a mix of all ones and all zeros elements. This is done this way to force
496 // the same register to be used as input for all three sources.
497 let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
498 def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
499 (ins VK16WM:$mask), "",
500 [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
501 (v16i32 immAllOnesV),
502 (v16i32 immAllZerosV)))]>;
503 def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
504 (ins VK8WM:$mask), "",
505 [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
507 (v8i64 immAllZerosV)))]>;
510 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
511 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
512 def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
513 [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
514 def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
515 [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
518 let Predicates = [HasAVX512] in {
519 def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
520 def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
521 def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
522 def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
523 def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
524 def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
525 def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
526 def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
527 def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
528 def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
529 def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
530 def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
533 // Alias instructions that map fld0 to xorps for sse or vxorps for avx.
534 // This is expanded by ExpandPostRAPseudos.
535 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
536 isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
537 def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
538 [(set FR16X:$dst, fp16imm0)]>;
539 def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
540 [(set FR32X:$dst, fp32imm0)]>;
541 def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
542 [(set FR64X:$dst, fp64imm0)]>;
543 def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
544 [(set VR128X:$dst, fp128imm0)]>;
547 //===----------------------------------------------------------------------===//
548 // AVX-512 - VECTOR INSERT
551 // Supports two different pattern operators for mask and unmasked ops. Allows
552 // null_frag to be passed for one.
553 multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
555 SDPatternOperator vinsert_insert,
556 SDPatternOperator vinsert_for_mask,
557 X86FoldableSchedWrite sched> {
558 let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
559 defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
560 (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
561 "vinsert" # From.EltTypeName # "x" # From.NumElts,
562 "$src3, $src2, $src1", "$src1, $src2, $src3",
563 (vinsert_insert:$src3 (To.VT To.RC:$src1),
564 (From.VT From.RC:$src2),
566 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
567 (From.VT From.RC:$src2),
569 AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
571 defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
572 (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
573 "vinsert" # From.EltTypeName # "x" # From.NumElts,
574 "$src3, $src2, $src1", "$src1, $src2, $src3",
575 (vinsert_insert:$src3 (To.VT To.RC:$src1),
576 (From.VT (From.LdFrag addr:$src2)),
578 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
579 (From.VT (From.LdFrag addr:$src2)),
580 (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
581 EVEX_CD8<From.EltSize, From.CD8TupleForm>,
582 Sched<[sched.Folded, sched.ReadAfterFold]>;
586 // Passes the same pattern operator for masked and unmasked ops.
587 multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
589 SDPatternOperator vinsert_insert,
590 X86FoldableSchedWrite sched> :
591 vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;
593 multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
594 X86VectorVTInfo To, PatFrag vinsert_insert,
595 SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
596 let Predicates = p in {
597 def : Pat<(vinsert_insert:$ins
598 (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
599 (To.VT (!cast<Instruction>(InstrStr#"rr")
600 To.RC:$src1, From.RC:$src2,
601 (INSERT_get_vinsert_imm To.RC:$ins)))>;
603 def : Pat<(vinsert_insert:$ins
605 (From.VT (From.LdFrag addr:$src2)),
607 (To.VT (!cast<Instruction>(InstrStr#"rm")
608 To.RC:$src1, addr:$src2,
609 (INSERT_get_vinsert_imm To.RC:$ins)))>;
613 multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
614 ValueType EltVT64, int Opcode256,
615 X86FoldableSchedWrite sched> {
617 let Predicates = [HasVLX] in
618 defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
619 X86VectorVTInfo< 4, EltVT32, VR128X>,
620 X86VectorVTInfo< 8, EltVT32, VR256X>,
621 vinsert128_insert, sched>, EVEX_V256;
623 defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
624 X86VectorVTInfo< 4, EltVT32, VR128X>,
625 X86VectorVTInfo<16, EltVT32, VR512>,
626 vinsert128_insert, sched>, EVEX_V512;
628 defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
629 X86VectorVTInfo< 4, EltVT64, VR256X>,
630 X86VectorVTInfo< 8, EltVT64, VR512>,
631 vinsert256_insert, sched>, REX_W, EVEX_V512;
633 // Even with DQI we'd like to only use these instructions for masking.
634 let Predicates = [HasVLX, HasDQI] in
635 defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
636 X86VectorVTInfo< 2, EltVT64, VR128X>,
637 X86VectorVTInfo< 4, EltVT64, VR256X>,
638 null_frag, vinsert128_insert, sched>,
641 // Even with DQI we'd like to only use these instructions for masking.
642 let Predicates = [HasDQI] in {
643 defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
644 X86VectorVTInfo< 2, EltVT64, VR128X>,
645 X86VectorVTInfo< 8, EltVT64, VR512>,
646 null_frag, vinsert128_insert, sched>,
649 defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
650 X86VectorVTInfo< 8, EltVT32, VR256X>,
651 X86VectorVTInfo<16, EltVT32, VR512>,
652 null_frag, vinsert256_insert, sched>,
657 // FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
658 defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
659 defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
661 // Codegen pattern with the alternative types,
662 // Even with AVX512DQ we'll still use these for unmasked operations.
663 defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
664 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
665 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
666 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
668 defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
669 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
670 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
671 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
673 defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
674 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
675 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
676 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
678 // Codegen pattern with the alternative types insert VEC128 into VEC256
679 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
680 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
681 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
682 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
683 defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info,
684 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
685 // Codegen pattern with the alternative types insert VEC128 into VEC512
686 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
687 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
688 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
689 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
690 defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info,
691 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
692 // Codegen pattern with the alternative types insert VEC256 into VEC512
693 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
694 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
695 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
696 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
697 defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info,
698 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
701 multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
702 X86VectorVTInfo To, X86VectorVTInfo Cast,
703 PatFrag vinsert_insert,
704 SDNodeXForm INSERT_get_vinsert_imm,
706 let Predicates = p in {
708 (vselect_mask Cast.KRCWM:$mask,
710 (vinsert_insert:$ins (To.VT To.RC:$src1),
711 (From.VT From.RC:$src2),
714 (!cast<Instruction>(InstrStr#"rrk")
715 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
716 (INSERT_get_vinsert_imm To.RC:$ins))>;
718 (vselect_mask Cast.KRCWM:$mask,
720 (vinsert_insert:$ins (To.VT To.RC:$src1),
723 (From.LdFrag addr:$src2))),
726 (!cast<Instruction>(InstrStr#"rmk")
727 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
728 (INSERT_get_vinsert_imm To.RC:$ins))>;
731 (vselect_mask Cast.KRCWM:$mask,
733 (vinsert_insert:$ins (To.VT To.RC:$src1),
734 (From.VT From.RC:$src2),
737 (!cast<Instruction>(InstrStr#"rrkz")
738 Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
739 (INSERT_get_vinsert_imm To.RC:$ins))>;
741 (vselect_mask Cast.KRCWM:$mask,
743 (vinsert_insert:$ins (To.VT To.RC:$src1),
744 (From.VT (From.LdFrag addr:$src2)),
747 (!cast<Instruction>(InstrStr#"rmkz")
748 Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
749 (INSERT_get_vinsert_imm To.RC:$ins))>;
753 defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
754 v8f32x_info, vinsert128_insert,
755 INSERT_get_vinsert128_imm, [HasVLX]>;
756 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
757 v4f64x_info, vinsert128_insert,
758 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
760 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
761 v8i32x_info, vinsert128_insert,
762 INSERT_get_vinsert128_imm, [HasVLX]>;
763 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
764 v8i32x_info, vinsert128_insert,
765 INSERT_get_vinsert128_imm, [HasVLX]>;
766 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
767 v8i32x_info, vinsert128_insert,
768 INSERT_get_vinsert128_imm, [HasVLX]>;
769 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
770 v4i64x_info, vinsert128_insert,
771 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
772 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
773 v4i64x_info, vinsert128_insert,
774 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
775 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
776 v4i64x_info, vinsert128_insert,
777 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
779 defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
780 v16f32_info, vinsert128_insert,
781 INSERT_get_vinsert128_imm, [HasAVX512]>;
782 defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
783 v8f64_info, vinsert128_insert,
784 INSERT_get_vinsert128_imm, [HasDQI]>;
786 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
787 v16i32_info, vinsert128_insert,
788 INSERT_get_vinsert128_imm, [HasAVX512]>;
789 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
790 v16i32_info, vinsert128_insert,
791 INSERT_get_vinsert128_imm, [HasAVX512]>;
792 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
793 v16i32_info, vinsert128_insert,
794 INSERT_get_vinsert128_imm, [HasAVX512]>;
795 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
796 v8i64_info, vinsert128_insert,
797 INSERT_get_vinsert128_imm, [HasDQI]>;
798 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
799 v8i64_info, vinsert128_insert,
800 INSERT_get_vinsert128_imm, [HasDQI]>;
801 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
802 v8i64_info, vinsert128_insert,
803 INSERT_get_vinsert128_imm, [HasDQI]>;
805 defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
806 v16f32_info, vinsert256_insert,
807 INSERT_get_vinsert256_imm, [HasDQI]>;
808 defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
809 v8f64_info, vinsert256_insert,
810 INSERT_get_vinsert256_imm, [HasAVX512]>;
812 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
813 v16i32_info, vinsert256_insert,
814 INSERT_get_vinsert256_imm, [HasDQI]>;
815 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
816 v16i32_info, vinsert256_insert,
817 INSERT_get_vinsert256_imm, [HasDQI]>;
818 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
819 v16i32_info, vinsert256_insert,
820 INSERT_get_vinsert256_imm, [HasDQI]>;
821 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
822 v8i64_info, vinsert256_insert,
823 INSERT_get_vinsert256_imm, [HasAVX512]>;
824 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
825 v8i64_info, vinsert256_insert,
826 INSERT_get_vinsert256_imm, [HasAVX512]>;
827 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
828 v8i64_info, vinsert256_insert,
829 INSERT_get_vinsert256_imm, [HasAVX512]>;
831 // vinsertps - insert f32 to XMM
832 let ExeDomain = SSEPackedSingle in {
833 let isCommutable = 1 in
834 def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
835 (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
836 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
837 [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
838 EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
839 def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
840 (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
841 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
842 [(set VR128X:$dst, (X86insertps VR128X:$src1,
843 (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
845 EVEX_4V, EVEX_CD8<32, CD8VT1>,
846 Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
849 //===----------------------------------------------------------------------===//
850 // AVX-512 VECTOR EXTRACT
853 // Supports two different pattern operators for mask and unmasked ops. Allows
854 // null_frag to be passed for one.
855 multiclass vextract_for_size_split<int Opcode,
856 X86VectorVTInfo From, X86VectorVTInfo To,
857 SDPatternOperator vextract_extract,
858 SDPatternOperator vextract_for_mask,
859 SchedWrite SchedRR, SchedWrite SchedMR> {
861 let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
862 defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
863 (ins From.RC:$src1, u8imm:$idx),
864 "vextract" # To.EltTypeName # "x" # To.NumElts,
865 "$idx, $src1", "$src1, $idx",
866 (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
867 (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
868 AVX512AIi8Base, EVEX, Sched<[SchedRR]>;
870 def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
871 (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
872 "vextract" # To.EltTypeName # "x" # To.NumElts #
873 "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
874 [(store (To.VT (vextract_extract:$idx
875 (From.VT From.RC:$src1), (iPTR imm))),
879 let mayStore = 1, hasSideEffects = 0 in
880 def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
881 (ins To.MemOp:$dst, To.KRCWM:$mask,
882 From.RC:$src1, u8imm:$idx),
883 "vextract" # To.EltTypeName # "x" # To.NumElts #
884 "\t{$idx, $src1, $dst {${mask}}|"
885 "$dst {${mask}}, $src1, $idx}", []>,
886 EVEX_K, EVEX, Sched<[SchedMR]>;
890 // Passes the same pattern operator for masked and unmasked ops.
891 multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
893 SDPatternOperator vextract_extract,
894 SchedWrite SchedRR, SchedWrite SchedMR> :
895 vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;
897 // Codegen pattern for the alternative types
898 multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
899 X86VectorVTInfo To, PatFrag vextract_extract,
900 SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
901 let Predicates = p in {
902 def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
903 (To.VT (!cast<Instruction>(InstrStr#"rr")
905 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
906 def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
907 (iPTR imm))), addr:$dst),
908 (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
909 (EXTRACT_get_vextract_imm To.RC:$ext))>;
913 multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
914 ValueType EltVT64, int Opcode256,
915 SchedWrite SchedRR, SchedWrite SchedMR> {
916 let Predicates = [HasAVX512] in {
917 defm NAME # "32x4Z" : vextract_for_size<Opcode128,
918 X86VectorVTInfo<16, EltVT32, VR512>,
919 X86VectorVTInfo< 4, EltVT32, VR128X>,
920 vextract128_extract, SchedRR, SchedMR>,
921 EVEX_V512, EVEX_CD8<32, CD8VT4>;
922 defm NAME # "64x4Z" : vextract_for_size<Opcode256,
923 X86VectorVTInfo< 8, EltVT64, VR512>,
924 X86VectorVTInfo< 4, EltVT64, VR256X>,
925 vextract256_extract, SchedRR, SchedMR>,
926 REX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
928 let Predicates = [HasVLX] in
929 defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
930 X86VectorVTInfo< 8, EltVT32, VR256X>,
931 X86VectorVTInfo< 4, EltVT32, VR128X>,
932 vextract128_extract, SchedRR, SchedMR>,
933 EVEX_V256, EVEX_CD8<32, CD8VT4>;
935 // Even with DQI we'd like to only use these instructions for masking.
936 let Predicates = [HasVLX, HasDQI] in
937 defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
938 X86VectorVTInfo< 4, EltVT64, VR256X>,
939 X86VectorVTInfo< 2, EltVT64, VR128X>,
940 null_frag, vextract128_extract, SchedRR, SchedMR>,
941 VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;
943 // Even with DQI we'd like to only use these instructions for masking.
944 let Predicates = [HasDQI] in {
945 defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
946 X86VectorVTInfo< 8, EltVT64, VR512>,
947 X86VectorVTInfo< 2, EltVT64, VR128X>,
948 null_frag, vextract128_extract, SchedRR, SchedMR>,
949 REX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
950 defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
951 X86VectorVTInfo<16, EltVT32, VR512>,
952 X86VectorVTInfo< 8, EltVT32, VR256X>,
953 null_frag, vextract256_extract, SchedRR, SchedMR>,
954 EVEX_V512, EVEX_CD8<32, CD8VT8>;
958 // TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
959 defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
960 defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
962 // extract_subvector codegen patterns with the alternative types.
963 // Even with AVX512DQ we'll still use these for unmasked operations.
964 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
965 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
966 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
967 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
969 defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
970 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
971 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
972 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
974 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
975 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
976 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
977 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
979 // Codegen pattern with the alternative types extract VEC128 from VEC256
980 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
981 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
982 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
983 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
984 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16f16x_info, v8f16x_info,
985 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
987 // Codegen pattern with the alternative types extract VEC128 from VEC512
988 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
989 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
990 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
991 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
992 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32f16_info, v8f16x_info,
993 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
994 // Codegen pattern with the alternative types extract VEC256 from VEC512
995 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
996 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
997 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
998 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
999 defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info,
1000 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
1003 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
1004 // smaller extract to enable EVEX->VEX.
1005 let Predicates = [NoVLX] in {
1006 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
1007 (v2i64 (VEXTRACTI128rr
1008 (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
1010 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
1011 (v2f64 (VEXTRACTF128rr
1012 (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
1014 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
1015 (v4i32 (VEXTRACTI128rr
1016 (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
1018 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
1019 (v4f32 (VEXTRACTF128rr
1020 (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
1022 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
1023 (v8i16 (VEXTRACTI128rr
1024 (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
1026 def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
1027 (v8f16 (VEXTRACTF128rr
1028 (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
1030 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
1031 (v16i8 (VEXTRACTI128rr
1032 (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
1036 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
1037 // smaller extract to enable EVEX->VEX.
1038 let Predicates = [HasVLX] in {
1039 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
1040 (v2i64 (VEXTRACTI32x4Z256rr
1041 (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
1043 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
1044 (v2f64 (VEXTRACTF32x4Z256rr
1045 (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
1047 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
1048 (v4i32 (VEXTRACTI32x4Z256rr
1049 (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
1051 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
1052 (v4f32 (VEXTRACTF32x4Z256rr
1053 (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
1055 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
1056 (v8i16 (VEXTRACTI32x4Z256rr
1057 (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
1059 def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
1060 (v8f16 (VEXTRACTF32x4Z256rr
1061 (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
1063 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
1064 (v16i8 (VEXTRACTI32x4Z256rr
1065 (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
1070 // Additional patterns for handling a bitcast between the vselect and the
1071 // extract_subvector.
1072 multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
1073 X86VectorVTInfo To, X86VectorVTInfo Cast,
1074 PatFrag vextract_extract,
1075 SDNodeXForm EXTRACT_get_vextract_imm,
1076 list<Predicate> p> {
1077 let Predicates = p in {
1078 def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
1080 (To.VT (vextract_extract:$ext
1081 (From.VT From.RC:$src), (iPTR imm)))),
1083 (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
1084 Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
1085 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
1087 def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
1089 (To.VT (vextract_extract:$ext
1090 (From.VT From.RC:$src), (iPTR imm)))),
1091 Cast.ImmAllZerosV)),
1092 (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
1093 Cast.KRCWM:$mask, From.RC:$src,
1094 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
1098 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
1099 v4f32x_info, vextract128_extract,
1100 EXTRACT_get_vextract128_imm, [HasVLX]>;
1101 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
1102 v2f64x_info, vextract128_extract,
1103 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1105 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
1106 v4i32x_info, vextract128_extract,
1107 EXTRACT_get_vextract128_imm, [HasVLX]>;
1108 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
1109 v4i32x_info, vextract128_extract,
1110 EXTRACT_get_vextract128_imm, [HasVLX]>;
1111 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
1112 v4i32x_info, vextract128_extract,
1113 EXTRACT_get_vextract128_imm, [HasVLX]>;
1114 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
1115 v2i64x_info, vextract128_extract,
1116 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1117 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
1118 v2i64x_info, vextract128_extract,
1119 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1120 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
1121 v2i64x_info, vextract128_extract,
1122 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1124 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
1125 v4f32x_info, vextract128_extract,
1126 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1127 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
1128 v2f64x_info, vextract128_extract,
1129 EXTRACT_get_vextract128_imm, [HasDQI]>;
1131 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
1132 v4i32x_info, vextract128_extract,
1133 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1134 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
1135 v4i32x_info, vextract128_extract,
1136 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1137 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
1138 v4i32x_info, vextract128_extract,
1139 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1140 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
1141 v2i64x_info, vextract128_extract,
1142 EXTRACT_get_vextract128_imm, [HasDQI]>;
1143 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
1144 v2i64x_info, vextract128_extract,
1145 EXTRACT_get_vextract128_imm, [HasDQI]>;
1146 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
1147 v2i64x_info, vextract128_extract,
1148 EXTRACT_get_vextract128_imm, [HasDQI]>;
1150 defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
1151 v8f32x_info, vextract256_extract,
1152 EXTRACT_get_vextract256_imm, [HasDQI]>;
1153 defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
1154 v4f64x_info, vextract256_extract,
1155 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1157 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
1158 v8i32x_info, vextract256_extract,
1159 EXTRACT_get_vextract256_imm, [HasDQI]>;
1160 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
1161 v8i32x_info, vextract256_extract,
1162 EXTRACT_get_vextract256_imm, [HasDQI]>;
1163 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
1164 v8i32x_info, vextract256_extract,
1165 EXTRACT_get_vextract256_imm, [HasDQI]>;
1166 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
1167 v4i64x_info, vextract256_extract,
1168 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1169 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
1170 v4i64x_info, vextract256_extract,
1171 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1172 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
1173 v4i64x_info, vextract256_extract,
1174 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1176 // vextractps - extract 32 bits from XMM
1177 def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
1178 (ins VR128X:$src1, u8imm:$src2),
1179 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1180 [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
1181 EVEX, WIG, Sched<[WriteVecExtract]>;
1183 def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
1184 (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1185 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1186 [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1188 EVEX, WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1190 //===---------------------------------------------------------------------===//
1191 // AVX-512 BROADCAST
1193 // broadcast with a scalar argument.
1194 multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo,
1195 X86VectorVTInfo SrcInfo> {
1196 def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1197 (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
1198 (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1199 def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1200 (X86VBroadcast SrcInfo.FRC:$src),
1201 DestInfo.RC:$src0)),
1202 (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
1203 DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1204 (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1205 def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1206 (X86VBroadcast SrcInfo.FRC:$src),
1207 DestInfo.ImmAllZerosV)),
1208 (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
1209 DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1212 // Split version to allow mask and broadcast node to be different types. This
1213 // helps support the 32x2 broadcasts.
1214 multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1215 SchedWrite SchedRR, SchedWrite SchedRM,
1216 X86VectorVTInfo MaskInfo,
1217 X86VectorVTInfo DestInfo,
1218 X86VectorVTInfo SrcInfo,
1219 bit IsConvertibleToThreeAddress,
1220 SDPatternOperator UnmaskedOp = X86VBroadcast,
1221 SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
1222 let hasSideEffects = 0 in
1223 def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
1224 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1225 [(set MaskInfo.RC:$dst,
1229 (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
1230 DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
1231 def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1232 (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
1233 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1234 "${dst} {${mask}} {z}, $src}"),
1235 [(set MaskInfo.RC:$dst,
1236 (vselect_mask MaskInfo.KRCWM:$mask,
1240 (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1241 MaskInfo.ImmAllZerosV))],
1242 DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
1243 let Constraints = "$src0 = $dst" in
1244 def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1245 (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1247 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1248 "${dst} {${mask}}, $src}"),
1249 [(set MaskInfo.RC:$dst,
1250 (vselect_mask MaskInfo.KRCWM:$mask,
1254 (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1255 MaskInfo.RC:$src0))],
1256 DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;
1258 let hasSideEffects = 0, mayLoad = 1 in
1259 def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1260 (ins SrcInfo.ScalarMemOp:$src),
1261 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1262 [(set MaskInfo.RC:$dst,
1266 (UnmaskedBcastOp addr:$src)))))],
1267 DestInfo.ExeDomain>, T8PD, EVEX,
1268 EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1270 def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1271 (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
1272 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1273 "${dst} {${mask}} {z}, $src}"),
1274 [(set MaskInfo.RC:$dst,
1275 (vselect_mask MaskInfo.KRCWM:$mask,
1279 (SrcInfo.BroadcastLdFrag addr:$src)))),
1280 MaskInfo.ImmAllZerosV))],
1281 DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
1282 EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1284 let Constraints = "$src0 = $dst",
1285 isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
1286 def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1287 (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1288 SrcInfo.ScalarMemOp:$src),
1289 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1290 "${dst} {${mask}}, $src}"),
1291 [(set MaskInfo.RC:$dst,
1292 (vselect_mask MaskInfo.KRCWM:$mask,
1296 (SrcInfo.BroadcastLdFrag addr:$src)))),
1297 MaskInfo.RC:$src0))],
1298 DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
1299 EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1302 // Helper class to force mask and broadcast result to same type.
1303 multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
1304 SchedWrite SchedRR, SchedWrite SchedRM,
1305 X86VectorVTInfo DestInfo,
1306 X86VectorVTInfo SrcInfo,
1307 bit IsConvertibleToThreeAddress> :
1308 avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
1309 DestInfo, DestInfo, SrcInfo,
1310 IsConvertibleToThreeAddress>;
1312 multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1313 AVX512VLVectorVTInfo _> {
1314 let Predicates = [HasAVX512] in {
1315 defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1316 WriteFShuffle256Ld, _.info512, _.info128, 1>,
1317 avx512_broadcast_scalar<NAME, _.info512, _.info128>,
1321 let Predicates = [HasVLX] in {
1322 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1323 WriteFShuffle256Ld, _.info256, _.info128, 1>,
1324 avx512_broadcast_scalar<NAME, _.info256, _.info128>,
1329 multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1330 AVX512VLVectorVTInfo _> {
1331 let Predicates = [HasAVX512] in {
1332 defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1333 WriteFShuffle256Ld, _.info512, _.info128, 1>,
1334 avx512_broadcast_scalar<NAME, _.info512, _.info128>,
1338 let Predicates = [HasVLX] in {
1339 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1340 WriteFShuffle256Ld, _.info256, _.info128, 1>,
1341 avx512_broadcast_scalar<NAME, _.info256, _.info128>,
1343 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1344 WriteFShuffle256Ld, _.info128, _.info128, 1>,
1345 avx512_broadcast_scalar<NAME, _.info128, _.info128>,
1349 defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1351 defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1352 avx512vl_f64_info>, VEX_W1X;
1354 multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1355 X86VectorVTInfo _, SDPatternOperator OpNode,
1356 RegisterClass SrcRC> {
1357 // Fold with a mask even if it has multiple uses since it is cheap.
1358 let ExeDomain = _.ExeDomain in
1359 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1361 "vpbroadcast"#_.Suffix, "$src", "$src",
1362 (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
1363 /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
1364 T8PD, EVEX, Sched<[SchedRR]>;
1367 multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1368 X86VectorVTInfo _, SDPatternOperator OpNode,
1369 RegisterClass SrcRC, SubRegIndex Subreg> {
1370 let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1371 defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
1372 (outs _.RC:$dst), (ins GR32:$src),
1373 !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1374 !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1375 "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
1376 "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
1378 def : Pat <(_.VT (OpNode SrcRC:$src)),
1379 (!cast<Instruction>(Name#rr)
1380 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1382 // Fold with a mask even if it has multiple uses since it is cheap.
1383 def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1384 (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
1385 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1387 def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1388 (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
1389 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1392 multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1393 AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1394 RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1395 let Predicates = [prd] in
1396 defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1397 OpNode, SrcRC, Subreg>, EVEX_V512;
1398 let Predicates = [prd, HasVLX] in {
1399 defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1400 _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1401 defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1402 _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
1406 multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1407 SDPatternOperator OpNode,
1408 RegisterClass SrcRC, Predicate prd> {
1409 let Predicates = [prd] in
1410 defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1412 let Predicates = [prd, HasVLX] in {
1413 defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1415 defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
1420 defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1421 avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1422 defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1423 avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1425 defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1426 X86VBroadcast, GR32, HasAVX512>;
1427 defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1428 X86VBroadcast, GR64, HasAVX512>, REX_W;
1430 multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1431 AVX512VLVectorVTInfo _, Predicate prd,
1432 bit IsConvertibleToThreeAddress> {
1433 let Predicates = [prd] in {
1434 defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1435 WriteShuffle256Ld, _.info512, _.info128,
1436 IsConvertibleToThreeAddress>,
1439 let Predicates = [prd, HasVLX] in {
1440 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1441 WriteShuffle256Ld, _.info256, _.info128,
1442 IsConvertibleToThreeAddress>,
1444 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
1445 WriteShuffleXLd, _.info128, _.info128,
1446 IsConvertibleToThreeAddress>,
1451 defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1452 avx512vl_i8_info, HasBWI, 0>;
1453 defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1454 avx512vl_i16_info, HasBWI, 0>;
1455 defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1456 avx512vl_i32_info, HasAVX512, 1>;
1457 defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1458 avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;
1460 multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1461 SDPatternOperator OpNode,
1462 X86VectorVTInfo _Dst,
1463 X86VectorVTInfo _Src> {
1464 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1465 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1466 (_Dst.VT (OpNode addr:$src))>,
1467 Sched<[SchedWriteShuffle.YMM.Folded]>,
1471 // This should be used for the AVX512DQ broadcast instructions. It disables
1472 // the unmasked patterns so that we only use the DQ instructions when masking
1474 multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1475 SDPatternOperator OpNode,
1476 X86VectorVTInfo _Dst,
1477 X86VectorVTInfo _Src> {
1478 let hasSideEffects = 0, mayLoad = 1 in
1479 defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1480 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1482 (_Dst.VT (OpNode addr:$src))>,
1483 Sched<[SchedWriteShuffle.YMM.Folded]>,
1486 let Predicates = [HasBWI] in {
1487 def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
1488 (VPBROADCASTWZrm addr:$src)>;
1490 def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
1491 (VPBROADCASTWZrr VR128X:$src)>;
1492 def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
1493 (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1495 let Predicates = [HasVLX, HasBWI] in {
1496 def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
1497 (VPBROADCASTWZ128rm addr:$src)>;
1498 def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
1499 (VPBROADCASTWZ256rm addr:$src)>;
1501 def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
1502 (VPBROADCASTWZ128rr VR128X:$src)>;
1503 def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
1504 (VPBROADCASTWZ256rr VR128X:$src)>;
1506 def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
1507 (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1508 def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
1509 (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1512 //===----------------------------------------------------------------------===//
1513 // AVX-512 BROADCAST SUBVECTORS
1516 defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1517 X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
1518 EVEX_V512, EVEX_CD8<32, CD8VT4>;
1519 defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1520 X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
1521 EVEX_V512, EVEX_CD8<32, CD8VT4>;
1522 defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1523 X86SubVBroadcastld256, v8i64_info, v4i64x_info>, REX_W,
1524 EVEX_V512, EVEX_CD8<64, CD8VT4>;
1525 defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1526 X86SubVBroadcastld256, v8f64_info, v4f64x_info>, REX_W,
1527 EVEX_V512, EVEX_CD8<64, CD8VT4>;
1529 let Predicates = [HasAVX512] in {
1530 def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
1531 (VBROADCASTF64X4rm addr:$src)>;
1532 def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
1533 (VBROADCASTF64X4rm addr:$src)>;
1534 def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
1535 (VBROADCASTF64X4rm addr:$src)>;
1536 def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
1537 (VBROADCASTI64X4rm addr:$src)>;
1538 def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
1539 (VBROADCASTI64X4rm addr:$src)>;
1540 def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
1541 (VBROADCASTI64X4rm addr:$src)>;
1542 def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
1543 (VBROADCASTI64X4rm addr:$src)>;
1545 def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
1546 (VBROADCASTF32X4rm addr:$src)>;
1547 def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
1548 (VBROADCASTF32X4rm addr:$src)>;
1549 def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
1550 (VBROADCASTF32X4rm addr:$src)>;
1551 def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
1552 (VBROADCASTI32X4rm addr:$src)>;
1553 def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
1554 (VBROADCASTI32X4rm addr:$src)>;
1555 def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
1556 (VBROADCASTI32X4rm addr:$src)>;
1557 def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
1558 (VBROADCASTI32X4rm addr:$src)>;
1560 // Patterns for selects of bitcasted operations.
1561 def : Pat<(vselect_mask VK16WM:$mask,
1562 (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1563 (v16f32 immAllZerosV)),
1564 (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1565 def : Pat<(vselect_mask VK16WM:$mask,
1566 (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1568 (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1569 def : Pat<(vselect_mask VK16WM:$mask,
1570 (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1571 (v16i32 immAllZerosV)),
1572 (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1573 def : Pat<(vselect_mask VK16WM:$mask,
1574 (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1576 (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1578 def : Pat<(vselect_mask VK8WM:$mask,
1579 (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1580 (v8f64 immAllZerosV)),
1581 (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1582 def : Pat<(vselect_mask VK8WM:$mask,
1583 (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1585 (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1586 def : Pat<(vselect_mask VK8WM:$mask,
1587 (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1588 (v8i64 immAllZerosV)),
1589 (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1590 def : Pat<(vselect_mask VK8WM:$mask,
1591 (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1593 (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1596 let Predicates = [HasVLX] in {
1597 defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1598 X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
1599 EVEX_V256, EVEX_CD8<32, CD8VT4>;
1600 defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1601 X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
1602 EVEX_V256, EVEX_CD8<32, CD8VT4>;
1604 def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
1605 (VBROADCASTF32X4Z256rm addr:$src)>;
1606 def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
1607 (VBROADCASTF32X4Z256rm addr:$src)>;
1608 def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
1609 (VBROADCASTF32X4Z256rm addr:$src)>;
1610 def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
1611 (VBROADCASTI32X4Z256rm addr:$src)>;
1612 def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
1613 (VBROADCASTI32X4Z256rm addr:$src)>;
1614 def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
1615 (VBROADCASTI32X4Z256rm addr:$src)>;
1616 def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
1617 (VBROADCASTI32X4Z256rm addr:$src)>;
1619 // Patterns for selects of bitcasted operations.
1620 def : Pat<(vselect_mask VK8WM:$mask,
1621 (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1622 (v8f32 immAllZerosV)),
1623 (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1624 def : Pat<(vselect_mask VK8WM:$mask,
1625 (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1627 (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1628 def : Pat<(vselect_mask VK8WM:$mask,
1629 (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1630 (v8i32 immAllZerosV)),
1631 (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1632 def : Pat<(vselect_mask VK8WM:$mask,
1633 (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1635 (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1638 let Predicates = [HasVLX, HasDQI] in {
1639 defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1640 X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X,
1641 EVEX_V256, EVEX_CD8<64, CD8VT2>;
1642 defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1643 X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, VEX_W1X,
1644 EVEX_V256, EVEX_CD8<64, CD8VT2>;
1646 // Patterns for selects of bitcasted operations.
1647 def : Pat<(vselect_mask VK4WM:$mask,
1648 (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1649 (v4f64 immAllZerosV)),
1650 (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1651 def : Pat<(vselect_mask VK4WM:$mask,
1652 (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1654 (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1655 def : Pat<(vselect_mask VK4WM:$mask,
1656 (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1657 (v4i64 immAllZerosV)),
1658 (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1659 def : Pat<(vselect_mask VK4WM:$mask,
1660 (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1662 (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1665 let Predicates = [HasDQI] in {
1666 defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1667 X86SubVBroadcastld128, v8i64_info, v2i64x_info>, REX_W,
1668 EVEX_V512, EVEX_CD8<64, CD8VT2>;
1669 defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1670 X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
1671 EVEX_V512, EVEX_CD8<32, CD8VT8>;
1672 defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1673 X86SubVBroadcastld128, v8f64_info, v2f64x_info>, REX_W,
1674 EVEX_V512, EVEX_CD8<64, CD8VT2>;
1675 defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1676 X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
1677 EVEX_V512, EVEX_CD8<32, CD8VT8>;
1679 // Patterns for selects of bitcasted operations.
1680 def : Pat<(vselect_mask VK16WM:$mask,
1681 (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1682 (v16f32 immAllZerosV)),
1683 (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
1684 def : Pat<(vselect_mask VK16WM:$mask,
1685 (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1687 (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1688 def : Pat<(vselect_mask VK16WM:$mask,
1689 (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1690 (v16i32 immAllZerosV)),
1691 (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
1692 def : Pat<(vselect_mask VK16WM:$mask,
1693 (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1695 (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1697 def : Pat<(vselect_mask VK8WM:$mask,
1698 (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1699 (v8f64 immAllZerosV)),
1700 (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
1701 def : Pat<(vselect_mask VK8WM:$mask,
1702 (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1704 (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1705 def : Pat<(vselect_mask VK8WM:$mask,
1706 (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1707 (v8i64 immAllZerosV)),
1708 (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
1709 def : Pat<(vselect_mask VK8WM:$mask,
1710 (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1712 (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1715 multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1716 AVX512VLVectorVTInfo _Dst,
1717 AVX512VLVectorVTInfo _Src> {
1718 let Predicates = [HasDQI] in
1719 defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1720 WriteShuffle256Ld, _Dst.info512,
1721 _Src.info512, _Src.info128, 0, null_frag, null_frag>,
1723 let Predicates = [HasDQI, HasVLX] in
1724 defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1725 WriteShuffle256Ld, _Dst.info256,
1726 _Src.info256, _Src.info128, 0, null_frag, null_frag>,
1730 multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1731 AVX512VLVectorVTInfo _Dst,
1732 AVX512VLVectorVTInfo _Src> :
1733 avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1735 let Predicates = [HasDQI, HasVLX] in
1736 defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
1737 WriteShuffleXLd, _Dst.info128,
1738 _Src.info128, _Src.info128, 0, null_frag, null_frag>,
1742 defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1743 avx512vl_i32_info, avx512vl_i64_info>;
1744 defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1745 avx512vl_f32_info, avx512vl_f64_info>;
1747 //===----------------------------------------------------------------------===//
1748 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
1750 multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1751 X86VectorVTInfo _, RegisterClass KRC> {
1752 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1753 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1754 [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1755 EVEX, Sched<[WriteShuffle]>;
1758 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1759 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1760 let Predicates = [HasCDI] in
1761 defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1762 let Predicates = [HasCDI, HasVLX] in {
1763 defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1764 defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1768 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1769 avx512vl_i32_info, VK16>;
1770 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1771 avx512vl_i64_info, VK8>, REX_W;
1773 //===----------------------------------------------------------------------===//
1774 // -- VPERMI2 - 3 source operands form --
1775 multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1776 X86FoldableSchedWrite sched,
1777 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1778 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1779 hasSideEffects = 0 in {
1780 defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1781 (ins _.RC:$src2, _.RC:$src3),
1782 OpcodeStr, "$src3, $src2", "$src2, $src3",
1783 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1784 EVEX_4V, AVX5128IBase, Sched<[sched]>;
1787 defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1788 (ins _.RC:$src2, _.MemOp:$src3),
1789 OpcodeStr, "$src3, $src2", "$src2, $src3",
1790 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1791 (_.VT (_.LdFrag addr:$src3)))), 1>,
1792 EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1796 multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1797 X86FoldableSchedWrite sched,
1798 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1799 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1800 hasSideEffects = 0, mayLoad = 1 in
1801 defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1802 (ins _.RC:$src2, _.ScalarMemOp:$src3),
1803 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1804 !strconcat("$src2, ${src3}", _.BroadcastStr ),
1805 (_.VT (X86VPermt2 _.RC:$src2,
1806 IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1807 AVX5128IBase, EVEX_4V, EVEX_B,
1808 Sched<[sched.Folded, sched.ReadAfterFold]>;
1811 multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1812 X86FoldableSchedWrite sched,
1813 AVX512VLVectorVTInfo VTInfo,
1814 AVX512VLVectorVTInfo ShuffleMask> {
1815 defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1816 ShuffleMask.info512>,
1817 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1818 ShuffleMask.info512>, EVEX_V512;
1819 let Predicates = [HasVLX] in {
1820 defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1821 ShuffleMask.info128>,
1822 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1823 ShuffleMask.info128>, EVEX_V128;
1824 defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1825 ShuffleMask.info256>,
1826 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1827 ShuffleMask.info256>, EVEX_V256;
1831 multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1832 X86FoldableSchedWrite sched,
1833 AVX512VLVectorVTInfo VTInfo,
1834 AVX512VLVectorVTInfo Idx,
1836 let Predicates = [Prd] in
1837 defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1838 Idx.info512>, EVEX_V512;
1839 let Predicates = [Prd, HasVLX] in {
1840 defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1841 Idx.info128>, EVEX_V128;
1842 defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1843 Idx.info256>, EVEX_V256;
1847 defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1848 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1849 defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1850 avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1851 defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1852 avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1853 REX_W, EVEX_CD8<16, CD8VF>;
1854 defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1855 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1857 defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1858 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1859 defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1860 avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1862 // Extra patterns to deal with extra bitcasts due to passthru and index being
1863 // different types on the fp versions.
1864 multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1865 X86VectorVTInfo IdxVT,
1866 X86VectorVTInfo CastVT> {
1867 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1868 (X86VPermt2 (_.VT _.RC:$src2),
1869 (IdxVT.VT (bitconvert
1870 (CastVT.VT _.RC:$src1))),
1872 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1873 (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1874 _.RC:$src2, _.RC:$src3)>;
1875 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1876 (X86VPermt2 _.RC:$src2,
1877 (IdxVT.VT (bitconvert
1878 (CastVT.VT _.RC:$src1))),
1879 (_.LdFrag addr:$src3)),
1880 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1881 (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1882 _.RC:$src2, addr:$src3)>;
1883 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1884 (X86VPermt2 _.RC:$src2,
1885 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
1886 (_.BroadcastLdFrag addr:$src3)),
1887 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1888 (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1889 _.RC:$src2, addr:$src3)>;
1892 // TODO: Should we add more casts? The vXi64 case is common due to ABI.
1893 defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
1894 defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
1895 defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
1898 multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1899 X86FoldableSchedWrite sched,
1900 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1901 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1902 defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1903 (ins IdxVT.RC:$src2, _.RC:$src3),
1904 OpcodeStr, "$src3, $src2", "$src2, $src3",
1905 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1906 EVEX_4V, AVX5128IBase, Sched<[sched]>;
1908 defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1909 (ins IdxVT.RC:$src2, _.MemOp:$src3),
1910 OpcodeStr, "$src3, $src2", "$src2, $src3",
1911 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1912 (_.LdFrag addr:$src3))), 1>,
1913 EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1916 multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1917 X86FoldableSchedWrite sched,
1918 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1919 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1920 defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1921 (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1922 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1923 !strconcat("$src2, ${src3}", _.BroadcastStr ),
1924 (_.VT (X86VPermt2 _.RC:$src1,
1925 IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1926 AVX5128IBase, EVEX_4V, EVEX_B,
1927 Sched<[sched.Folded, sched.ReadAfterFold]>;
1930 multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1931 X86FoldableSchedWrite sched,
1932 AVX512VLVectorVTInfo VTInfo,
1933 AVX512VLVectorVTInfo ShuffleMask> {
1934 defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1935 ShuffleMask.info512>,
1936 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1937 ShuffleMask.info512>, EVEX_V512;
1938 let Predicates = [HasVLX] in {
1939 defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1940 ShuffleMask.info128>,
1941 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1942 ShuffleMask.info128>, EVEX_V128;
1943 defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1944 ShuffleMask.info256>,
1945 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1946 ShuffleMask.info256>, EVEX_V256;
1950 multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1951 X86FoldableSchedWrite sched,
1952 AVX512VLVectorVTInfo VTInfo,
1953 AVX512VLVectorVTInfo Idx, Predicate Prd> {
1954 let Predicates = [Prd] in
1955 defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1956 Idx.info512>, EVEX_V512;
1957 let Predicates = [Prd, HasVLX] in {
1958 defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1959 Idx.info128>, EVEX_V128;
1960 defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1961 Idx.info256>, EVEX_V256;
1965 defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1966 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1967 defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1968 avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1969 defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1970 avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1971 REX_W, EVEX_CD8<16, CD8VF>;
1972 defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1973 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1975 defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1976 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1977 defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1978 avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1980 //===----------------------------------------------------------------------===//
1981 // AVX-512 - BLEND using mask
1984 multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1985 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1986 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1987 def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1988 (ins _.RC:$src1, _.RC:$src2),
1989 !strconcat(OpcodeStr,
1990 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1991 EVEX_4V, Sched<[sched]>;
1992 def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1993 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1994 !strconcat(OpcodeStr,
1995 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1996 []>, EVEX_4V, EVEX_K, Sched<[sched]>;
1997 def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1998 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1999 !strconcat(OpcodeStr,
2000 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
2001 []>, EVEX_4V, EVEX_KZ, Sched<[sched]>;
2002 let mayLoad = 1 in {
2003 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2004 (ins _.RC:$src1, _.MemOp:$src2),
2005 !strconcat(OpcodeStr,
2006 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
2007 []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
2008 Sched<[sched.Folded, sched.ReadAfterFold]>;
2009 def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2010 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2011 !strconcat(OpcodeStr,
2012 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
2013 []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
2014 Sched<[sched.Folded, sched.ReadAfterFold]>;
2015 def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2016 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2017 !strconcat(OpcodeStr,
2018 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
2019 []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
2020 Sched<[sched.Folded, sched.ReadAfterFold]>;
2024 multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
2025 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2026 let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
2027 def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2028 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
2029 !strconcat(OpcodeStr,
2030 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2031 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2032 EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2033 Sched<[sched.Folded, sched.ReadAfterFold]>;
2035 def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2036 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
2037 !strconcat(OpcodeStr,
2038 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
2039 "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2040 EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2041 Sched<[sched.Folded, sched.ReadAfterFold]>;
2043 def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2044 (ins _.RC:$src1, _.ScalarMemOp:$src2),
2045 !strconcat(OpcodeStr,
2046 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
2047 "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2048 EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2049 Sched<[sched.Folded, sched.ReadAfterFold]>;
2053 multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2054 AVX512VLVectorVTInfo VTInfo> {
2055 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2056 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2059 let Predicates = [HasVLX] in {
2060 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2061 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2063 defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2064 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2069 multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2070 AVX512VLVectorVTInfo VTInfo> {
2071 let Predicates = [HasBWI] in
2072 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2075 let Predicates = [HasBWI, HasVLX] in {
2076 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2078 defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2083 defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
2085 defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
2086 avx512vl_f64_info>, REX_W;
2087 defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
2089 defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
2090 avx512vl_i64_info>, REX_W;
2091 defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
2093 defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
2094 avx512vl_i16_info>, REX_W;
2096 //===----------------------------------------------------------------------===//
2097 // Compare Instructions
2098 //===----------------------------------------------------------------------===//
2100 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
2102 multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
2103 PatFrag OpNode_su, PatFrag OpNodeSAE_su,
2104 X86FoldableSchedWrite sched> {
2105 defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2107 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2109 "$cc, $src2, $src1", "$src1, $src2, $cc",
2110 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2111 (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2112 timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
2114 defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2116 (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
2118 "$cc, $src2, $src1", "$src1, $src2, $cc",
2119 (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
2121 (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
2122 timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2123 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
2125 let Uses = [MXCSR] in
2126 defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2128 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2130 "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
2131 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2133 (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2135 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
2137 let isCodeGenOnly = 1 in {
2138 let isCommutable = 1 in
2139 def rr : AVX512Ii8<0xC2, MRMSrcReg,
2140 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
2141 !strconcat("vcmp", _.Suffix,
2142 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2143 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2146 EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
2147 def rm : AVX512Ii8<0xC2, MRMSrcMem,
2149 (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2150 !strconcat("vcmp", _.Suffix,
2151 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2152 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2153 (_.ScalarLdFrag addr:$src2),
2155 EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2156 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
2160 def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2161 (X86cmpms node:$src1, node:$src2, node:$cc), [{
2162 return N->hasOneUse();
2164 def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2165 (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
2166 return N->hasOneUse();
2169 let Predicates = [HasAVX512] in {
2170 let ExeDomain = SSEPackedSingle in
2171 defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
2172 X86cmpms_su, X86cmpmsSAE_su,
2173 SchedWriteFCmp.Scl>, AVX512XSIi8Base;
2174 let ExeDomain = SSEPackedDouble in
2175 defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
2176 X86cmpms_su, X86cmpmsSAE_su,
2177 SchedWriteFCmp.Scl>, AVX512XDIi8Base, REX_W;
2179 let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in
2180 defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE,
2181 X86cmpms_su, X86cmpmsSAE_su,
2182 SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA;
2184 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
2185 X86FoldableSchedWrite sched,
2186 X86VectorVTInfo _, bit IsCommutable> {
2187 let isCommutable = IsCommutable, hasSideEffects = 0 in
2188 def rr : AVX512BI<opc, MRMSrcReg,
2189 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2190 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2191 []>, EVEX_4V, Sched<[sched]>;
2192 let mayLoad = 1, hasSideEffects = 0 in
2193 def rm : AVX512BI<opc, MRMSrcMem,
2194 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2195 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2196 []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2197 let isCommutable = IsCommutable, hasSideEffects = 0 in
2198 def rrk : AVX512BI<opc, MRMSrcReg,
2199 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2200 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2201 "$dst {${mask}}, $src1, $src2}"),
2202 []>, EVEX_4V, EVEX_K, Sched<[sched]>;
2203 let mayLoad = 1, hasSideEffects = 0 in
2204 def rmk : AVX512BI<opc, MRMSrcMem,
2205 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2206 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2207 "$dst {${mask}}, $src1, $src2}"),
2208 []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2211 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
2212 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2214 avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
2215 let mayLoad = 1, hasSideEffects = 0 in {
2216 def rmb : AVX512BI<opc, MRMSrcMem,
2217 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2218 !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2219 "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2220 []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2221 def rmbk : AVX512BI<opc, MRMSrcMem,
2222 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2223 _.ScalarMemOp:$src2),
2224 !strconcat(OpcodeStr,
2225 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2226 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2227 []>, EVEX_4V, EVEX_K, EVEX_B,
2228 Sched<[sched.Folded, sched.ReadAfterFold]>;
2232 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
2233 X86SchedWriteWidths sched,
2234 AVX512VLVectorVTInfo VTInfo, Predicate prd,
2235 bit IsCommutable = 0> {
2236 let Predicates = [prd] in
2237 defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
2238 VTInfo.info512, IsCommutable>, EVEX_V512;
2240 let Predicates = [prd, HasVLX] in {
2241 defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
2242 VTInfo.info256, IsCommutable>, EVEX_V256;
2243 defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
2244 VTInfo.info128, IsCommutable>, EVEX_V128;
2248 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2249 X86SchedWriteWidths sched,
2250 AVX512VLVectorVTInfo VTInfo,
2251 Predicate prd, bit IsCommutable = 0> {
2252 let Predicates = [prd] in
2253 defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
2254 VTInfo.info512, IsCommutable>, EVEX_V512;
2256 let Predicates = [prd, HasVLX] in {
2257 defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
2258 VTInfo.info256, IsCommutable>, EVEX_V256;
2259 defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
2260 VTInfo.info128, IsCommutable>, EVEX_V128;
2264 // This fragment treats X86cmpm as commutable to help match loads in both
2265 // operands for PCMPEQ.
2266 def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
2267 def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
2268 (setcc node:$src1, node:$src2, SETGT)>;
2270 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2271 // increase the pattern complexity the way an immediate would.
2272 let AddedComplexity = 2 in {
2273 // FIXME: Is there a better scheduler class for VPCMP?
2274 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
2275 SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2276 EVEX_CD8<8, CD8VF>, WIG;
2278 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
2279 SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2280 EVEX_CD8<16, CD8VF>, WIG;
2282 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
2283 SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2284 EVEX_CD8<32, CD8VF>;
2286 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
2287 SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2288 T8PD, REX_W, EVEX_CD8<64, CD8VF>;
2290 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
2291 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2292 EVEX_CD8<8, CD8VF>, WIG;
2294 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
2295 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2296 EVEX_CD8<16, CD8VF>, WIG;
2298 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
2299 SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2300 EVEX_CD8<32, CD8VF>;
2302 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
2303 SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2304 T8PD, REX_W, EVEX_CD8<64, CD8VF>;
2307 def X86pcmpm_imm : SDNodeXForm<setcc, [{
2308 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2309 uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2310 return getI8Imm(SSECC, SDLoc(N));
2313 // Swapped operand version of the above.
2314 def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
2315 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2316 uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2317 SSECC = X86::getSwappedVPCMPImm(SSECC);
2318 return getI8Imm(SSECC, SDLoc(N));
2321 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2323 X86FoldableSchedWrite sched,
2324 X86VectorVTInfo _, string Name> {
2325 let isCommutable = 1 in
2326 def rri : AVX512AIi8<opc, MRMSrcReg,
2327 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2328 !strconcat("vpcmp", Suffix,
2329 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2330 [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2333 EVEX_4V, Sched<[sched]>;
2334 def rmi : AVX512AIi8<opc, MRMSrcMem,
2335 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2336 !strconcat("vpcmp", Suffix,
2337 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2338 [(set _.KRC:$dst, (_.KVT
2341 (_.VT (_.LdFrag addr:$src2)),
2343 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2344 let isCommutable = 1 in
2345 def rrik : AVX512AIi8<opc, MRMSrcReg,
2346 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2348 !strconcat("vpcmp", Suffix,
2349 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2350 "$dst {${mask}}, $src1, $src2, $cc}"),
2351 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2352 (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
2355 EVEX_4V, EVEX_K, Sched<[sched]>;
2356 def rmik : AVX512AIi8<opc, MRMSrcMem,
2357 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2359 !strconcat("vpcmp", Suffix,
2360 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2361 "$dst {${mask}}, $src1, $src2, $cc}"),
2362 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2366 (_.VT (_.LdFrag addr:$src2)),
2368 EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2370 def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2),
2371 (_.VT _.RC:$src1), cond)),
2372 (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2373 _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2375 def : Pat<(and _.KRCWM:$mask,
2376 (_.KVT (Frag_su:$cc (_.LdFrag addr:$src2),
2377 (_.VT _.RC:$src1), cond))),
2378 (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2379 _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2380 (X86pcmpm_imm_commute $cc))>;
2383 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2384 PatFrag Frag_su, X86FoldableSchedWrite sched,
2385 X86VectorVTInfo _, string Name> :
2386 avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> {
2387 def rmib : AVX512AIi8<opc, MRMSrcMem,
2388 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2390 !strconcat("vpcmp", Suffix,
2391 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2392 "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2393 [(set _.KRC:$dst, (_.KVT (Frag:$cc
2395 (_.BroadcastLdFrag addr:$src2),
2397 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2398 def rmibk : AVX512AIi8<opc, MRMSrcMem,
2399 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2400 _.ScalarMemOp:$src2, u8imm:$cc),
2401 !strconcat("vpcmp", Suffix,
2402 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2403 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2404 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2407 (_.BroadcastLdFrag addr:$src2),
2409 EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2411 def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2),
2412 (_.VT _.RC:$src1), cond)),
2413 (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2414 _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2416 def : Pat<(and _.KRCWM:$mask,
2417 (_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2),
2418 (_.VT _.RC:$src1), cond))),
2419 (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2420 _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2421 (X86pcmpm_imm_commute $cc))>;
2424 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2425 PatFrag Frag_su, X86SchedWriteWidths sched,
2426 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2427 let Predicates = [prd] in
2428 defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2429 sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2431 let Predicates = [prd, HasVLX] in {
2432 defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2433 sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2434 defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2435 sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2439 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2440 PatFrag Frag_su, X86SchedWriteWidths sched,
2441 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2442 let Predicates = [prd] in
2443 defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2444 sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2446 let Predicates = [prd, HasVLX] in {
2447 defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2448 sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2449 defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2450 sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2454 def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2455 (setcc node:$src1, node:$src2, node:$cc), [{
2456 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2457 return !ISD::isUnsignedIntSetCC(CC);
2460 def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2461 (setcc node:$src1, node:$src2, node:$cc), [{
2462 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2463 return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2466 def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2467 (setcc node:$src1, node:$src2, node:$cc), [{
2468 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2469 return ISD::isUnsignedIntSetCC(CC);
2472 def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2473 (setcc node:$src1, node:$src2, node:$cc), [{
2474 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2475 return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2478 // FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
2479 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
2480 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2482 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
2483 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2486 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
2487 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2488 REX_W, EVEX_CD8<16, CD8VF>;
2489 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
2490 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2491 REX_W, EVEX_CD8<16, CD8VF>;
2493 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
2494 SchedWriteVecALU, avx512vl_i32_info,
2495 HasAVX512>, EVEX_CD8<32, CD8VF>;
2496 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
2497 SchedWriteVecALU, avx512vl_i32_info,
2498 HasAVX512>, EVEX_CD8<32, CD8VF>;
2500 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
2501 SchedWriteVecALU, avx512vl_i64_info,
2502 HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;
2503 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
2504 SchedWriteVecALU, avx512vl_i64_info,
2505 HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;
2507 def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2508 (X86cmpm node:$src1, node:$src2, node:$cc), [{
2509 return N->hasOneUse();
2512 def X86cmpm_imm_commute : SDNodeXForm<timm, [{
2513 uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
2514 return getI8Imm(Imm, SDLoc(N));
2517 multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2519 let Uses = [MXCSR], mayRaiseFPException = 1 in {
2520 defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2521 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
2523 "$cc, $src2, $src1", "$src1, $src2, $cc",
2524 (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2525 (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2528 defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2529 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2531 "$cc, $src2, $src1", "$src1, $src2, $cc",
2532 (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2534 (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2536 Sched<[sched.Folded, sched.ReadAfterFold]>;
2538 defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2540 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2542 "$cc, ${src2}"#_.BroadcastStr#", $src1",
2543 "$src1, ${src2}"#_.BroadcastStr#", $cc",
2544 (X86any_cmpm (_.VT _.RC:$src1),
2545 (_.VT (_.BroadcastLdFrag addr:$src2)),
2547 (X86cmpm_su (_.VT _.RC:$src1),
2548 (_.VT (_.BroadcastLdFrag addr:$src2)),
2550 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2553 // Patterns for selecting with loads in other operand.
2554 def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2556 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2557 (X86cmpm_imm_commute timm:$cc))>;
2559 def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
2562 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2563 _.RC:$src1, addr:$src2,
2564 (X86cmpm_imm_commute timm:$cc))>;
2566 def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
2567 (_.VT _.RC:$src1), timm:$cc),
2568 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2569 (X86cmpm_imm_commute timm:$cc))>;
2571 def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
2574 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2575 _.RC:$src1, addr:$src2,
2576 (X86cmpm_imm_commute timm:$cc))>;
2578 // Patterns for mask intrinsics.
2579 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
2580 (_.KVT immAllOnesV)),
2581 (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>;
2583 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask),
2584 (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1,
2585 _.RC:$src2, timm:$cc)>;
2587 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2588 (_.KVT immAllOnesV)),
2589 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>;
2591 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2593 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1,
2594 addr:$src2, timm:$cc)>;
2596 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2597 (_.KVT immAllOnesV)),
2598 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>;
2600 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2602 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1,
2603 addr:$src2, timm:$cc)>;
2605 // Patterns for mask intrinsics with loads in other operand.
2606 def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2607 (_.KVT immAllOnesV)),
2608 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2609 (X86cmpm_imm_commute timm:$cc))>;
2611 def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2613 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2614 _.RC:$src1, addr:$src2,
2615 (X86cmpm_imm_commute timm:$cc))>;
2617 def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2618 (_.KVT immAllOnesV)),
2619 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2620 (X86cmpm_imm_commute timm:$cc))>;
2622 def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2624 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2625 _.RC:$src1, addr:$src2,
2626 (X86cmpm_imm_commute timm:$cc))>;
2629 multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2630 // comparison code form (VCMP[EQ/LT/LE/...]
2631 let Uses = [MXCSR] in
2632 defm rrib : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst),
2633 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2634 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
2636 "$cc, {sae}, $src2, $src1",
2637 "$src1, $src2, {sae}, $cc",
2638 [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2639 (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))],
2640 [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2641 (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>,
2642 EVEX_B, Sched<[sched]>;
2645 multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
2646 Predicate Pred = HasAVX512> {
2647 let Predicates = [Pred] in {
2648 defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2649 avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2652 let Predicates = [Pred,HasVLX] in {
2653 defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2654 defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
2658 defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2659 AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
2660 defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2661 AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2662 defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
2663 AVX512PSIi8Base, EVEX_4V, EVEX_CD8<16, CD8VF>, TA;
2665 // Patterns to select fp compares with load as first operand.
2666 let Predicates = [HasAVX512] in {
2667 def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
2668 (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2670 def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
2671 (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2674 let Predicates = [HasFP16] in {
2675 def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
2676 (VCMPSHZrm FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2679 // ----------------------------------------------------------------
2682 def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
2683 (X86Vfpclasss node:$src1, node:$src2), [{
2684 return N->hasOneUse();
2687 def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
2688 (X86Vfpclass node:$src1, node:$src2), [{
2689 return N->hasOneUse();
2692 //handle fpclass instruction mask = op(reg_scalar,imm)
2693 // op(mem_scalar,imm)
2694 multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
2695 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2697 let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2698 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2699 (ins _.RC:$src1, i32u8imm:$src2),
2700 OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2701 [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
2702 (i32 timm:$src2)))]>,
2704 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2705 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2707 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2708 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2709 (X86Vfpclasss_su (_.VT _.RC:$src1),
2710 (i32 timm:$src2))))]>,
2711 EVEX_K, Sched<[sched]>;
2712 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2713 (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2715 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2717 (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
2718 (i32 timm:$src2)))]>,
2719 Sched<[sched.Folded, sched.ReadAfterFold]>;
2720 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2721 (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2723 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2724 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2725 (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
2726 (i32 timm:$src2))))]>,
2727 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2731 //handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm)
2732 // fpclass(reg_vec, mem_vec, imm)
2733 // fpclass(reg_vec, broadcast(eltVt), imm)
2734 multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
2735 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2737 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2738 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2739 (ins _.RC:$src1, i32u8imm:$src2),
2740 OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2741 [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
2742 (i32 timm:$src2)))]>,
2744 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2745 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2747 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2748 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2749 (X86Vfpclass_su (_.VT _.RC:$src1),
2750 (i32 timm:$src2))))]>,
2751 EVEX_K, Sched<[sched]>;
2752 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2753 (ins _.MemOp:$src1, i32u8imm:$src2),
2754 OpcodeStr#_.Suffix#"{"#mem#"}"#
2755 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2756 [(set _.KRC:$dst,(X86Vfpclass
2757 (_.VT (_.LdFrag addr:$src1)),
2758 (i32 timm:$src2)))]>,
2759 Sched<[sched.Folded, sched.ReadAfterFold]>;
2760 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2761 (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2762 OpcodeStr#_.Suffix#"{"#mem#"}"#
2763 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2764 [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
2765 (_.VT (_.LdFrag addr:$src1)),
2766 (i32 timm:$src2))))]>,
2767 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2768 def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2769 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2770 OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2771 _.BroadcastStr#", $dst|$dst, ${src1}"
2772 #_.BroadcastStr#", $src2}",
2773 [(set _.KRC:$dst,(X86Vfpclass
2774 (_.VT (_.BroadcastLdFrag addr:$src1)),
2775 (i32 timm:$src2)))]>,
2776 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2777 def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2778 (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2779 OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2780 _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
2781 _.BroadcastStr#", $src2}",
2782 [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
2783 (_.VT (_.BroadcastLdFrag addr:$src1)),
2784 (i32 timm:$src2))))]>,
2785 EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2788 // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
2790 def : InstAlias<OpcodeStr#_.Suffix#mem#
2791 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2792 (!cast<Instruction>(NAME#"rr")
2793 _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2794 def : InstAlias<OpcodeStr#_.Suffix#mem#
2795 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2796 (!cast<Instruction>(NAME#"rrk")
2797 _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2798 def : InstAlias<OpcodeStr#_.Suffix#mem#
2799 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
2800 _.BroadcastStr#", $src2}",
2801 (!cast<Instruction>(NAME#"rmb")
2802 _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2803 def : InstAlias<OpcodeStr#_.Suffix#mem#
2804 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
2805 "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
2806 (!cast<Instruction>(NAME#"rmbk")
2807 _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2810 multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2811 bits<8> opc, X86SchedWriteWidths sched,
2813 let Predicates = [prd] in {
2814 defm Z : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
2815 _.info512, "z">, EVEX_V512;
2817 let Predicates = [prd, HasVLX] in {
2818 defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
2819 _.info128, "x">, EVEX_V128;
2820 defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
2821 _.info256, "y">, EVEX_V256;
2825 multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2826 bits<8> opcScalar, X86SchedWriteWidths sched> {
2827 defm PH : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f16_info, opcVec,
2829 EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA;
2830 defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2831 sched.Scl, f16x_info, HasFP16>,
2832 EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA;
2833 defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
2835 EVEX_CD8<32, CD8VF>, AVX512AIi8Base;
2836 defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
2838 EVEX_CD8<64, CD8VF>, AVX512AIi8Base, REX_W;
2839 defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2840 sched.Scl, f32x_info, HasDQI>, VEX_LIG,
2841 EVEX_CD8<32, CD8VT1>, AVX512AIi8Base;
2842 defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2843 sched.Scl, f64x_info, HasDQI>, VEX_LIG,
2844 EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, REX_W;
2847 defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX;
2849 //-----------------------------------------------------------------
2850 // Mask register copy, including
2851 // - copy between mask registers
2852 // - load/store mask registers
2853 // - copy from GPR to mask register and vice versa
2855 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2856 string OpcodeStr, RegisterClass KRC,
2857 ValueType vvt, X86MemOperand x86memop> {
2858 let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
2859 def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2860 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2862 def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2863 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2864 [(set KRC:$dst, (vvt (load addr:$src)))]>,
2866 def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2867 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2868 [(store KRC:$src, addr:$dst)]>,
2869 Sched<[WriteStore]>;
2872 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2874 RegisterClass KRC, RegisterClass GRC> {
2875 let hasSideEffects = 0 in {
2876 def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2877 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2879 def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2880 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2885 let Predicates = [HasDQI] in
2886 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2887 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2890 let Predicates = [HasAVX512] in
2891 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2892 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2895 let Predicates = [HasBWI] in {
2896 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2898 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2900 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2902 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2906 // GR from/to mask register
2907 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2908 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2909 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2910 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2911 def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))),
2912 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>;
2914 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2915 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2916 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2917 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
2919 def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2920 (KMOVWrk VK16:$src)>;
2921 def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2922 (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
2923 def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2924 (COPY_TO_REGCLASS VK16:$src, GR32)>;
2925 def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2926 (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
2928 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2929 (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2930 def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2931 (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
2932 def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2933 (COPY_TO_REGCLASS VK8:$src, GR32)>;
2934 def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2935 (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
2937 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2938 (COPY_TO_REGCLASS GR32:$src, VK32)>;
2939 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2940 (COPY_TO_REGCLASS VK32:$src, GR32)>;
2941 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2942 (COPY_TO_REGCLASS GR64:$src, VK64)>;
2943 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2944 (COPY_TO_REGCLASS VK64:$src, GR64)>;
2947 let Predicates = [HasDQI] in {
2948 def : Pat<(v1i1 (load addr:$src)),
2949 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2950 def : Pat<(v2i1 (load addr:$src)),
2951 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2952 def : Pat<(v4i1 (load addr:$src)),
2953 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2956 let Predicates = [HasAVX512] in {
2957 def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2958 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2959 def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2960 (KMOVWkm addr:$src)>;
2963 def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2964 SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2965 SDTCVecEltisVT<1, i1>,
2968 let Predicates = [HasAVX512] in {
2969 multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2970 def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2971 (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2973 def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2974 (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2976 def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2977 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2979 def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2980 (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
2983 defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
2984 defm : operation_gpr_mask_copy_lowering<VK2, v2i1>;
2985 defm : operation_gpr_mask_copy_lowering<VK4, v4i1>;
2986 defm : operation_gpr_mask_copy_lowering<VK8, v8i1>;
2987 defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
2988 defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
2989 defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;
2991 def : Pat<(insert_subvector (v16i1 immAllZerosV),
2992 (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2994 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2998 // Mask unary operation
3000 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
3001 RegisterClass KRC, SDPatternOperator OpNode,
3002 X86FoldableSchedWrite sched, Predicate prd> {
3003 let Predicates = [prd] in
3004 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
3005 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3006 [(set KRC:$dst, (OpNode KRC:$src))]>,
3010 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
3011 SDPatternOperator OpNode,
3012 X86FoldableSchedWrite sched> {
3013 defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3014 sched, HasDQI>, VEX, PD;
3015 defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3016 sched, HasAVX512>, VEX, PS;
3017 defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3018 sched, HasBWI>, VEX, PD, REX_W;
3019 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3020 sched, HasBWI>, VEX, PS, REX_W;
3023 // TODO - do we need a X86SchedWriteWidths::KMASK type?
3024 defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
3026 // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
3027 let Predicates = [HasAVX512, NoDQI] in
3028 def : Pat<(vnot VK8:$src),
3029 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
3031 def : Pat<(vnot VK4:$src),
3032 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
3033 def : Pat<(vnot VK2:$src),
3034 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
3035 def : Pat<(vnot VK1:$src),
3036 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK2)>;
3038 // Mask binary operation
3039 // - KAND, KANDN, KOR, KXNOR, KXOR
3040 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
3041 RegisterClass KRC, SDPatternOperator OpNode,
3042 X86FoldableSchedWrite sched, Predicate prd,
3044 let Predicates = [prd], isCommutable = IsCommutable in
3045 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
3046 !strconcat(OpcodeStr,
3047 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3048 [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
3052 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
3053 SDPatternOperator OpNode,
3054 X86FoldableSchedWrite sched, bit IsCommutable,
3055 Predicate prdW = HasAVX512> {
3056 defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3057 sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
3058 defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3059 sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
3060 defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3061 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, REX_W, PD;
3062 defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3063 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, REX_W, PS;
3066 // These nodes use 'vnot' instead of 'not' to support vectors.
3067 def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
3068 def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
3070 // TODO - do we need a X86SchedWriteWidths::KMASK type?
3071 defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>;
3072 defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>;
3073 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>;
3074 defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>;
3075 defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>;
3076 defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
3078 multiclass avx512_binop_pat<SDPatternOperator VOpNode,
3080 // With AVX512F, 8-bit mask is promoted to 16-bit mask,
3081 // for the DQI set, this type is legal and KxxxB instruction is used
3082 let Predicates = [NoDQI] in
3083 def : Pat<(VOpNode VK8:$src1, VK8:$src2),
3085 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
3086 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
3088 // All types smaller than 8 bits require conversion anyway
3089 def : Pat<(VOpNode VK1:$src1, VK1:$src2),
3090 (COPY_TO_REGCLASS (Inst
3091 (COPY_TO_REGCLASS VK1:$src1, VK16),
3092 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
3093 def : Pat<(VOpNode VK2:$src1, VK2:$src2),
3094 (COPY_TO_REGCLASS (Inst
3095 (COPY_TO_REGCLASS VK2:$src1, VK16),
3096 (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
3097 def : Pat<(VOpNode VK4:$src1, VK4:$src2),
3098 (COPY_TO_REGCLASS (Inst
3099 (COPY_TO_REGCLASS VK4:$src1, VK16),
3100 (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
3103 defm : avx512_binop_pat<and, KANDWrr>;
3104 defm : avx512_binop_pat<vandn, KANDNWrr>;
3105 defm : avx512_binop_pat<or, KORWrr>;
3106 defm : avx512_binop_pat<vxnor, KXNORWrr>;
3107 defm : avx512_binop_pat<xor, KXORWrr>;
3110 multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
3111 X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
3113 let Predicates = [prd] in {
3114 let hasSideEffects = 0 in
3115 def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
3116 (ins Src.KRC:$src1, Src.KRC:$src2),
3117 "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
3118 VEX_4V, VEX_L, Sched<[sched]>;
3120 def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
3121 (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
3125 defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info, WriteShuffle, HasAVX512>, PD;
3126 defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
3127 defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, REX_W;
3130 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3131 SDNode OpNode, X86FoldableSchedWrite sched,
3133 let Predicates = [prd], Defs = [EFLAGS] in
3134 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
3135 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
3136 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
3140 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
3141 X86FoldableSchedWrite sched,
3142 Predicate prdW = HasAVX512> {
3143 defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
3145 defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
3147 defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
3149 defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
3153 // TODO - do we need a X86SchedWriteWidths::KMASK type?
3154 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
3155 defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
3158 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3159 SDNode OpNode, X86FoldableSchedWrite sched> {
3160 let Predicates = [HasAVX512] in
3161 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
3162 !strconcat(OpcodeStr,
3163 "\t{$imm, $src, $dst|$dst, $src, $imm}"),
3164 [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
3168 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
3169 SDNode OpNode, X86FoldableSchedWrite sched> {
3170 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3171 sched>, VEX, TAPD, REX_W;
3172 let Predicates = [HasDQI] in
3173 defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3175 let Predicates = [HasBWI] in {
3176 defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3177 sched>, VEX, TAPD, REX_W;
3178 defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3183 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
3184 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
3186 // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
3187 multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3189 X86VectorVTInfo Narrow,
3190 X86VectorVTInfo Wide> {
3191 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3192 (Narrow.VT Narrow.RC:$src2), cond)),
3194 (!cast<Instruction>(InstStr#"Zrri")
3195 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3196 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3197 (X86pcmpm_imm $cc)), Narrow.KRC)>;
3199 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3200 (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3201 (Narrow.VT Narrow.RC:$src2),
3203 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3204 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3205 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3206 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3207 (X86pcmpm_imm $cc)), Narrow.KRC)>;
3210 multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3212 X86VectorVTInfo Narrow,
3213 X86VectorVTInfo Wide> {
3215 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3216 (Narrow.BroadcastLdFrag addr:$src2), cond)),
3218 (!cast<Instruction>(InstStr#"Zrmib")
3219 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3220 addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
3222 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3224 (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3225 (Narrow.BroadcastLdFrag addr:$src2),
3227 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3228 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3229 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3230 addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
3232 // Commuted with broadcast load.
3233 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2),
3234 (Narrow.VT Narrow.RC:$src1),
3237 (!cast<Instruction>(InstStr#"Zrmib")
3238 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3239 addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3241 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3243 (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
3244 (Narrow.VT Narrow.RC:$src1),
3246 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3247 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3248 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3249 addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3252 // Same as above, but for fp types which don't use PatFrags.
3253 multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
3254 X86VectorVTInfo Narrow,
3255 X86VectorVTInfo Wide> {
3256 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3257 (Narrow.VT Narrow.RC:$src2), timm:$cc)),
3259 (!cast<Instruction>(InstStr#"Zrri")
3260 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3261 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3262 timm:$cc), Narrow.KRC)>;
3264 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3265 (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3266 (Narrow.VT Narrow.RC:$src2), timm:$cc))),
3267 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3268 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3269 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3270 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3271 timm:$cc), Narrow.KRC)>;
3274 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3275 (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
3277 (!cast<Instruction>(InstStr#"Zrmbi")
3278 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3279 addr:$src2, timm:$cc), Narrow.KRC)>;
3281 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3282 (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3283 (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
3284 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3285 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3286 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3287 addr:$src2, timm:$cc), Narrow.KRC)>;
3289 // Commuted with broadcast load.
3290 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3291 (Narrow.VT Narrow.RC:$src1), timm:$cc)),
3293 (!cast<Instruction>(InstStr#"Zrmbi")
3294 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3295 addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3297 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3298 (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3299 (Narrow.VT Narrow.RC:$src1), timm:$cc))),
3300 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3301 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3302 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3303 addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3306 let Predicates = [HasAVX512, NoVLX] in {
3307 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3308 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3310 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3311 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3313 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3314 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3316 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3317 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3319 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3320 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3322 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3323 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3325 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3326 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3328 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3329 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3331 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
3332 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
3333 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
3334 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
3337 let Predicates = [HasBWI, NoVLX] in {
3338 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3339 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3341 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3342 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3344 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3345 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3347 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3348 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
3351 // Mask setting all 0s or 1s
3352 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> {
3353 let Predicates = [HasAVX512] in
3354 let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3355 SchedRW = [WriteZero] in
3356 def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3357 [(set KRC:$dst, (VT Val))]>;
3360 multiclass avx512_mask_setop_w<SDPatternOperator Val> {
3361 defm W : avx512_mask_setop<VK16, v16i1, Val>;
3362 defm D : avx512_mask_setop<VK32, v32i1, Val>;
3363 defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3366 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3367 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3369 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
3370 let Predicates = [HasAVX512] in {
3371 def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3372 def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3373 def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3374 def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3375 def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
3376 def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
3377 def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
3378 def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
3381 // Patterns for kmask insert_subvector/extract_subvector to/from index=0
3382 multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3383 RegisterClass RC, ValueType VT> {
3384 def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3385 (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3387 def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3388 (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
3390 defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>;
3391 defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>;
3392 defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>;
3393 defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
3394 defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
3395 defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;
3397 defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>;
3398 defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>;
3399 defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>;
3400 defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>;
3401 defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>;
3403 defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>;
3404 defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>;
3405 defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>;
3406 defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>;
3408 defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>;
3409 defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>;
3410 defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>;
3412 defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3413 defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3415 defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3417 //===----------------------------------------------------------------------===//
3418 // AVX-512 - Aligned and unaligned load and store
3421 multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3422 X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3423 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3424 bit NoRMPattern = 0,
3425 SDPatternOperator SelectOprr = vselect> {
3426 let hasSideEffects = 0 in {
3427 let isMoveReg = 1 in
3428 def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3429 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3430 _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
3431 EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
3432 def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3433 (ins _.KRCWM:$mask, _.RC:$src),
3434 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3435 "${dst} {${mask}} {z}, $src}"),
3436 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3438 _.ImmAllZerosV)))], _.ExeDomain>,
3439 EVEX, EVEX_KZ, Sched<[Sched.RR]>;
3441 let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3442 def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3443 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3444 !if(NoRMPattern, [],
3446 (_.VT (ld_frag addr:$src)))]),
3447 _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
3448 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
3450 let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3451 def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3452 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3453 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3454 "${dst} {${mask}}, $src1}"),
3455 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3457 (_.VT _.RC:$src0))))], _.ExeDomain>,
3458 EVEX, EVEX_K, Sched<[Sched.RR]>;
3459 def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3460 (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3461 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3462 "${dst} {${mask}}, $src1}"),
3463 [(set _.RC:$dst, (_.VT
3464 (vselect_mask _.KRCWM:$mask,
3465 (_.VT (ld_frag addr:$src1)),
3466 (_.VT _.RC:$src0))))], _.ExeDomain>,
3467 EVEX, EVEX_K, Sched<[Sched.RM]>;
3469 def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3470 (ins _.KRCWM:$mask, _.MemOp:$src),
3471 OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3472 "${dst} {${mask}} {z}, $src}",
3473 [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
3474 (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3475 _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
3477 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3478 (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3480 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3481 (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3483 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3484 (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
3485 _.KRCWM:$mask, addr:$ptr)>;
3488 multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3489 AVX512VLVectorVTInfo _, Predicate prd,
3490 X86SchedWriteMoveLSWidths Sched,
3491 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
3492 let Predicates = [prd] in
3493 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3494 _.info512.AlignedLdFrag, masked_load_aligned,
3495 Sched.ZMM, "", NoRMPattern>, EVEX_V512;
3497 let Predicates = [prd, HasVLX] in {
3498 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3499 _.info256.AlignedLdFrag, masked_load_aligned,
3500 Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
3501 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3502 _.info128.AlignedLdFrag, masked_load_aligned,
3503 Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
3507 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3508 AVX512VLVectorVTInfo _, Predicate prd,
3509 X86SchedWriteMoveLSWidths Sched,
3510 string EVEX2VEXOvrd, bit NoRMPattern = 0,
3511 SDPatternOperator SelectOprr = vselect> {
3512 let Predicates = [prd] in
3513 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3514 masked_load, Sched.ZMM, "",
3515 NoRMPattern, SelectOprr>, EVEX_V512;
3517 let Predicates = [prd, HasVLX] in {
3518 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3519 masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
3520 NoRMPattern, SelectOprr>, EVEX_V256;
3521 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3522 masked_load, Sched.XMM, EVEX2VEXOvrd,
3523 NoRMPattern, SelectOprr>, EVEX_V128;
3527 multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3528 X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3529 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3530 bit NoMRPattern = 0> {
3531 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3532 let isMoveReg = 1 in
3533 def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3534 OpcodeStr # "\t{$src, $dst|$dst, $src}",
3535 [], _.ExeDomain>, EVEX,
3537 EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
3538 def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3539 (ins _.KRCWM:$mask, _.RC:$src),
3540 OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3541 "${dst} {${mask}}, $src}",
3542 [], _.ExeDomain>, EVEX, EVEX_K,
3544 def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3545 (ins _.KRCWM:$mask, _.RC:$src),
3546 OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3547 "${dst} {${mask}} {z}, $src}",
3548 [], _.ExeDomain>, EVEX, EVEX_KZ,
3552 let hasSideEffects = 0, mayStore = 1 in
3553 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3554 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3555 !if(NoMRPattern, [],
3556 [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3557 _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
3558 EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
3559 def mrk : AVX512PI<opc, MRMDestMem, (outs),
3560 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3561 OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3562 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>;
3564 def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3565 (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3566 _.KRCWM:$mask, _.RC:$src)>;
3568 def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3569 (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3570 _.RC:$dst, _.RC:$src), 0>;
3571 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3572 (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3573 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3574 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3575 (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3576 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3579 multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3580 AVX512VLVectorVTInfo _, Predicate prd,
3581 X86SchedWriteMoveLSWidths Sched,
3582 string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3583 let Predicates = [prd] in
3584 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3585 masked_store, Sched.ZMM, "",
3586 NoMRPattern>, EVEX_V512;
3587 let Predicates = [prd, HasVLX] in {
3588 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3589 masked_store, Sched.YMM,
3590 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3591 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3592 masked_store, Sched.XMM, EVEX2VEXOvrd,
3593 NoMRPattern>, EVEX_V128;
3597 multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3598 AVX512VLVectorVTInfo _, Predicate prd,
3599 X86SchedWriteMoveLSWidths Sched,
3600 string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3601 let Predicates = [prd] in
3602 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3603 masked_store_aligned, Sched.ZMM, "",
3604 NoMRPattern>, EVEX_V512;
3606 let Predicates = [prd, HasVLX] in {
3607 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3608 masked_store_aligned, Sched.YMM,
3609 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3610 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3611 masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
3612 NoMRPattern>, EVEX_V128;
3616 defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3617 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3618 avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3619 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3620 PS, EVEX_CD8<32, CD8VF>;
3622 defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3623 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3624 avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3625 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3626 PD, REX_W, EVEX_CD8<64, CD8VF>;
3628 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3629 SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
3630 avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3631 SchedWriteFMoveLS, "VMOVUPS">,
3632 PS, EVEX_CD8<32, CD8VF>;
3634 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3635 SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
3636 avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3637 SchedWriteFMoveLS, "VMOVUPD">,
3638 PD, REX_W, EVEX_CD8<64, CD8VF>;
3640 defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3641 HasAVX512, SchedWriteVecMoveLS,
3643 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3644 HasAVX512, SchedWriteVecMoveLS,
3646 PD, EVEX_CD8<32, CD8VF>;
3648 defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3649 HasAVX512, SchedWriteVecMoveLS,
3651 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3652 HasAVX512, SchedWriteVecMoveLS,
3654 PD, REX_W, EVEX_CD8<64, CD8VF>;
3656 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3657 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3658 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3659 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3660 XD, EVEX_CD8<8, CD8VF>;
3662 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3663 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3664 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3665 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3666 XD, REX_W, EVEX_CD8<16, CD8VF>;
3668 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3669 SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
3670 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3671 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3672 XS, EVEX_CD8<32, CD8VF>;
3674 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3675 SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
3676 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3677 SchedWriteVecMoveLS, "VMOVDQU">,
3678 XS, REX_W, EVEX_CD8<64, CD8VF>;
3680 // Special instructions to help with spilling when we don't have VLX. We need
3681 // to load or store from a ZMM register instead. These are converted in
3682 // expandPostRAPseudos.
3683 let isReMaterializable = 1, canFoldAsLoad = 1,
3684 isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3685 def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3686 "", []>, Sched<[WriteFLoadX]>;
3687 def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3688 "", []>, Sched<[WriteFLoadY]>;
3689 def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3690 "", []>, Sched<[WriteFLoadX]>;
3691 def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3692 "", []>, Sched<[WriteFLoadY]>;
3695 let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3696 def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3697 "", []>, Sched<[WriteFStoreX]>;
3698 def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3699 "", []>, Sched<[WriteFStoreY]>;
3700 def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3701 "", []>, Sched<[WriteFStoreX]>;
3702 def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3703 "", []>, Sched<[WriteFStoreY]>;
3706 def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3707 (v8i64 VR512:$src))),
3708 (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3711 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3712 (v16i32 VR512:$src))),
3713 (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3715 // These patterns exist to prevent the above patterns from introducing a second
3716 // mask inversion when one already exists.
3717 def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)),
3718 (v8i64 immAllZerosV),
3719 (v8i64 VR512:$src))),
3720 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3721 def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)),
3722 (v16i32 immAllZerosV),
3723 (v16i32 VR512:$src))),
3724 (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3726 multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3727 X86VectorVTInfo Wide> {
3728 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3729 Narrow.RC:$src1, Narrow.RC:$src0)),
3732 (!cast<Instruction>(InstrStr#"rrk")
3733 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3734 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3735 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3738 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3739 Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3742 (!cast<Instruction>(InstrStr#"rrkz")
3743 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3744 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3748 // Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
3749 // available. Use a 512-bit operation and extract.
3750 let Predicates = [HasAVX512, NoVLX] in {
3751 defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3752 defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3753 defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3754 defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3756 defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3757 defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3758 defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3759 defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
3762 let Predicates = [HasBWI, NoVLX] in {
3763 defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3764 defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3766 defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3767 defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
3769 defm : mask_move_lowering<"VMOVDQU16Z", v8f16x_info, v32f16_info>;
3770 defm : mask_move_lowering<"VMOVDQU16Z", v16f16x_info, v32f16_info>;
3772 defm : mask_move_lowering<"VMOVDQU16Z", v8bf16x_info, v32bf16_info>;
3773 defm : mask_move_lowering<"VMOVDQU16Z", v16bf16x_info, v32bf16_info>;
3776 let Predicates = [HasAVX512] in {
3778 def : Pat<(alignedloadv16i32 addr:$src),
3779 (VMOVDQA64Zrm addr:$src)>;
3780 def : Pat<(alignedloadv32i16 addr:$src),
3781 (VMOVDQA64Zrm addr:$src)>;
3782 def : Pat<(alignedloadv32f16 addr:$src),
3783 (VMOVAPSZrm addr:$src)>;
3784 def : Pat<(alignedloadv32bf16 addr:$src),
3785 (VMOVAPSZrm addr:$src)>;
3786 def : Pat<(alignedloadv64i8 addr:$src),
3787 (VMOVDQA64Zrm addr:$src)>;
3788 def : Pat<(loadv16i32 addr:$src),
3789 (VMOVDQU64Zrm addr:$src)>;
3790 def : Pat<(loadv32i16 addr:$src),
3791 (VMOVDQU64Zrm addr:$src)>;
3792 def : Pat<(loadv32f16 addr:$src),
3793 (VMOVUPSZrm addr:$src)>;
3794 def : Pat<(loadv32bf16 addr:$src),
3795 (VMOVUPSZrm addr:$src)>;
3796 def : Pat<(loadv64i8 addr:$src),
3797 (VMOVDQU64Zrm addr:$src)>;
3800 def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3801 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3802 def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3803 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3804 def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
3805 (VMOVAPSZmr addr:$dst, VR512:$src)>;
3806 def : Pat<(alignedstore (v32bf16 VR512:$src), addr:$dst),
3807 (VMOVAPSZmr addr:$dst, VR512:$src)>;
3808 def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3809 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3810 def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3811 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3812 def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3813 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3814 def : Pat<(store (v32f16 VR512:$src), addr:$dst),
3815 (VMOVUPSZmr addr:$dst, VR512:$src)>;
3816 def : Pat<(store (v32bf16 VR512:$src), addr:$dst),
3817 (VMOVUPSZmr addr:$dst, VR512:$src)>;
3818 def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3819 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3822 let Predicates = [HasVLX] in {
3824 def : Pat<(alignedloadv4i32 addr:$src),
3825 (VMOVDQA64Z128rm addr:$src)>;
3826 def : Pat<(alignedloadv8i16 addr:$src),
3827 (VMOVDQA64Z128rm addr:$src)>;
3828 def : Pat<(alignedloadv8f16 addr:$src),
3829 (VMOVAPSZ128rm addr:$src)>;
3830 def : Pat<(alignedloadv8bf16 addr:$src),
3831 (VMOVAPSZ128rm addr:$src)>;
3832 def : Pat<(alignedloadv16i8 addr:$src),
3833 (VMOVDQA64Z128rm addr:$src)>;
3834 def : Pat<(loadv4i32 addr:$src),
3835 (VMOVDQU64Z128rm addr:$src)>;
3836 def : Pat<(loadv8i16 addr:$src),
3837 (VMOVDQU64Z128rm addr:$src)>;
3838 def : Pat<(loadv8f16 addr:$src),
3839 (VMOVUPSZ128rm addr:$src)>;
3840 def : Pat<(loadv8bf16 addr:$src),
3841 (VMOVUPSZ128rm addr:$src)>;
3842 def : Pat<(loadv16i8 addr:$src),
3843 (VMOVDQU64Z128rm addr:$src)>;
3846 def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3847 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3848 def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3849 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3850 def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
3851 (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3852 def : Pat<(alignedstore (v8bf16 VR128X:$src), addr:$dst),
3853 (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3854 def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3855 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3856 def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3857 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3858 def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3859 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3860 def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
3861 (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
3862 def : Pat<(store (v8bf16 VR128X:$src), addr:$dst),
3863 (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
3864 def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3865 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3868 def : Pat<(alignedloadv8i32 addr:$src),
3869 (VMOVDQA64Z256rm addr:$src)>;
3870 def : Pat<(alignedloadv16i16 addr:$src),
3871 (VMOVDQA64Z256rm addr:$src)>;
3872 def : Pat<(alignedloadv16f16 addr:$src),
3873 (VMOVAPSZ256rm addr:$src)>;
3874 def : Pat<(alignedloadv16bf16 addr:$src),
3875 (VMOVAPSZ256rm addr:$src)>;
3876 def : Pat<(alignedloadv32i8 addr:$src),
3877 (VMOVDQA64Z256rm addr:$src)>;
3878 def : Pat<(loadv8i32 addr:$src),
3879 (VMOVDQU64Z256rm addr:$src)>;
3880 def : Pat<(loadv16i16 addr:$src),
3881 (VMOVDQU64Z256rm addr:$src)>;
3882 def : Pat<(loadv16f16 addr:$src),
3883 (VMOVUPSZ256rm addr:$src)>;
3884 def : Pat<(loadv16bf16 addr:$src),
3885 (VMOVUPSZ256rm addr:$src)>;
3886 def : Pat<(loadv32i8 addr:$src),
3887 (VMOVDQU64Z256rm addr:$src)>;
3890 def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3891 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3892 def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3893 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3894 def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
3895 (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3896 def : Pat<(alignedstore (v16bf16 VR256X:$src), addr:$dst),
3897 (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3898 def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3899 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3900 def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3901 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3902 def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3903 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3904 def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
3905 (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
3906 def : Pat<(store (v16bf16 VR256X:$src), addr:$dst),
3907 (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
3908 def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3909 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3912 multiclass mask_move_lowering_f16_bf16<AVX512VLVectorVTInfo _> {
3913 let Predicates = [HasBWI] in {
3914 def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), (_.info512.VT VR512:$src0))),
3915 (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
3916 def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), _.info512.ImmAllZerosV)),
3917 (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
3918 def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3919 (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), (_.info512.VT VR512:$src0))),
3920 (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3921 def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3922 (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), _.info512.ImmAllZerosV)),
3923 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3924 def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3925 (_.info512.VT (_.info512.LdFrag addr:$src)), (_.info512.VT VR512:$src0))),
3926 (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3927 def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3928 (_.info512.VT (_.info512.LdFrag addr:$src)), _.info512.ImmAllZerosV)),
3929 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3930 def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, (_.info512.VT VR512:$src0))),
3931 (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3932 def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, undef)),
3933 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3934 def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, _.info512.ImmAllZerosV)),
3935 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3937 def : Pat<(masked_store (_.info512.VT VR512:$src), addr:$dst, VK32WM:$mask),
3938 (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
3940 let Predicates = [HasBWI, HasVLX] in {
3941 def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src0))),
3942 (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
3943 def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), _.info256.ImmAllZerosV)),
3944 (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
3945 def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3946 (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
3947 (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3948 def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3949 (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), _.info256.ImmAllZerosV)),
3950 (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3951 def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3952 (_.info256.VT (_.info256.LdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
3953 (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3954 def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3955 (_.info256.VT (_.info256.LdFrag addr:$src)), _.info256.ImmAllZerosV)),
3956 (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3957 def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, (_.info256.VT VR256X:$src0))),
3958 (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3959 def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, undef)),
3960 (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3961 def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, _.info256.ImmAllZerosV)),
3962 (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3964 def : Pat<(masked_store (_.info256.VT VR256X:$src), addr:$dst, VK16WM:$mask),
3965 (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;
3967 def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), (_.info128.VT VR128X:$src0))),
3968 (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
3969 def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), _.info128.ImmAllZerosV)),
3970 (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
3971 def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3972 (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
3973 (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3974 def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3975 (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), _.info128.ImmAllZerosV)),
3976 (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3977 def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3978 (_.info128.VT (_.info128.LdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
3979 (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3980 def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3981 (_.info128.VT (_.info128.LdFrag addr:$src)), _.info128.ImmAllZerosV)),
3982 (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3983 def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, (_.info128.VT VR128X:$src0))),
3984 (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3985 def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, undef)),
3986 (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3987 def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, _.info128.ImmAllZerosV)),
3988 (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3990 def : Pat<(masked_store (_.info128.VT VR128X:$src), addr:$dst, VK8WM:$mask),
3991 (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
3995 defm : mask_move_lowering_f16_bf16<avx512vl_f16_info>;
3996 defm : mask_move_lowering_f16_bf16<avx512vl_bf16_info>;
3998 // Move Int Doubleword to Packed Double Int
4000 let ExeDomain = SSEPackedInt in {
4001 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
4002 "vmovd\t{$src, $dst|$dst, $src}",
4004 (v4i32 (scalar_to_vector GR32:$src)))]>,
4005 EVEX, Sched<[WriteVecMoveFromGpr]>;
4006 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
4007 "vmovd\t{$src, $dst|$dst, $src}",
4009 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
4010 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
4011 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
4012 "vmovq\t{$src, $dst|$dst, $src}",
4014 (v2i64 (scalar_to_vector GR64:$src)))]>,
4015 EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
4016 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
4017 def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
4019 "vmovq\t{$src, $dst|$dst, $src}", []>,
4020 EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
4021 let isCodeGenOnly = 1 in {
4022 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
4023 "vmovq\t{$src, $dst|$dst, $src}",
4024 [(set FR64X:$dst, (bitconvert GR64:$src))]>,
4025 EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
4026 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
4027 "vmovq\t{$src, $dst|$dst, $src}",
4028 [(set GR64:$dst, (bitconvert FR64X:$src))]>,
4029 EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
4031 } // ExeDomain = SSEPackedInt
4033 // Move Int Doubleword to Single Scalar
4035 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
4036 def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
4037 "vmovd\t{$src, $dst|$dst, $src}",
4038 [(set FR32X:$dst, (bitconvert GR32:$src))]>,
4039 EVEX, Sched<[WriteVecMoveFromGpr]>;
4040 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
4042 // Move doubleword from xmm register to r/m32
4044 let ExeDomain = SSEPackedInt in {
4045 def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
4046 "vmovd\t{$src, $dst|$dst, $src}",
4047 [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
4049 EVEX, Sched<[WriteVecMoveToGpr]>;
4050 def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
4051 (ins i32mem:$dst, VR128X:$src),
4052 "vmovd\t{$src, $dst|$dst, $src}",
4053 [(store (i32 (extractelt (v4i32 VR128X:$src),
4054 (iPTR 0))), addr:$dst)]>,
4055 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
4056 } // ExeDomain = SSEPackedInt
4058 // Move quadword from xmm1 register to r/m64
4060 let ExeDomain = SSEPackedInt in {
4061 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
4062 "vmovq\t{$src, $dst|$dst, $src}",
4063 [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
4065 PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>,
4066 Requires<[HasAVX512]>;
4068 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
4069 def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
4070 "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
4071 EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecStore]>,
4072 Requires<[HasAVX512, In64BitMode]>;
4074 def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
4075 (ins i64mem:$dst, VR128X:$src),
4076 "vmovq\t{$src, $dst|$dst, $src}",
4077 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
4079 EVEX, PD, REX_W, EVEX_CD8<64, CD8VT1>,
4080 Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
4082 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
4083 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
4085 "vmovq\t{$src, $dst|$dst, $src}", []>,
4086 EVEX, REX_W, Sched<[SchedWriteVecLogic.XMM]>;
4087 } // ExeDomain = SSEPackedInt
4089 def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
4090 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
4092 let Predicates = [HasAVX512] in {
4093 def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
4094 (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
4097 // Move Scalar Single to Double Int
4099 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
4100 def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
4102 "vmovd\t{$src, $dst|$dst, $src}",
4103 [(set GR32:$dst, (bitconvert FR32X:$src))]>,
4104 EVEX, Sched<[WriteVecMoveToGpr]>;
4105 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
4107 // Move Quadword Int to Packed Quadword Int
4109 let ExeDomain = SSEPackedInt in {
4110 def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
4112 "vmovq\t{$src, $dst|$dst, $src}",
4114 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
4115 EVEX, REX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
4116 } // ExeDomain = SSEPackedInt
4118 // Allow "vmovd" but print "vmovq".
4119 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
4120 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
4121 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
4122 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
4124 // Conversions between masks and scalar fp.
4125 def : Pat<(v32i1 (bitconvert FR32X:$src)),
4126 (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
4127 def : Pat<(f32 (bitconvert VK32:$src)),
4128 (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;
4130 def : Pat<(v64i1 (bitconvert FR64X:$src)),
4131 (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
4132 def : Pat<(f64 (bitconvert VK64:$src)),
4133 (VMOV64toSDZrr (KMOVQrk VK64:$src))>;
4135 //===----------------------------------------------------------------------===//
4136 // AVX-512 MOVSH, MOVSS, MOVSD
4137 //===----------------------------------------------------------------------===//
4139 multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
4140 X86VectorVTInfo _, Predicate prd = HasAVX512> {
4141 let Predicates = !if (!eq (prd, HasFP16), [HasFP16], [prd, OptForSize]) in
4142 def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
4143 (ins _.RC:$src1, _.RC:$src2),
4144 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4145 [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
4146 _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
4147 let Predicates = [prd] in {
4148 def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
4149 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
4150 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
4151 "$dst {${mask}} {z}, $src1, $src2}"),
4152 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
4153 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4155 _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
4156 let Constraints = "$src0 = $dst" in
4157 def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
4158 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
4159 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
4160 "$dst {${mask}}, $src1, $src2}"),
4161 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
4162 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4163 (_.VT _.RC:$src0))))],
4164 _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
4165 let canFoldAsLoad = 1, isReMaterializable = 1 in {
4166 def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
4167 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4168 [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
4169 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
4170 // _alt version uses FR32/FR64 register class.
4171 let isCodeGenOnly = 1 in
4172 def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
4173 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4174 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
4175 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
4177 let mayLoad = 1, hasSideEffects = 0 in {
4178 let Constraints = "$src0 = $dst" in
4179 def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
4180 (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
4181 !strconcat(asm, "\t{$src, $dst {${mask}}|",
4182 "$dst {${mask}}, $src}"),
4183 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
4184 def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
4185 (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
4186 !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
4187 "$dst {${mask}} {z}, $src}"),
4188 [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
4190 def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
4191 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4192 [(store _.FRC:$src, addr:$dst)], _.ExeDomain>,
4193 EVEX, Sched<[WriteFStore]>;
4194 let mayStore = 1, hasSideEffects = 0 in
4195 def mrk: AVX512PI<0x11, MRMDestMem, (outs),
4196 (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
4197 !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
4198 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>;
4202 defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
4203 VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
4205 defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
4206 VEX_LIG, XD, REX_W, EVEX_CD8<64, CD8VT1>;
4208 defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
4210 VEX_LIG, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
4212 multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
4213 PatLeaf ZeroFP, X86VectorVTInfo _> {
4215 def : Pat<(_.VT (OpNode _.RC:$src0,
4216 (_.VT (scalar_to_vector
4217 (_.EltVT (X86selects VK1WM:$mask,
4218 (_.EltVT _.FRC:$src1),
4219 (_.EltVT _.FRC:$src2))))))),
4220 (!cast<Instruction>(InstrStr#rrk)
4221 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
4224 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
4226 def : Pat<(_.VT (OpNode _.RC:$src0,
4227 (_.VT (scalar_to_vector
4228 (_.EltVT (X86selects VK1WM:$mask,
4229 (_.EltVT _.FRC:$src1),
4230 (_.EltVT ZeroFP))))))),
4231 (!cast<Instruction>(InstrStr#rrkz)
4234 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
4237 multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4238 dag Mask, RegisterClass MaskRC> {
4240 def : Pat<(masked_store
4241 (_.info512.VT (insert_subvector undef,
4242 (_.info128.VT _.info128.RC:$src),
4243 (iPTR 0))), addr:$dst, Mask),
4244 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4245 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4246 _.info128.RC:$src)>;
4250 multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
4251 AVX512VLVectorVTInfo _,
4252 dag Mask, RegisterClass MaskRC,
4253 SubRegIndex subreg> {
4255 def : Pat<(masked_store
4256 (_.info512.VT (insert_subvector undef,
4257 (_.info128.VT _.info128.RC:$src),
4258 (iPTR 0))), addr:$dst, Mask),
4259 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4260 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4261 _.info128.RC:$src)>;
4265 // This matches the more recent codegen from clang that avoids emitting a 512
4266 // bit masked store directly. Codegen will widen 128-bit masked store to 512
4267 // bits on AVX512F only targets.
4268 multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4269 AVX512VLVectorVTInfo _,
4270 dag Mask512, dag Mask128,
4271 RegisterClass MaskRC,
4272 SubRegIndex subreg> {
4275 def : Pat<(masked_store
4276 (_.info512.VT (insert_subvector undef,
4277 (_.info128.VT _.info128.RC:$src),
4278 (iPTR 0))), addr:$dst, Mask512),
4279 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4280 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4281 _.info128.RC:$src)>;
4283 // AVX512VL pattern.
4284 def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4285 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4286 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4287 _.info128.RC:$src)>;
4290 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4291 dag Mask, RegisterClass MaskRC> {
4293 def : Pat<(_.info128.VT (extract_subvector
4294 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4295 _.info512.ImmAllZerosV)),
4297 (!cast<Instruction>(InstrStr#rmkz)
4298 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4301 def : Pat<(_.info128.VT (extract_subvector
4302 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4303 (_.info512.VT (insert_subvector undef,
4304 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4307 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4308 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4313 multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4314 AVX512VLVectorVTInfo _,
4315 dag Mask, RegisterClass MaskRC,
4316 SubRegIndex subreg> {
4318 def : Pat<(_.info128.VT (extract_subvector
4319 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4320 _.info512.ImmAllZerosV)),
4322 (!cast<Instruction>(InstrStr#rmkz)
4323 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4326 def : Pat<(_.info128.VT (extract_subvector
4327 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4328 (_.info512.VT (insert_subvector undef,
4329 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4332 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4333 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4338 // This matches the more recent codegen from clang that avoids emitting a 512
4339 // bit masked load directly. Codegen will widen 128-bit masked load to 512
4340 // bits on AVX512F only targets.
4341 multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4342 AVX512VLVectorVTInfo _,
4343 dag Mask512, dag Mask128,
4344 RegisterClass MaskRC,
4345 SubRegIndex subreg> {
4346 // AVX512F patterns.
4347 def : Pat<(_.info128.VT (extract_subvector
4348 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4349 _.info512.ImmAllZerosV)),
4351 (!cast<Instruction>(InstrStr#rmkz)
4352 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4355 def : Pat<(_.info128.VT (extract_subvector
4356 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4357 (_.info512.VT (insert_subvector undef,
4358 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4361 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4362 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4365 // AVX512Vl patterns.
4366 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4367 _.info128.ImmAllZerosV)),
4368 (!cast<Instruction>(InstrStr#rmkz)
4369 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4372 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4373 (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4374 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4375 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4379 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4380 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4382 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4383 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4384 defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4385 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4386 defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4387 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4389 let Predicates = [HasFP16] in {
4390 defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
4391 defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4392 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4393 defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4394 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4395 defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4396 (v32i1 (insert_subvector
4397 (v32i1 immAllZerosV),
4398 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4400 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4403 defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4404 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4405 defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4406 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4407 defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4408 (v32i1 (insert_subvector
4409 (v32i1 immAllZerosV),
4410 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4412 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4415 def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))),
4416 (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk
4417 (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)),
4418 VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4419 (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4421 def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)),
4422 (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4423 (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4426 defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4427 (v16i1 (insert_subvector
4428 (v16i1 immAllZerosV),
4429 (v4i1 (extract_subvector
4430 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4433 (v4i1 (extract_subvector
4434 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4435 (iPTR 0))), GR8, sub_8bit>;
4436 defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4441 (v16i1 immAllZerosV),
4442 (v2i1 (extract_subvector
4443 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4447 (v2i1 (extract_subvector
4448 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4449 (iPTR 0))), GR8, sub_8bit>;
4451 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4452 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4453 defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4454 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4455 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4456 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4458 defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4459 (v16i1 (insert_subvector
4460 (v16i1 immAllZerosV),
4461 (v4i1 (extract_subvector
4462 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4465 (v4i1 (extract_subvector
4466 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4467 (iPTR 0))), GR8, sub_8bit>;
4468 defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4473 (v16i1 immAllZerosV),
4474 (v2i1 (extract_subvector
4475 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4479 (v2i1 (extract_subvector
4480 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4481 (iPTR 0))), GR8, sub_8bit>;
4483 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4484 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4485 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4486 VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4487 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4489 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4490 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4491 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4493 def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
4495 (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
4496 VK1WM:$mask, addr:$src)),
4498 def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
4499 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
4501 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4502 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4503 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4504 VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4505 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4507 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
4508 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4509 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4511 def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
4513 (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
4514 VK1WM:$mask, addr:$src)),
4516 def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
4517 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
4520 def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
4521 (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4522 def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
4523 (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4525 def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
4526 (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4527 def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
4528 (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4530 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
4531 let Predicates = [HasFP16] in {
4532 def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4533 (ins VR128X:$src1, VR128X:$src2),
4534 "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4535 []>, T_MAP5XS, EVEX_4V, VEX_LIG,
4536 Sched<[SchedWriteFShuffle.XMM]>;
4538 let Constraints = "$src0 = $dst" in
4539 def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4540 (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask,
4541 VR128X:$src1, VR128X:$src2),
4542 "vmovsh\t{$src2, $src1, $dst {${mask}}|"#
4543 "$dst {${mask}}, $src1, $src2}",
4544 []>, T_MAP5XS, EVEX_K, EVEX_4V, VEX_LIG,
4545 Sched<[SchedWriteFShuffle.XMM]>;
4547 def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4548 (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4549 "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
4550 "$dst {${mask}} {z}, $src1, $src2}",
4551 []>, EVEX_KZ, T_MAP5XS, EVEX_4V, VEX_LIG,
4552 Sched<[SchedWriteFShuffle.XMM]>;
4554 def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4555 (ins VR128X:$src1, VR128X:$src2),
4556 "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4557 []>, XS, EVEX_4V, VEX_LIG,
4558 Sched<[SchedWriteFShuffle.XMM]>;
4560 let Constraints = "$src0 = $dst" in
4561 def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4562 (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4563 VR128X:$src1, VR128X:$src2),
4564 "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4565 "$dst {${mask}}, $src1, $src2}",
4566 []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
4567 Sched<[SchedWriteFShuffle.XMM]>;
4569 def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4570 (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4571 "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4572 "$dst {${mask}} {z}, $src1, $src2}",
4573 []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
4574 Sched<[SchedWriteFShuffle.XMM]>;
4576 def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4577 (ins VR128X:$src1, VR128X:$src2),
4578 "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4579 []>, XD, EVEX_4V, VEX_LIG, REX_W,
4580 Sched<[SchedWriteFShuffle.XMM]>;
4582 let Constraints = "$src0 = $dst" in
4583 def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4584 (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4585 VR128X:$src1, VR128X:$src2),
4586 "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4587 "$dst {${mask}}, $src1, $src2}",
4588 []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
4589 REX_W, Sched<[SchedWriteFShuffle.XMM]>;
4591 def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4592 (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4594 "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4595 "$dst {${mask}} {z}, $src1, $src2}",
4596 []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
4597 REX_W, Sched<[SchedWriteFShuffle.XMM]>;
4600 def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4601 (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4602 def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"#
4603 "$dst {${mask}}, $src1, $src2}",
4604 (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask,
4605 VR128X:$src1, VR128X:$src2), 0>;
4606 def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4607 "$dst {${mask}} {z}, $src1, $src2}",
4608 (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask,
4609 VR128X:$src1, VR128X:$src2), 0>;
4610 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4611 (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4612 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4613 "$dst {${mask}}, $src1, $src2}",
4614 (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4615 VR128X:$src1, VR128X:$src2), 0>;
4616 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4617 "$dst {${mask}} {z}, $src1, $src2}",
4618 (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4619 VR128X:$src1, VR128X:$src2), 0>;
4620 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4621 (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4622 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4623 "$dst {${mask}}, $src1, $src2}",
4624 (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4625 VR128X:$src1, VR128X:$src2), 0>;
4626 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4627 "$dst {${mask}} {z}, $src1, $src2}",
4628 (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4629 VR128X:$src1, VR128X:$src2), 0>;
4631 let Predicates = [HasAVX512, OptForSize] in {
4632 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4633 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4634 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4635 (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4637 // Move low f32 and clear high bits.
4638 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4639 (SUBREG_TO_REG (i32 0),
4640 (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4641 (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4642 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4643 (SUBREG_TO_REG (i32 0),
4644 (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4645 (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4647 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4648 (SUBREG_TO_REG (i32 0),
4649 (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4650 (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4651 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4652 (SUBREG_TO_REG (i32 0),
4653 (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4654 (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4657 // Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4658 // VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
4659 let Predicates = [HasAVX512, OptForSpeed] in {
4660 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4661 (SUBREG_TO_REG (i32 0),
4662 (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4663 (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4664 (i8 1))), sub_xmm)>;
4665 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4666 (SUBREG_TO_REG (i32 0),
4667 (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4668 (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4669 (i8 3))), sub_xmm)>;
4672 let Predicates = [HasAVX512] in {
4673 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4674 (VMOVSSZrm addr:$src)>;
4675 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4676 (VMOVSDZrm addr:$src)>;
4678 // Represent the same patterns above but in the form they appear for
4680 def : Pat<(v8f32 (X86vzload32 addr:$src)),
4681 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4682 def : Pat<(v4f64 (X86vzload64 addr:$src)),
4683 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4685 // Represent the same patterns above but in the form they appear for
4687 def : Pat<(v16f32 (X86vzload32 addr:$src)),
4688 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4689 def : Pat<(v8f64 (X86vzload64 addr:$src)),
4690 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4692 let Predicates = [HasFP16] in {
4693 def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
4694 (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
4695 def : Pat<(v8i16 (X86vzmovl (v8i16 VR128X:$src))),
4696 (VMOVSHZrr (v8i16 (AVX512_128_SET0)), VR128X:$src)>;
4698 // FIXME we need better canonicalization in dag combine
4699 def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
4700 (SUBREG_TO_REG (i32 0),
4701 (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4702 (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4703 def : Pat<(v16i16 (X86vzmovl (v16i16 VR256X:$src))),
4704 (SUBREG_TO_REG (i32 0),
4705 (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
4706 (v8i16 (EXTRACT_SUBREG (v16i16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4708 // FIXME we need better canonicalization in dag combine
4709 def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
4710 (SUBREG_TO_REG (i32 0),
4711 (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4712 (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
4713 def : Pat<(v32i16 (X86vzmovl (v32i16 VR512:$src))),
4714 (SUBREG_TO_REG (i32 0),
4715 (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
4716 (v8i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_xmm)))), sub_xmm)>;
4718 def : Pat<(v8f16 (X86vzload16 addr:$src)),
4719 (VMOVSHZrm addr:$src)>;
4721 def : Pat<(v16f16 (X86vzload16 addr:$src)),
4722 (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4724 def : Pat<(v32f16 (X86vzload16 addr:$src)),
4725 (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4728 let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4729 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4731 "vmovq\t{$src, $dst|$dst, $src}",
4732 [(set VR128X:$dst, (v2i64 (X86vzmovl
4733 (v2i64 VR128X:$src))))]>,
4737 let Predicates = [HasAVX512] in {
4738 def : Pat<(v4i32 (scalar_to_vector (i32 (anyext GR8:$src)))),
4739 (VMOVDI2PDIZrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
4740 GR8:$src, sub_8bit)))>;
4741 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4742 (VMOVDI2PDIZrr GR32:$src)>;
4744 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4745 (VMOV64toPQIZrr GR64:$src)>;
4747 // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
4748 def : Pat<(v4i32 (X86vzload32 addr:$src)),
4749 (VMOVDI2PDIZrm addr:$src)>;
4750 def : Pat<(v8i32 (X86vzload32 addr:$src)),
4751 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4752 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4753 (VMOVZPQILo2PQIZrr VR128X:$src)>;
4754 def : Pat<(v2i64 (X86vzload64 addr:$src)),
4755 (VMOVQI2PQIZrm addr:$src)>;
4756 def : Pat<(v4i64 (X86vzload64 addr:$src)),
4757 (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4759 // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4760 def : Pat<(v16i32 (X86vzload32 addr:$src)),
4761 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4762 def : Pat<(v8i64 (X86vzload64 addr:$src)),
4763 (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4765 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4766 (SUBREG_TO_REG (i32 0),
4767 (v2f64 (VMOVZPQILo2PQIZrr
4768 (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4770 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4771 (SUBREG_TO_REG (i32 0),
4772 (v2i64 (VMOVZPQILo2PQIZrr
4773 (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4776 def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4777 (SUBREG_TO_REG (i32 0),
4778 (v2f64 (VMOVZPQILo2PQIZrr
4779 (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4781 def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4782 (SUBREG_TO_REG (i32 0),
4783 (v2i64 (VMOVZPQILo2PQIZrr
4784 (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4788 //===----------------------------------------------------------------------===//
4789 // AVX-512 - Non-temporals
4790 //===----------------------------------------------------------------------===//
4792 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4793 (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4794 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4795 EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4797 let Predicates = [HasVLX] in {
4798 def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4800 "vmovntdqa\t{$src, $dst|$dst, $src}",
4801 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4802 EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4804 def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4806 "vmovntdqa\t{$src, $dst|$dst, $src}",
4807 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4808 EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
4811 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4812 X86SchedWriteMoveLS Sched,
4813 PatFrag st_frag = alignednontemporalstore> {
4814 let SchedRW = [Sched.MR], AddedComplexity = 400 in
4815 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4816 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4817 [(st_frag (_.VT _.RC:$src), addr:$dst)],
4818 _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
4821 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4822 AVX512VLVectorVTInfo VTInfo,
4823 X86SchedWriteMoveLSWidths Sched> {
4824 let Predicates = [HasAVX512] in
4825 defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4827 let Predicates = [HasAVX512, HasVLX] in {
4828 defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4829 defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
4833 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4834 SchedWriteVecMoveLSNT>, PD;
4835 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4836 SchedWriteFMoveLSNT>, PD, REX_W;
4837 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4838 SchedWriteFMoveLSNT>, PS;
4840 let Predicates = [HasAVX512], AddedComplexity = 400 in {
4841 def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4842 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4843 def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4844 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4845 def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4846 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4848 def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4849 (VMOVNTDQAZrm addr:$src)>;
4850 def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4851 (VMOVNTDQAZrm addr:$src)>;
4852 def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4853 (VMOVNTDQAZrm addr:$src)>;
4854 def : Pat<(v16i32 (alignednontemporalload addr:$src)),
4855 (VMOVNTDQAZrm addr:$src)>;
4856 def : Pat<(v32i16 (alignednontemporalload addr:$src)),
4857 (VMOVNTDQAZrm addr:$src)>;
4858 def : Pat<(v64i8 (alignednontemporalload addr:$src)),
4859 (VMOVNTDQAZrm addr:$src)>;
4862 let Predicates = [HasVLX], AddedComplexity = 400 in {
4863 def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4864 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4865 def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4866 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4867 def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4868 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4870 def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4871 (VMOVNTDQAZ256rm addr:$src)>;
4872 def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4873 (VMOVNTDQAZ256rm addr:$src)>;
4874 def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4875 (VMOVNTDQAZ256rm addr:$src)>;
4876 def : Pat<(v8i32 (alignednontemporalload addr:$src)),
4877 (VMOVNTDQAZ256rm addr:$src)>;
4878 def : Pat<(v16i16 (alignednontemporalload addr:$src)),
4879 (VMOVNTDQAZ256rm addr:$src)>;
4880 def : Pat<(v32i8 (alignednontemporalload addr:$src)),
4881 (VMOVNTDQAZ256rm addr:$src)>;
4883 def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4884 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4885 def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4886 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4887 def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4888 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4890 def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4891 (VMOVNTDQAZ128rm addr:$src)>;
4892 def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4893 (VMOVNTDQAZ128rm addr:$src)>;
4894 def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4895 (VMOVNTDQAZ128rm addr:$src)>;
4896 def : Pat<(v4i32 (alignednontemporalload addr:$src)),
4897 (VMOVNTDQAZ128rm addr:$src)>;
4898 def : Pat<(v8i16 (alignednontemporalload addr:$src)),
4899 (VMOVNTDQAZ128rm addr:$src)>;
4900 def : Pat<(v16i8 (alignednontemporalload addr:$src)),
4901 (VMOVNTDQAZ128rm addr:$src)>;
4904 //===----------------------------------------------------------------------===//
4905 // AVX-512 - Integer arithmetic
4907 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4908 X86VectorVTInfo _, X86FoldableSchedWrite sched,
4909 bit IsCommutable = 0> {
4910 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4911 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4912 "$src2, $src1", "$src1, $src2",
4913 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4914 IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
4917 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4918 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4919 "$src2, $src1", "$src1, $src2",
4920 (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
4921 AVX512BIBase, EVEX_4V,
4922 Sched<[sched.Folded, sched.ReadAfterFold]>;
4925 multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4926 X86VectorVTInfo _, X86FoldableSchedWrite sched,
4927 bit IsCommutable = 0> :
4928 avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4929 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4930 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4931 "${src2}"#_.BroadcastStr#", $src1",
4932 "$src1, ${src2}"#_.BroadcastStr,
4933 (_.VT (OpNode _.RC:$src1,
4934 (_.BroadcastLdFrag addr:$src2)))>,
4935 AVX512BIBase, EVEX_4V, EVEX_B,
4936 Sched<[sched.Folded, sched.ReadAfterFold]>;
4939 multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4940 AVX512VLVectorVTInfo VTInfo,
4941 X86SchedWriteWidths sched, Predicate prd,
4942 bit IsCommutable = 0> {
4943 let Predicates = [prd] in
4944 defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4945 IsCommutable>, EVEX_V512;
4947 let Predicates = [prd, HasVLX] in {
4948 defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4949 sched.YMM, IsCommutable>, EVEX_V256;
4950 defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4951 sched.XMM, IsCommutable>, EVEX_V128;
4955 multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4956 AVX512VLVectorVTInfo VTInfo,
4957 X86SchedWriteWidths sched, Predicate prd,
4958 bit IsCommutable = 0> {
4959 let Predicates = [prd] in
4960 defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4961 IsCommutable>, EVEX_V512;
4963 let Predicates = [prd, HasVLX] in {
4964 defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4965 sched.YMM, IsCommutable>, EVEX_V256;
4966 defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4967 sched.XMM, IsCommutable>, EVEX_V128;
4971 multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4972 X86SchedWriteWidths sched, Predicate prd,
4973 bit IsCommutable = 0> {
4974 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4975 sched, prd, IsCommutable>,
4976 REX_W, EVEX_CD8<64, CD8VF>;
4979 multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4980 X86SchedWriteWidths sched, Predicate prd,
4981 bit IsCommutable = 0> {
4982 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4983 sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
4986 multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4987 X86SchedWriteWidths sched, Predicate prd,
4988 bit IsCommutable = 0> {
4989 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4990 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4994 multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4995 X86SchedWriteWidths sched, Predicate prd,
4996 bit IsCommutable = 0> {
4997 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4998 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
5002 multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
5003 SDNode OpNode, X86SchedWriteWidths sched,
5004 Predicate prd, bit IsCommutable = 0> {
5005 defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
5008 defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
5012 multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
5013 SDNode OpNode, X86SchedWriteWidths sched,
5014 Predicate prd, bit IsCommutable = 0> {
5015 defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
5018 defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
5022 multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
5023 bits<8> opc_d, bits<8> opc_q,
5024 string OpcodeStr, SDNode OpNode,
5025 X86SchedWriteWidths sched,
5026 bit IsCommutable = 0> {
5027 defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
5028 sched, HasAVX512, IsCommutable>,
5029 avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
5030 sched, HasBWI, IsCommutable>;
5033 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
5034 X86FoldableSchedWrite sched,
5035 SDNode OpNode,X86VectorVTInfo _Src,
5036 X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
5037 bit IsCommutable = 0> {
5038 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
5039 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
5040 "$src2, $src1","$src1, $src2",
5042 (_Src.VT _Src.RC:$src1),
5043 (_Src.VT _Src.RC:$src2))),
5045 AVX512BIBase, EVEX_4V, Sched<[sched]>;
5046 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5047 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
5048 "$src2, $src1", "$src1, $src2",
5049 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
5050 (_Src.LdFrag addr:$src2)))>,
5051 AVX512BIBase, EVEX_4V,
5052 Sched<[sched.Folded, sched.ReadAfterFold]>;
5054 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5055 (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
5057 "${src2}"#_Brdct.BroadcastStr#", $src1",
5058 "$src1, ${src2}"#_Brdct.BroadcastStr,
5059 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
5060 (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
5061 AVX512BIBase, EVEX_4V, EVEX_B,
5062 Sched<[sched.Folded, sched.ReadAfterFold]>;
5065 defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
5066 SchedWriteVecALU, 1>;
5067 defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
5068 SchedWriteVecALU, 0>;
5069 defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
5070 SchedWriteVecALU, HasBWI, 1>;
5071 defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
5072 SchedWriteVecALU, HasBWI, 0>;
5073 defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
5074 SchedWriteVecALU, HasBWI, 1>;
5075 defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
5076 SchedWriteVecALU, HasBWI, 0>;
5077 defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
5078 SchedWritePMULLD, HasAVX512, 1>, T8PD;
5079 defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
5080 SchedWriteVecIMul, HasBWI, 1>;
5081 defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
5082 SchedWriteVecIMul, HasDQI, 1>, T8PD,
5083 NotEVEX2VEXConvertible;
5084 defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
5086 defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
5088 defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
5089 SchedWriteVecIMul, HasBWI, 1>, T8PD;
5090 defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", avgceilu,
5091 SchedWriteVecALU, HasBWI, 1>;
5092 defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
5093 SchedWriteVecIMul, HasAVX512, 1>, T8PD;
5094 defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
5095 SchedWriteVecIMul, HasAVX512, 1>;
5097 multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
5098 X86SchedWriteWidths sched,
5099 AVX512VLVectorVTInfo _SrcVTInfo,
5100 AVX512VLVectorVTInfo _DstVTInfo,
5101 SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
5102 let Predicates = [prd] in
5103 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
5104 _SrcVTInfo.info512, _DstVTInfo.info512,
5105 v8i64_info, IsCommutable>,
5106 EVEX_V512, EVEX_CD8<64, CD8VF>, REX_W;
5107 let Predicates = [HasVLX, prd] in {
5108 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
5109 _SrcVTInfo.info256, _DstVTInfo.info256,
5110 v4i64x_info, IsCommutable>,
5111 EVEX_V256, EVEX_CD8<64, CD8VF>, REX_W;
5112 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
5113 _SrcVTInfo.info128, _DstVTInfo.info128,
5114 v2i64x_info, IsCommutable>,
5115 EVEX_V128, EVEX_CD8<64, CD8VF>, REX_W;
5119 defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
5120 avx512vl_i8_info, avx512vl_i8_info,
5121 X86multishift, HasVBMI, 0>, T8PD;
5123 multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5124 X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
5125 X86FoldableSchedWrite sched> {
5126 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5127 (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
5129 "${src2}"#_Src.BroadcastStr#", $src1",
5130 "$src1, ${src2}"#_Src.BroadcastStr,
5131 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
5132 (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
5133 EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
5134 Sched<[sched.Folded, sched.ReadAfterFold]>;
5137 multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
5138 SDNode OpNode,X86VectorVTInfo _Src,
5139 X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
5140 bit IsCommutable = 0> {
5141 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
5142 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
5143 "$src2, $src1","$src1, $src2",
5145 (_Src.VT _Src.RC:$src1),
5146 (_Src.VT _Src.RC:$src2))),
5147 IsCommutable, IsCommutable>,
5148 EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
5149 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5150 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
5151 "$src2, $src1", "$src1, $src2",
5152 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
5153 (_Src.LdFrag addr:$src2)))>,
5154 EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
5155 Sched<[sched.Folded, sched.ReadAfterFold]>;
5158 multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
5160 let Predicates = [HasBWI] in
5161 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
5162 v32i16_info, SchedWriteShuffle.ZMM>,
5163 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
5164 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
5165 let Predicates = [HasBWI, HasVLX] in {
5166 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
5167 v16i16x_info, SchedWriteShuffle.YMM>,
5168 avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
5169 v16i16x_info, SchedWriteShuffle.YMM>,
5171 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
5172 v8i16x_info, SchedWriteShuffle.XMM>,
5173 avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
5174 v8i16x_info, SchedWriteShuffle.XMM>,
5178 multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
5180 let Predicates = [HasBWI] in
5181 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
5182 SchedWriteShuffle.ZMM>, EVEX_V512, WIG;
5183 let Predicates = [HasBWI, HasVLX] in {
5184 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
5185 v32i8x_info, SchedWriteShuffle.YMM>,
5187 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
5188 v16i8x_info, SchedWriteShuffle.XMM>,
5193 multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
5194 SDNode OpNode, AVX512VLVectorVTInfo _Src,
5195 AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
5196 let Predicates = [HasBWI] in
5197 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
5198 _Dst.info512, SchedWriteVecIMul.ZMM,
5199 IsCommutable>, EVEX_V512;
5200 let Predicates = [HasBWI, HasVLX] in {
5201 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
5202 _Dst.info256, SchedWriteVecIMul.YMM,
5203 IsCommutable>, EVEX_V256;
5204 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
5205 _Dst.info128, SchedWriteVecIMul.XMM,
5206 IsCommutable>, EVEX_V128;
5210 defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
5211 defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
5212 defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
5213 defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
5215 defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
5216 avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, WIG;
5217 defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
5218 avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, WIG;
5220 defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
5221 SchedWriteVecALU, HasBWI, 1>, T8PD;
5222 defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
5223 SchedWriteVecALU, HasBWI, 1>;
5224 defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
5225 SchedWriteVecALU, HasAVX512, 1>, T8PD;
5226 defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
5227 SchedWriteVecALU, HasAVX512, 1>, T8PD,
5228 NotEVEX2VEXConvertible;
5230 defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
5231 SchedWriteVecALU, HasBWI, 1>;
5232 defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
5233 SchedWriteVecALU, HasBWI, 1>, T8PD;
5234 defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
5235 SchedWriteVecALU, HasAVX512, 1>, T8PD;
5236 defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
5237 SchedWriteVecALU, HasAVX512, 1>, T8PD,
5238 NotEVEX2VEXConvertible;
5240 defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
5241 SchedWriteVecALU, HasBWI, 1>, T8PD;
5242 defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
5243 SchedWriteVecALU, HasBWI, 1>;
5244 defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
5245 SchedWriteVecALU, HasAVX512, 1>, T8PD;
5246 defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
5247 SchedWriteVecALU, HasAVX512, 1>, T8PD,
5248 NotEVEX2VEXConvertible;
5250 defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
5251 SchedWriteVecALU, HasBWI, 1>;
5252 defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
5253 SchedWriteVecALU, HasBWI, 1>, T8PD;
5254 defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
5255 SchedWriteVecALU, HasAVX512, 1>, T8PD;
5256 defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
5257 SchedWriteVecALU, HasAVX512, 1>, T8PD,
5258 NotEVEX2VEXConvertible;
5260 // PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
5261 let Predicates = [HasDQI, NoVLX] in {
5262 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5265 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5266 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5268 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5271 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5275 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5278 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5279 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5281 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5284 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5289 multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
5290 def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
5292 (!cast<Instruction>(Instr#"rr")
5293 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5294 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5296 def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5298 (!cast<Instruction>(Instr#"rmb")
5299 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5303 def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
5305 (!cast<Instruction>(Instr#"rr")
5306 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5307 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5309 def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5311 (!cast<Instruction>(Instr#"rmb")
5312 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5317 let Predicates = [HasAVX512, NoVLX] in {
5318 defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
5319 defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
5320 defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
5321 defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
5324 //===----------------------------------------------------------------------===//
5325 // AVX-512 Logical Instructions
5326 //===----------------------------------------------------------------------===//
5328 defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
5329 SchedWriteVecLogic, HasAVX512, 1>;
5330 defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
5331 SchedWriteVecLogic, HasAVX512, 1>;
5332 defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
5333 SchedWriteVecLogic, HasAVX512, 1>;
5334 defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
5335 SchedWriteVecLogic, HasAVX512>;
5337 let Predicates = [HasVLX] in {
5338 def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
5339 (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5340 def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
5341 (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5343 def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
5344 (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5345 def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
5346 (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5348 def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
5349 (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5350 def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
5351 (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5353 def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
5354 (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5355 def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
5356 (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5358 def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
5359 (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5360 def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
5361 (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5363 def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
5364 (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5365 def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
5366 (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5368 def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
5369 (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5370 def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
5371 (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5373 def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
5374 (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5375 def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
5376 (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5378 def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
5379 (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5380 def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
5381 (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5383 def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
5384 (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5385 def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
5386 (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5388 def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
5389 (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5390 def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
5391 (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5393 def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
5394 (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5395 def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
5396 (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5398 def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
5399 (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5400 def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
5401 (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5403 def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
5404 (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5405 def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
5406 (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5408 def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
5409 (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5410 def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
5411 (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5413 def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
5414 (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5415 def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
5416 (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5419 let Predicates = [HasAVX512] in {
5420 def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5421 (VPANDQZrr VR512:$src1, VR512:$src2)>;
5422 def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5423 (VPANDQZrr VR512:$src1, VR512:$src2)>;
5425 def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5426 (VPORQZrr VR512:$src1, VR512:$src2)>;
5427 def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5428 (VPORQZrr VR512:$src1, VR512:$src2)>;
5430 def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5431 (VPXORQZrr VR512:$src1, VR512:$src2)>;
5432 def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5433 (VPXORQZrr VR512:$src1, VR512:$src2)>;
5435 def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5436 (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5437 def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5438 (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5440 def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
5441 (VPANDQZrm VR512:$src1, addr:$src2)>;
5442 def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
5443 (VPANDQZrm VR512:$src1, addr:$src2)>;
5445 def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
5446 (VPORQZrm VR512:$src1, addr:$src2)>;
5447 def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
5448 (VPORQZrm VR512:$src1, addr:$src2)>;
5450 def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
5451 (VPXORQZrm VR512:$src1, addr:$src2)>;
5452 def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
5453 (VPXORQZrm VR512:$src1, addr:$src2)>;
5455 def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
5456 (VPANDNQZrm VR512:$src1, addr:$src2)>;
5457 def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
5458 (VPANDNQZrm VR512:$src1, addr:$src2)>;
5461 // Patterns to catch vselect with different type than logic op.
5462 multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
5464 X86VectorVTInfo IntInfo> {
5465 // Masked register-register logical operations.
5466 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5467 (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5469 (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5470 _.RC:$src1, _.RC:$src2)>;
5472 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5473 (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5475 (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5478 // Masked register-memory logical operations.
5479 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5480 (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5481 (load addr:$src2)))),
5483 (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5484 _.RC:$src1, addr:$src2)>;
5485 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5486 (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5487 (load addr:$src2)))),
5489 (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
5493 multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
5495 X86VectorVTInfo IntInfo> {
5496 // Register-broadcast logical operations.
5497 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5499 (IntInfo.VT (OpNode _.RC:$src1,
5500 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5502 (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5503 _.RC:$src1, addr:$src2)>;
5504 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5506 (IntInfo.VT (OpNode _.RC:$src1,
5507 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5509 (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
5510 _.RC:$src1, addr:$src2)>;
5513 multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
5514 AVX512VLVectorVTInfo SelectInfo,
5515 AVX512VLVectorVTInfo IntInfo> {
5516 let Predicates = [HasVLX] in {
5517 defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
5519 defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
5522 let Predicates = [HasAVX512] in {
5523 defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
5528 multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
5529 AVX512VLVectorVTInfo SelectInfo,
5530 AVX512VLVectorVTInfo IntInfo> {
5531 let Predicates = [HasVLX] in {
5532 defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
5533 SelectInfo.info128, IntInfo.info128>;
5534 defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
5535 SelectInfo.info256, IntInfo.info256>;
5537 let Predicates = [HasAVX512] in {
5538 defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
5539 SelectInfo.info512, IntInfo.info512>;
5543 multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
5544 // i64 vselect with i32/i16/i8 logic op
5545 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5547 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5549 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5552 // i32 vselect with i64/i16/i8 logic op
5553 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5555 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5557 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5560 // f32 vselect with i64/i32/i16/i8 logic op
5561 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5563 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5565 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5567 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5570 // f64 vselect with i64/i32/i16/i8 logic op
5571 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5573 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5575 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5577 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5580 defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
5583 defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
5588 defm : avx512_logical_lowering_types<"VPAND", and>;
5589 defm : avx512_logical_lowering_types<"VPOR", or>;
5590 defm : avx512_logical_lowering_types<"VPXOR", xor>;
5591 defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
5593 //===----------------------------------------------------------------------===//
5594 // AVX-512 FP arithmetic
5595 //===----------------------------------------------------------------------===//
5597 multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5598 SDPatternOperator OpNode, SDNode VecNode,
5599 X86FoldableSchedWrite sched, bit IsCommutable> {
5600 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5601 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5602 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5603 "$src2, $src1", "$src1, $src2",
5604 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5607 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5608 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5609 "$src2, $src1", "$src1, $src2",
5610 (_.VT (VecNode _.RC:$src1,
5611 (_.ScalarIntMemFrags addr:$src2)))>,
5612 Sched<[sched.Folded, sched.ReadAfterFold]>;
5613 let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5614 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5615 (ins _.FRC:$src1, _.FRC:$src2),
5616 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5617 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5619 let isCommutable = IsCommutable;
5621 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5622 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5623 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5624 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5625 (_.ScalarLdFrag addr:$src2)))]>,
5626 Sched<[sched.Folded, sched.ReadAfterFold]>;
5631 multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5632 SDNode VecNode, X86FoldableSchedWrite sched> {
5633 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5634 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5635 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5636 "$rc, $src2, $src1", "$src1, $src2, $rc",
5637 (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5639 EVEX_B, EVEX_RC, Sched<[sched]>;
5641 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5642 SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5643 X86FoldableSchedWrite sched, bit IsCommutable,
5644 string EVEX2VexOvrd> {
5645 let ExeDomain = _.ExeDomain in {
5646 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5647 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5648 "$src2, $src1", "$src1, $src2",
5649 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5650 Sched<[sched]>, SIMD_EXC;
5652 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5653 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5654 "$src2, $src1", "$src1, $src2",
5655 (_.VT (VecNode _.RC:$src1,
5656 (_.ScalarIntMemFrags addr:$src2)))>,
5657 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
5659 let isCodeGenOnly = 1, Predicates = [HasAVX512],
5660 Uses = [MXCSR], mayRaiseFPException = 1 in {
5661 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5662 (ins _.FRC:$src1, _.FRC:$src2),
5663 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5664 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5666 EVEX2VEXOverride<EVEX2VexOvrd#"rr"> {
5667 let isCommutable = IsCommutable;
5669 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5670 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5671 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5672 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5673 (_.ScalarLdFrag addr:$src2)))]>,
5674 Sched<[sched.Folded, sched.ReadAfterFold]>,
5675 EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
5678 let Uses = [MXCSR] in
5679 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5680 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5681 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5682 (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5683 EVEX_B, Sched<[sched]>;
5687 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5688 SDNode VecNode, SDNode RndNode,
5689 X86SchedWriteSizes sched, bit IsCommutable> {
5690 defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5691 sched.PS.Scl, IsCommutable>,
5692 avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5694 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5695 defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5696 sched.PD.Scl, IsCommutable>,
5697 avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5699 XD, REX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5700 let Predicates = [HasFP16] in
5701 defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
5702 VecNode, sched.PH.Scl, IsCommutable>,
5703 avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
5705 T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>;
5708 multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5709 SDNode VecNode, SDNode SaeNode,
5710 X86SchedWriteSizes sched, bit IsCommutable> {
5711 defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5712 VecNode, SaeNode, sched.PS.Scl, IsCommutable,
5714 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5715 defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5716 VecNode, SaeNode, sched.PD.Scl, IsCommutable,
5718 XD, REX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5719 let Predicates = [HasFP16] in {
5720 defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
5721 VecNode, SaeNode, sched.PH.Scl, IsCommutable,
5723 T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>,
5724 NotEVEX2VEXConvertible;
5727 defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
5728 SchedWriteFAddSizes, 1>;
5729 defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
5730 SchedWriteFMulSizes, 1>;
5731 defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
5732 SchedWriteFAddSizes, 0>;
5733 defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
5734 SchedWriteFDivSizes, 0>;
5735 defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5736 SchedWriteFCmpSizes, 0>;
5737 defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5738 SchedWriteFCmpSizes, 0>;
5740 // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
5741 // X86fminc and X86fmaxc instead of X86fmin and X86fmax
5742 multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5743 X86VectorVTInfo _, SDNode OpNode,
5744 X86FoldableSchedWrite sched,
5745 string EVEX2VEXOvrd> {
5746 let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5747 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5748 (ins _.FRC:$src1, _.FRC:$src2),
5749 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5750 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5751 Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
5752 let isCommutable = 1;
5754 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5755 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5756 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5757 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5758 (_.ScalarLdFrag addr:$src2)))]>,
5759 Sched<[sched.Folded, sched.ReadAfterFold]>,
5760 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
5763 defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5764 SchedWriteFCmp.Scl, "VMINCSS">, XS,
5765 EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5767 defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5768 SchedWriteFCmp.Scl, "VMINCSD">, XD,
5769 REX_W, EVEX_4V, VEX_LIG,
5770 EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5772 defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5773 SchedWriteFCmp.Scl, "VMAXCSS">, XS,
5774 EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5776 defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5777 SchedWriteFCmp.Scl, "VMAXCSD">, XD,
5778 REX_W, EVEX_4V, VEX_LIG,
5779 EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5781 defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
5782 SchedWriteFCmp.Scl, "VMINCSH">, T_MAP5XS,
5783 EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
5784 NotEVEX2VEXConvertible;
5785 defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
5786 SchedWriteFCmp.Scl, "VMAXCSH">, T_MAP5XS,
5787 EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
5788 NotEVEX2VEXConvertible;
5790 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5791 SDPatternOperator MaskOpNode,
5792 X86VectorVTInfo _, X86FoldableSchedWrite sched,
5794 bit IsKCommutable = IsCommutable,
5795 string suffix = _.Suffix,
5796 string ClobberConstraint = "",
5797 bit MayRaiseFPException = 1> {
5798 let ExeDomain = _.ExeDomain, hasSideEffects = 0,
5799 Uses = [MXCSR], mayRaiseFPException = MayRaiseFPException in {
5800 defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
5801 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#suffix,
5802 "$src2, $src1", "$src1, $src2",
5803 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
5804 (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), ClobberConstraint,
5805 IsCommutable, IsKCommutable, IsKCommutable>, EVEX_4V, Sched<[sched]>;
5806 let mayLoad = 1 in {
5807 defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5808 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix,
5809 "$src2, $src1", "$src1, $src2",
5810 (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5811 (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5812 ClobberConstraint>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5813 defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5814 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix,
5815 "${src2}"#_.BroadcastStr#", $src1",
5816 "$src1, ${src2}"#_.BroadcastStr,
5817 (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5818 (MaskOpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5819 ClobberConstraint>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5824 multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5825 SDPatternOperator OpNodeRnd,
5826 X86FoldableSchedWrite sched, X86VectorVTInfo _,
5827 string suffix = _.Suffix,
5828 string ClobberConstraint = ""> {
5829 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5830 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5831 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#suffix,
5832 "$rc, $src2, $src1", "$src1, $src2, $rc",
5833 (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))),
5834 0, 0, 0, vselect_mask, ClobberConstraint>,
5835 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
5838 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5839 SDPatternOperator OpNodeSAE,
5840 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5841 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5842 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5843 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5844 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5845 (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5846 EVEX_4V, EVEX_B, Sched<[sched]>;
5849 multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5850 SDPatternOperator MaskOpNode,
5851 Predicate prd, X86SchedWriteSizes sched,
5852 bit IsCommutable = 0,
5853 bit IsPD128Commutable = IsCommutable> {
5854 let Predicates = [prd] in {
5855 defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
5856 sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
5857 EVEX_CD8<32, CD8VF>;
5858 defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
5859 sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, REX_W,
5860 EVEX_CD8<64, CD8VF>;
5863 // Define only if AVX512VL feature is present.
5864 let Predicates = [prd, HasVLX] in {
5865 defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
5866 sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
5867 EVEX_CD8<32, CD8VF>;
5868 defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
5869 sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
5870 EVEX_CD8<32, CD8VF>;
5871 defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
5872 sched.PD.XMM, IsPD128Commutable,
5873 IsCommutable>, EVEX_V128, PD, REX_W,
5874 EVEX_CD8<64, CD8VF>;
5875 defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
5876 sched.PD.YMM, IsCommutable>, EVEX_V256, PD, REX_W,
5877 EVEX_CD8<64, CD8VF>;
5881 multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5882 SDPatternOperator MaskOpNode,
5883 X86SchedWriteSizes sched, bit IsCommutable = 0> {
5884 let Predicates = [HasFP16] in {
5885 defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
5886 sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5PS,
5887 EVEX_CD8<16, CD8VF>;
5889 let Predicates = [HasVLX, HasFP16] in {
5890 defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
5891 sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5PS,
5892 EVEX_CD8<16, CD8VF>;
5893 defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
5894 sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5PS,
5895 EVEX_CD8<16, CD8VF>;
5899 let Uses = [MXCSR] in
5900 multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5901 X86SchedWriteSizes sched> {
5902 let Predicates = [HasFP16] in {
5903 defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5905 EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
5907 defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5909 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5910 defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5912 EVEX_V512, PD, REX_W,EVEX_CD8<64, CD8VF>;
5915 let Uses = [MXCSR] in
5916 multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5917 X86SchedWriteSizes sched> {
5918 let Predicates = [HasFP16] in {
5919 defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5921 EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
5923 defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5925 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5926 defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5928 EVEX_V512, PD, REX_W,EVEX_CD8<64, CD8VF>;
5931 defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
5932 SchedWriteFAddSizes, 1>,
5933 avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>,
5934 avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5935 defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
5936 SchedWriteFMulSizes, 1>,
5937 avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>,
5938 avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5939 defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
5940 SchedWriteFAddSizes>,
5941 avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>,
5942 avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5943 defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
5944 SchedWriteFDivSizes>,
5945 avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>,
5946 avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
5947 defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
5948 SchedWriteFCmpSizes, 0>,
5949 avx512_fp_binop_ph<0x5D, "vmin", X86fmin, X86fmin, SchedWriteFCmpSizes, 0>,
5950 avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5951 defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
5952 SchedWriteFCmpSizes, 0>,
5953 avx512_fp_binop_ph<0x5F, "vmax", X86fmax, X86fmax, SchedWriteFCmpSizes, 0>,
5954 avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
5955 let isCodeGenOnly = 1 in {
5956 defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
5957 SchedWriteFCmpSizes, 1>,
5958 avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc,
5959 SchedWriteFCmpSizes, 1>;
5960 defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
5961 SchedWriteFCmpSizes, 1>,
5962 avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc,
5963 SchedWriteFCmpSizes, 1>;
5965 let Uses = []<Register>, mayRaiseFPException = 0 in {
5966 defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
5967 SchedWriteFLogicSizes, 1>;
5968 defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
5969 SchedWriteFLogicSizes, 0>;
5970 defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
5971 SchedWriteFLogicSizes, 1>;
5972 defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
5973 SchedWriteFLogicSizes, 1>;
5976 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5977 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5978 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5979 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5980 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5981 "$src2, $src1", "$src1, $src2",
5982 (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5983 EVEX_4V, Sched<[sched]>;
5984 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5985 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5986 "$src2, $src1", "$src1, $src2",
5987 (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5988 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5989 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5990 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5991 "${src2}"#_.BroadcastStr#", $src1",
5992 "$src1, ${src2}"#_.BroadcastStr,
5993 (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5994 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5998 multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5999 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6000 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
6001 defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6002 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
6003 "$src2, $src1", "$src1, $src2",
6004 (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
6006 defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6007 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
6008 "$src2, $src1", "$src1, $src2",
6009 (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
6010 Sched<[sched.Folded, sched.ReadAfterFold]>;
6014 multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
6015 X86SchedWriteWidths sched> {
6016 let Predicates = [HasFP16] in {
6017 defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
6018 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
6019 EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
6020 defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
6021 avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
6022 EVEX_4V, T_MAP6PD, EVEX_CD8<16, CD8VT1>;
6024 defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
6025 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
6026 EVEX_V512, EVEX_CD8<32, CD8VF>, T8PD;
6027 defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
6028 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
6029 EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>, T8PD;
6030 defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
6031 avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
6032 X86scalefsRnd, sched.Scl>,
6033 EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8PD;
6034 defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
6035 avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
6036 X86scalefsRnd, sched.Scl>,
6037 EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, REX_W, T8PD;
6039 // Define only if AVX512VL feature is present.
6040 let Predicates = [HasVLX] in {
6041 defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
6042 EVEX_V128, EVEX_CD8<32, CD8VF>, T8PD;
6043 defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
6044 EVEX_V256, EVEX_CD8<32, CD8VF>, T8PD;
6045 defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
6046 EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>, T8PD;
6047 defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
6048 EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>, T8PD;
6051 let Predicates = [HasFP16, HasVLX] in {
6052 defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
6053 EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6PD;
6054 defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
6055 EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6PD;
6058 defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
6059 SchedWriteFAdd>, NotEVEX2VEXConvertible;
6061 //===----------------------------------------------------------------------===//
6062 // AVX-512 VPTESTM instructions
6063 //===----------------------------------------------------------------------===//
6065 multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
6066 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6067 // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
6068 // There are just too many permutations due to commutability and bitcasts.
6069 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6070 defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
6071 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
6072 "$src2, $src1", "$src1, $src2",
6073 (null_frag), (null_frag), 1>,
6074 EVEX_4V, Sched<[sched]>;
6076 defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
6077 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
6078 "$src2, $src1", "$src1, $src2",
6079 (null_frag), (null_frag)>,
6080 EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6081 Sched<[sched.Folded, sched.ReadAfterFold]>;
6085 multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
6086 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6087 let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
6088 defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
6089 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6090 "${src2}"#_.BroadcastStr#", $src1",
6091 "$src1, ${src2}"#_.BroadcastStr,
6092 (null_frag), (null_frag)>,
6093 EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6094 Sched<[sched.Folded, sched.ReadAfterFold]>;
6097 multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
6098 X86SchedWriteWidths sched,
6099 AVX512VLVectorVTInfo _> {
6100 let Predicates = [HasAVX512] in
6101 defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512>,
6102 avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
6104 let Predicates = [HasAVX512, HasVLX] in {
6105 defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256>,
6106 avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
6107 defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128>,
6108 avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
6112 multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
6113 X86SchedWriteWidths sched> {
6114 defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
6116 defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
6117 avx512vl_i64_info>, REX_W;
6120 multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
6121 X86SchedWriteWidths sched> {
6122 let Predicates = [HasBWI] in {
6123 defm WZ: avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
6124 v32i16_info>, EVEX_V512, REX_W;
6125 defm BZ: avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
6126 v64i8_info>, EVEX_V512;
6129 let Predicates = [HasVLX, HasBWI] in {
6130 defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
6131 v16i16x_info>, EVEX_V256, REX_W;
6132 defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
6133 v8i16x_info>, EVEX_V128, REX_W;
6134 defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
6135 v32i8x_info>, EVEX_V256;
6136 defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
6137 v16i8x_info>, EVEX_V128;
6141 multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
6142 X86SchedWriteWidths sched> :
6143 avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
6144 avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
6146 defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
6147 SchedWriteVecLogic>, T8PD;
6148 defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
6149 SchedWriteVecLogic>, T8XS;
6151 //===----------------------------------------------------------------------===//
6152 // AVX-512 Shift instructions
6153 //===----------------------------------------------------------------------===//
6155 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
6156 string OpcodeStr, SDNode OpNode,
6157 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6158 let ExeDomain = _.ExeDomain in {
6159 defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
6160 (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
6161 "$src2, $src1", "$src1, $src2",
6162 (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
6164 defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
6165 (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
6166 "$src2, $src1", "$src1, $src2",
6167 (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
6169 Sched<[sched.Folded]>;
6173 multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
6174 string OpcodeStr, SDNode OpNode,
6175 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6176 let ExeDomain = _.ExeDomain in
6177 defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
6178 (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
6179 "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
6180 (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
6181 EVEX_B, Sched<[sched.Folded]>;
6184 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6185 X86FoldableSchedWrite sched, ValueType SrcVT,
6186 X86VectorVTInfo _> {
6187 // src2 is always 128-bit
6188 let ExeDomain = _.ExeDomain in {
6189 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6190 (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
6191 "$src2, $src1", "$src1, $src2",
6192 (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
6193 AVX512BIBase, EVEX_4V, Sched<[sched]>;
6194 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6195 (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
6196 "$src2, $src1", "$src1, $src2",
6197 (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
6199 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6203 multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6204 X86SchedWriteWidths sched, ValueType SrcVT,
6205 AVX512VLVectorVTInfo VTInfo,
6207 let Predicates = [prd] in
6208 defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
6209 VTInfo.info512>, EVEX_V512,
6210 EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
6211 let Predicates = [prd, HasVLX] in {
6212 defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
6213 VTInfo.info256>, EVEX_V256,
6214 EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
6215 defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
6216 VTInfo.info128>, EVEX_V128,
6217 EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
6221 multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
6222 string OpcodeStr, SDNode OpNode,
6223 X86SchedWriteWidths sched,
6224 bit NotEVEX2VEXConvertibleQ = 0> {
6225 defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
6226 avx512vl_i32_info, HasAVX512>;
6227 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
6228 defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
6229 avx512vl_i64_info, HasAVX512>, REX_W;
6230 defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
6231 avx512vl_i16_info, HasBWI>;
6234 multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6235 string OpcodeStr, SDNode OpNode,
6236 X86SchedWriteWidths sched,
6237 AVX512VLVectorVTInfo VTInfo> {
6238 let Predicates = [HasAVX512] in
6239 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6240 sched.ZMM, VTInfo.info512>,
6241 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
6242 VTInfo.info512>, EVEX_V512;
6243 let Predicates = [HasAVX512, HasVLX] in {
6244 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6245 sched.YMM, VTInfo.info256>,
6246 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
6247 VTInfo.info256>, EVEX_V256;
6248 defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6249 sched.XMM, VTInfo.info128>,
6250 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
6251 VTInfo.info128>, EVEX_V128;
6255 multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
6256 string OpcodeStr, SDNode OpNode,
6257 X86SchedWriteWidths sched> {
6258 let Predicates = [HasBWI] in
6259 defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6260 sched.ZMM, v32i16_info>, EVEX_V512, WIG;
6261 let Predicates = [HasVLX, HasBWI] in {
6262 defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6263 sched.YMM, v16i16x_info>, EVEX_V256, WIG;
6264 defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6265 sched.XMM, v8i16x_info>, EVEX_V128, WIG;
6269 multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
6270 Format ImmFormR, Format ImmFormM,
6271 string OpcodeStr, SDNode OpNode,
6272 X86SchedWriteWidths sched,
6273 bit NotEVEX2VEXConvertibleQ = 0> {
6274 defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
6275 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
6276 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
6277 defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
6278 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W;
6281 defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
6282 SchedWriteVecShiftImm>,
6283 avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
6284 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6286 defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
6287 SchedWriteVecShiftImm>,
6288 avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
6289 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6291 defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
6292 SchedWriteVecShiftImm, 1>,
6293 avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
6294 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6296 defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
6297 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6298 defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
6299 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6301 defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
6302 SchedWriteVecShift>;
6303 defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
6304 SchedWriteVecShift, 1>;
6305 defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
6306 SchedWriteVecShift>;
6308 // Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
6309 let Predicates = [HasAVX512, NoVLX] in {
6310 def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
6311 (EXTRACT_SUBREG (v8i64
6313 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6314 VR128X:$src2)), sub_ymm)>;
6316 def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6317 (EXTRACT_SUBREG (v8i64
6319 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6320 VR128X:$src2)), sub_xmm)>;
6322 def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
6323 (EXTRACT_SUBREG (v8i64
6325 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6326 timm:$src2)), sub_ymm)>;
6328 def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
6329 (EXTRACT_SUBREG (v8i64
6331 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6332 timm:$src2)), sub_xmm)>;
6335 //===-------------------------------------------------------------------===//
6336 // Variable Bit Shifts
6337 //===-------------------------------------------------------------------===//
6339 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
6340 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6341 let ExeDomain = _.ExeDomain in {
6342 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6343 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
6344 "$src2, $src1", "$src1, $src2",
6345 (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
6346 AVX5128IBase, EVEX_4V, Sched<[sched]>;
6347 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6348 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
6349 "$src2, $src1", "$src1, $src2",
6350 (_.VT (OpNode _.RC:$src1,
6351 (_.VT (_.LdFrag addr:$src2))))>,
6352 AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6353 Sched<[sched.Folded, sched.ReadAfterFold]>;
6357 multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
6358 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6359 let ExeDomain = _.ExeDomain in
6360 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6361 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6362 "${src2}"#_.BroadcastStr#", $src1",
6363 "$src1, ${src2}"#_.BroadcastStr,
6364 (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
6365 AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6366 Sched<[sched.Folded, sched.ReadAfterFold]>;
6369 multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6370 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
6371 let Predicates = [HasAVX512] in
6372 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
6373 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
6375 let Predicates = [HasAVX512, HasVLX] in {
6376 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
6377 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
6378 defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
6379 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
6383 multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
6384 SDNode OpNode, X86SchedWriteWidths sched> {
6385 defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
6387 defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
6388 avx512vl_i64_info>, REX_W;
6391 // Use 512bit version to implement 128/256 bit in case NoVLX.
6392 multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
6393 SDNode OpNode, list<Predicate> p> {
6394 let Predicates = p in {
6395 def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
6396 (_.info256.VT _.info256.RC:$src2))),
6398 (!cast<Instruction>(OpcodeStr#"Zrr")
6399 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
6400 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
6403 def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
6404 (_.info128.VT _.info128.RC:$src2))),
6406 (!cast<Instruction>(OpcodeStr#"Zrr")
6407 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6408 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
6412 multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6413 SDNode OpNode, X86SchedWriteWidths sched> {
6414 let Predicates = [HasBWI] in
6415 defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6417 let Predicates = [HasVLX, HasBWI] in {
6419 defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6421 defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
6426 defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
6427 avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
6429 defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
6430 avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
6432 defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
6433 avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
6435 defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6436 defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
6438 defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
6439 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
6440 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
6441 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
6444 // Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
6445 let Predicates = [HasAVX512, NoVLX] in {
6446 def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6447 (EXTRACT_SUBREG (v8i64
6449 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6450 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6452 def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6453 (EXTRACT_SUBREG (v8i64
6455 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6456 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6459 def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6460 (EXTRACT_SUBREG (v16i32
6462 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6463 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6465 def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6466 (EXTRACT_SUBREG (v16i32
6468 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6469 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6472 def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
6473 (EXTRACT_SUBREG (v8i64
6475 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6476 timm:$src2)), sub_xmm)>;
6477 def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
6478 (EXTRACT_SUBREG (v8i64
6480 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6481 timm:$src2)), sub_ymm)>;
6483 def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
6484 (EXTRACT_SUBREG (v16i32
6486 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6487 timm:$src2)), sub_xmm)>;
6488 def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
6489 (EXTRACT_SUBREG (v16i32
6491 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6492 timm:$src2)), sub_ymm)>;
6495 // Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
6496 let Predicates = [HasAVX512, NoVLX] in {
6497 def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6498 (EXTRACT_SUBREG (v8i64
6500 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6501 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6503 def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6504 (EXTRACT_SUBREG (v8i64
6506 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6507 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6510 def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6511 (EXTRACT_SUBREG (v16i32
6513 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6514 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6516 def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6517 (EXTRACT_SUBREG (v16i32
6519 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6520 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6523 def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
6524 (EXTRACT_SUBREG (v8i64
6526 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6527 timm:$src2)), sub_xmm)>;
6528 def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
6529 (EXTRACT_SUBREG (v8i64
6531 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6532 timm:$src2)), sub_ymm)>;
6534 def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
6535 (EXTRACT_SUBREG (v16i32
6537 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6538 timm:$src2)), sub_xmm)>;
6539 def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
6540 (EXTRACT_SUBREG (v16i32
6542 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6543 timm:$src2)), sub_ymm)>;
6546 //===-------------------------------------------------------------------===//
6547 // 1-src variable permutation VPERMW/D/Q
6548 //===-------------------------------------------------------------------===//
6550 multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6551 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6552 let Predicates = [HasAVX512] in
6553 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6554 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6556 let Predicates = [HasAVX512, HasVLX] in
6557 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6558 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
6561 multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6562 string OpcodeStr, SDNode OpNode,
6563 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6564 let Predicates = [HasAVX512] in
6565 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6566 sched, VTInfo.info512>,
6567 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6568 sched, VTInfo.info512>, EVEX_V512;
6569 let Predicates = [HasAVX512, HasVLX] in
6570 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6571 sched, VTInfo.info256>,
6572 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6573 sched, VTInfo.info256>, EVEX_V256;
6576 multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6577 Predicate prd, SDNode OpNode,
6578 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6579 let Predicates = [prd] in
6580 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6582 let Predicates = [HasVLX, prd] in {
6583 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6585 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
6590 defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6591 WriteVarShuffle256, avx512vl_i16_info>, REX_W;
6592 defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6593 WriteVarShuffle256, avx512vl_i8_info>;
6595 defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6596 WriteVarShuffle256, avx512vl_i32_info>;
6597 defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6598 WriteVarShuffle256, avx512vl_i64_info>, REX_W;
6599 defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6600 WriteFVarShuffle256, avx512vl_f32_info>;
6601 defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6602 WriteFVarShuffle256, avx512vl_f64_info>, REX_W;
6604 defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6605 X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6606 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
6607 defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6608 X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6609 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
6611 //===----------------------------------------------------------------------===//
6612 // AVX-512 - VPERMIL
6613 //===----------------------------------------------------------------------===//
6615 multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6616 X86FoldableSchedWrite sched, X86VectorVTInfo _,
6617 X86VectorVTInfo Ctrl> {
6618 defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6619 (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6620 "$src2, $src1", "$src1, $src2",
6621 (_.VT (OpNode _.RC:$src1,
6622 (Ctrl.VT Ctrl.RC:$src2)))>,
6623 T8PD, EVEX_4V, Sched<[sched]>;
6624 defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6625 (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6626 "$src2, $src1", "$src1, $src2",
6629 (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
6630 T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6631 Sched<[sched.Folded, sched.ReadAfterFold]>;
6632 defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6633 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6634 "${src2}"#_.BroadcastStr#", $src1",
6635 "$src1, ${src2}"#_.BroadcastStr,
6638 (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
6639 T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6640 Sched<[sched.Folded, sched.ReadAfterFold]>;
6643 multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6644 X86SchedWriteWidths sched,
6645 AVX512VLVectorVTInfo _,
6646 AVX512VLVectorVTInfo Ctrl> {
6647 let Predicates = [HasAVX512] in {
6648 defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6649 _.info512, Ctrl.info512>, EVEX_V512;
6651 let Predicates = [HasAVX512, HasVLX] in {
6652 defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6653 _.info128, Ctrl.info128>, EVEX_V128;
6654 defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6655 _.info256, Ctrl.info256>, EVEX_V256;
6659 multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6660 AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6661 defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6663 defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6664 X86VPermilpi, SchedWriteFShuffle, _>,
6665 EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
6668 let ExeDomain = SSEPackedSingle in
6669 defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6671 let ExeDomain = SSEPackedDouble in
6672 defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6673 avx512vl_i64_info>, VEX_W1X;
6675 //===----------------------------------------------------------------------===//
6676 // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6677 //===----------------------------------------------------------------------===//
6679 defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6680 X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6681 EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6682 defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6683 X86PShufhw, SchedWriteShuffle>,
6684 EVEX, AVX512XSIi8Base;
6685 defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6686 X86PShuflw, SchedWriteShuffle>,
6687 EVEX, AVX512XDIi8Base;
6689 //===----------------------------------------------------------------------===//
6690 // AVX-512 - VPSHUFB
6691 //===----------------------------------------------------------------------===//
6693 multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6694 X86SchedWriteWidths sched> {
6695 let Predicates = [HasBWI] in
6696 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6699 let Predicates = [HasVLX, HasBWI] in {
6700 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6702 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
6707 defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6708 SchedWriteVarShuffle>, WIG;
6710 //===----------------------------------------------------------------------===//
6711 // Move Low to High and High to Low packed FP Instructions
6712 //===----------------------------------------------------------------------===//
6714 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6715 (ins VR128X:$src1, VR128X:$src2),
6716 "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6717 [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6718 Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
6719 let isCommutable = 1 in
6720 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6721 (ins VR128X:$src1, VR128X:$src2),
6722 "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6723 [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6724 Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
6726 //===----------------------------------------------------------------------===//
6727 // VMOVHPS/PD VMOVLPS Instructions
6728 // All patterns were taken from the SSE implementation.
6729 //===----------------------------------------------------------------------===//
6731 multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6732 SDPatternOperator OpNode,
6733 X86VectorVTInfo _> {
6734 let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6735 def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6736 (ins _.RC:$src1, f64mem:$src2),
6737 !strconcat(OpcodeStr,
6738 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6742 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6743 Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
6746 // No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
6747 // SSE1. And MOVLPS pattern is even more complex.
6748 defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6749 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6750 defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6751 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, REX_W;
6752 defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6753 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6754 defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6755 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, REX_W;
6757 let Predicates = [HasAVX512] in {
6759 def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
6760 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6763 def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
6764 (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
6767 let SchedRW = [WriteFStore] in {
6768 let mayStore = 1, hasSideEffects = 0 in
6769 def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6770 (ins f64mem:$dst, VR128X:$src),
6771 "vmovhps\t{$src, $dst|$dst, $src}",
6772 []>, EVEX, EVEX_CD8<32, CD8VT2>;
6773 def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6774 (ins f64mem:$dst, VR128X:$src),
6775 "vmovhpd\t{$src, $dst|$dst, $src}",
6776 [(store (f64 (extractelt
6777 (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6778 (iPTR 0))), addr:$dst)]>,
6779 EVEX, EVEX_CD8<64, CD8VT1>, REX_W;
6780 let mayStore = 1, hasSideEffects = 0 in
6781 def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6782 (ins f64mem:$dst, VR128X:$src),
6783 "vmovlps\t{$src, $dst|$dst, $src}",
6784 []>, EVEX, EVEX_CD8<32, CD8VT2>;
6785 def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6786 (ins f64mem:$dst, VR128X:$src),
6787 "vmovlpd\t{$src, $dst|$dst, $src}",
6788 [(store (f64 (extractelt (v2f64 VR128X:$src),
6789 (iPTR 0))), addr:$dst)]>,
6790 EVEX, EVEX_CD8<64, CD8VT1>, REX_W;
6793 let Predicates = [HasAVX512] in {
6795 def : Pat<(store (f64 (extractelt
6796 (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6797 (iPTR 0))), addr:$dst),
6798 (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6800 //===----------------------------------------------------------------------===//
6801 // FMA - Fused Multiply Operations
6804 multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6805 SDNode MaskOpNode, X86FoldableSchedWrite sched,
6806 X86VectorVTInfo _> {
6807 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6808 Uses = [MXCSR], mayRaiseFPException = 1 in {
6809 defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6810 (ins _.RC:$src2, _.RC:$src3),
6811 OpcodeStr, "$src3, $src2", "$src2, $src3",
6812 (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
6813 (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6814 EVEX_4V, Sched<[sched]>;
6816 defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6817 (ins _.RC:$src2, _.MemOp:$src3),
6818 OpcodeStr, "$src3, $src2", "$src2, $src3",
6819 (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
6820 (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6821 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
6822 sched.ReadAfterFold]>;
6824 defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6825 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6826 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
6827 !strconcat("$src2, ${src3}", _.BroadcastStr ),
6829 _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
6830 (MaskOpNode _.RC:$src2,
6831 _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
6832 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
6833 sched.ReadAfterFold]>;
6837 multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6838 X86FoldableSchedWrite sched,
6839 X86VectorVTInfo _> {
6840 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6842 defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6843 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6844 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6845 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
6846 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
6847 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
6850 multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6851 SDNode MaskOpNode, SDNode OpNodeRnd,
6852 X86SchedWriteWidths sched,
6853 AVX512VLVectorVTInfo _,
6854 Predicate prd = HasAVX512> {
6855 let Predicates = [prd] in {
6856 defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6857 sched.ZMM, _.info512>,
6858 avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6860 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6862 let Predicates = [HasVLX, prd] in {
6863 defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6864 sched.YMM, _.info256>,
6865 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6866 defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6867 sched.XMM, _.info128>,
6868 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6872 multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6873 SDNode MaskOpNode, SDNode OpNodeRnd> {
6874 defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6875 OpNodeRnd, SchedWriteFMA,
6876 avx512vl_f16_info, HasFP16>, T_MAP6PD;
6877 defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6878 OpNodeRnd, SchedWriteFMA,
6879 avx512vl_f32_info>, T8PD;
6880 defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6881 OpNodeRnd, SchedWriteFMA,
6882 avx512vl_f64_info>, T8PD, REX_W;
6885 defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
6887 defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
6888 X86Fmsub, X86FmsubRnd>;
6889 defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
6890 X86Fmaddsub, X86FmaddsubRnd>;
6891 defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
6892 X86Fmsubadd, X86FmsubaddRnd>;
6893 defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
6894 X86Fnmadd, X86FnmaddRnd>;
6895 defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
6896 X86Fnmsub, X86FnmsubRnd>;
6899 multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6900 SDNode MaskOpNode, X86FoldableSchedWrite sched,
6901 X86VectorVTInfo _> {
6902 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6903 Uses = [MXCSR], mayRaiseFPException = 1 in {
6904 defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6905 (ins _.RC:$src2, _.RC:$src3),
6906 OpcodeStr, "$src3, $src2", "$src2, $src3",
6908 (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6909 EVEX_4V, Sched<[sched]>;
6911 defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6912 (ins _.RC:$src2, _.MemOp:$src3),
6913 OpcodeStr, "$src3, $src2", "$src2, $src3",
6914 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6915 (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6916 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
6917 sched.ReadAfterFold]>;
6919 defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6920 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6921 OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6922 "$src2, ${src3}"#_.BroadcastStr,
6923 (_.VT (OpNode _.RC:$src2,
6924 (_.VT (_.BroadcastLdFrag addr:$src3)),
6926 (_.VT (MaskOpNode _.RC:$src2,
6927 (_.VT (_.BroadcastLdFrag addr:$src3)),
6928 _.RC:$src1)), 1, 0>, EVEX_4V, EVEX_B,
6929 Sched<[sched.Folded, sched.ReadAfterFold,
6930 sched.ReadAfterFold]>;
6934 multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6935 X86FoldableSchedWrite sched,
6936 X86VectorVTInfo _> {
6937 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6939 defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6940 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6941 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6943 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
6944 1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
6947 multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6948 SDNode MaskOpNode, SDNode OpNodeRnd,
6949 X86SchedWriteWidths sched,
6950 AVX512VLVectorVTInfo _,
6951 Predicate prd = HasAVX512> {
6952 let Predicates = [prd] in {
6953 defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6954 sched.ZMM, _.info512>,
6955 avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6957 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6959 let Predicates = [HasVLX, prd] in {
6960 defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6961 sched.YMM, _.info256>,
6962 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6963 defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6964 sched.XMM, _.info128>,
6965 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6969 multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6970 SDNode MaskOpNode, SDNode OpNodeRnd > {
6971 defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6972 OpNodeRnd, SchedWriteFMA,
6973 avx512vl_f16_info, HasFP16>, T_MAP6PD;
6974 defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6975 OpNodeRnd, SchedWriteFMA,
6976 avx512vl_f32_info>, T8PD;
6977 defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6978 OpNodeRnd, SchedWriteFMA,
6979 avx512vl_f64_info>, T8PD, REX_W;
6982 defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
6984 defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
6985 X86Fmsub, X86FmsubRnd>;
6986 defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
6987 X86Fmaddsub, X86FmaddsubRnd>;
6988 defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
6989 X86Fmsubadd, X86FmsubaddRnd>;
6990 defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
6991 X86Fnmadd, X86FnmaddRnd>;
6992 defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
6993 X86Fnmsub, X86FnmsubRnd>;
6995 multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6996 SDNode MaskOpNode, X86FoldableSchedWrite sched,
6997 X86VectorVTInfo _> {
6998 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6999 Uses = [MXCSR], mayRaiseFPException = 1 in {
7000 defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
7001 (ins _.RC:$src2, _.RC:$src3),
7002 OpcodeStr, "$src3, $src2", "$src2, $src3",
7004 (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
7005 EVEX_4V, Sched<[sched]>;
7007 // Pattern is 312 order so that the load is in a different place from the
7008 // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
7009 defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
7010 (ins _.RC:$src2, _.MemOp:$src3),
7011 OpcodeStr, "$src3, $src2", "$src2, $src3",
7012 (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
7013 (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
7014 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
7015 sched.ReadAfterFold]>;
7017 // Pattern is 312 order so that the load is in a different place from the
7018 // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
7019 defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
7020 (ins _.RC:$src2, _.ScalarMemOp:$src3),
7021 OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
7022 "$src2, ${src3}"#_.BroadcastStr,
7023 (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
7024 _.RC:$src1, _.RC:$src2)),
7025 (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
7026 _.RC:$src1, _.RC:$src2)), 1, 0>,
7027 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
7028 sched.ReadAfterFold]>;
7032 multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
7033 X86FoldableSchedWrite sched,
7034 X86VectorVTInfo _> {
7035 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
7037 defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
7038 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
7039 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
7041 (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
7042 1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
7045 multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
7046 SDNode MaskOpNode, SDNode OpNodeRnd,
7047 X86SchedWriteWidths sched,
7048 AVX512VLVectorVTInfo _,
7049 Predicate prd = HasAVX512> {
7050 let Predicates = [prd] in {
7051 defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
7052 sched.ZMM, _.info512>,
7053 avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
7055 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7057 let Predicates = [HasVLX, prd] in {
7058 defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
7059 sched.YMM, _.info256>,
7060 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7061 defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
7062 sched.XMM, _.info128>,
7063 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
7067 multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
7068 SDNode MaskOpNode, SDNode OpNodeRnd > {
7069 defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
7070 OpNodeRnd, SchedWriteFMA,
7071 avx512vl_f16_info, HasFP16>, T_MAP6PD;
7072 defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
7073 OpNodeRnd, SchedWriteFMA,
7074 avx512vl_f32_info>, T8PD;
7075 defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
7076 OpNodeRnd, SchedWriteFMA,
7077 avx512vl_f64_info>, T8PD, REX_W;
7080 defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
7082 defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
7083 X86Fmsub, X86FmsubRnd>;
7084 defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
7085 X86Fmaddsub, X86FmaddsubRnd>;
7086 defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
7087 X86Fmsubadd, X86FmsubaddRnd>;
7088 defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
7089 X86Fnmadd, X86FnmaddRnd>;
7090 defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
7091 X86Fnmsub, X86FnmsubRnd>;
7094 multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7095 dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
7096 let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
7097 defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7098 (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
7099 "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
7100 EVEX_4V, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
7103 defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7104 (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
7105 "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
7106 EVEX_4V, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
7107 SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
7109 let Uses = [MXCSR] in
7110 defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7111 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
7112 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
7113 EVEX_4V, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
7115 let isCodeGenOnly = 1, isCommutable = 1 in {
7116 def r : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
7117 (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
7118 !strconcat(OpcodeStr,
7119 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
7120 !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX_4V, SIMD_EXC;
7121 def m : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
7122 (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
7123 !strconcat(OpcodeStr,
7124 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
7125 [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
7126 SchedWriteFMA.Scl.ReadAfterFold]>, EVEX_4V, SIMD_EXC;
7128 let Uses = [MXCSR] in
7129 def rb : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
7130 (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
7131 !strconcat(OpcodeStr,
7132 "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
7133 !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
7134 Sched<[SchedWriteFMA.Scl]>, EVEX_4V;
7135 }// isCodeGenOnly = 1
7136 }// Constraints = "$src1 = $dst"
7139 multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
7140 string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
7141 X86VectorVTInfo _, string SUFF> {
7142 let ExeDomain = _.ExeDomain in {
7143 defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
7144 // Operands for intrinsic are in 123 order to preserve passthu
7146 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
7148 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
7149 (_.ScalarLdFrag addr:$src3)))),
7150 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
7151 _.FRC:$src3, (i32 timm:$rc)))), 0>;
7153 defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
7154 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
7156 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
7157 (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
7158 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
7159 _.FRC:$src1, (i32 timm:$rc)))), 1>;
7161 // One pattern is 312 order so that the load is in a different place from the
7162 // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
7163 defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
7164 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
7166 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
7167 _.FRC:$src1, _.FRC:$src2))),
7168 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
7169 _.FRC:$src2, (i32 timm:$rc)))), 1>;
7173 multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
7174 string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> {
7175 let Predicates = [HasAVX512] in {
7176 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
7177 OpNodeRnd, f32x_info, "SS">,
7178 EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD;
7179 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
7180 OpNodeRnd, f64x_info, "SD">,
7181 EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8PD;
7183 let Predicates = [HasFP16] in {
7184 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
7185 OpNodeRnd, f16x_info, "SH">,
7186 EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6PD;
7190 defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
7191 defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
7192 defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
7193 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
7195 multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
7196 SDNode RndOp, string Prefix,
7197 string Suffix, SDNode Move,
7198 X86VectorVTInfo _, PatLeaf ZeroFP,
7199 Predicate prd = HasAVX512> {
7200 let Predicates = [prd] in {
7201 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7203 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7205 (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
7206 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7207 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7209 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7210 (Op _.FRC:$src2, _.FRC:$src3,
7211 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7212 (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
7213 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7214 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7216 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7218 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7219 (_.ScalarLdFrag addr:$src3)))))),
7220 (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
7221 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7224 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7225 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7226 (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
7227 (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
7228 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7231 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7232 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7233 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7234 (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
7235 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7238 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7239 (X86selects_mask VK1WM:$mask,
7240 (MaskedOp _.FRC:$src2,
7241 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7243 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7244 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
7245 VR128X:$src1, VK1WM:$mask,
7246 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7247 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7249 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7250 (X86selects_mask VK1WM:$mask,
7251 (MaskedOp _.FRC:$src2,
7252 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7253 (_.ScalarLdFrag addr:$src3)),
7254 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7255 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
7256 VR128X:$src1, VK1WM:$mask,
7257 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7259 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7260 (X86selects_mask VK1WM:$mask,
7261 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7262 (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
7263 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7264 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
7265 VR128X:$src1, VK1WM:$mask,
7266 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7268 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7269 (X86selects_mask VK1WM:$mask,
7270 (MaskedOp _.FRC:$src2, _.FRC:$src3,
7271 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7272 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7273 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
7274 VR128X:$src1, VK1WM:$mask,
7275 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7276 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7278 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7279 (X86selects_mask VK1WM:$mask,
7280 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7281 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7282 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7283 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
7284 VR128X:$src1, VK1WM:$mask,
7285 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7287 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7288 (X86selects_mask VK1WM:$mask,
7289 (MaskedOp _.FRC:$src2,
7290 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7292 (_.EltVT ZeroFP)))))),
7293 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
7294 VR128X:$src1, VK1WM:$mask,
7295 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7296 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7298 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7299 (X86selects_mask VK1WM:$mask,
7300 (MaskedOp _.FRC:$src2, _.FRC:$src3,
7301 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7302 (_.EltVT ZeroFP)))))),
7303 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
7304 VR128X:$src1, VK1WM:$mask,
7305 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7306 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7308 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7309 (X86selects_mask VK1WM:$mask,
7310 (MaskedOp _.FRC:$src2,
7311 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7312 (_.ScalarLdFrag addr:$src3)),
7313 (_.EltVT ZeroFP)))))),
7314 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
7315 VR128X:$src1, VK1WM:$mask,
7316 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7318 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7319 (X86selects_mask VK1WM:$mask,
7320 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7321 _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
7322 (_.EltVT ZeroFP)))))),
7323 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
7324 VR128X:$src1, VK1WM:$mask,
7325 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7327 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7328 (X86selects_mask VK1WM:$mask,
7329 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7330 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7331 (_.EltVT ZeroFP)))))),
7332 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
7333 VR128X:$src1, VK1WM:$mask,
7334 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7336 // Patterns with rounding mode.
7337 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7339 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7340 _.FRC:$src3, (i32 timm:$rc)))))),
7341 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
7342 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7343 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7345 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7346 (RndOp _.FRC:$src2, _.FRC:$src3,
7347 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7348 (i32 timm:$rc)))))),
7349 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
7350 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7351 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7353 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7354 (X86selects_mask VK1WM:$mask,
7356 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7357 _.FRC:$src3, (i32 timm:$rc)),
7358 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7359 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
7360 VR128X:$src1, VK1WM:$mask,
7361 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7362 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7364 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7365 (X86selects_mask VK1WM:$mask,
7366 (RndOp _.FRC:$src2, _.FRC:$src3,
7367 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7369 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7370 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
7371 VR128X:$src1, VK1WM:$mask,
7372 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7373 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7375 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7376 (X86selects_mask VK1WM:$mask,
7378 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7379 _.FRC:$src3, (i32 timm:$rc)),
7380 (_.EltVT ZeroFP)))))),
7381 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
7382 VR128X:$src1, VK1WM:$mask,
7383 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7384 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7386 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7387 (X86selects_mask VK1WM:$mask,
7388 (RndOp _.FRC:$src2, _.FRC:$src3,
7389 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7391 (_.EltVT ZeroFP)))))),
7392 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
7393 VR128X:$src1, VK1WM:$mask,
7394 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7395 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7398 defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH",
7399 X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7400 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH",
7401 X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7402 defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
7403 X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7404 defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
7405 X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7407 defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7408 "SS", X86Movss, v4f32x_info, fp32imm0>;
7409 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7410 "SS", X86Movss, v4f32x_info, fp32imm0>;
7411 defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7412 "SS", X86Movss, v4f32x_info, fp32imm0>;
7413 defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7414 "SS", X86Movss, v4f32x_info, fp32imm0>;
7416 defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7417 "SD", X86Movsd, v2f64x_info, fp64imm0>;
7418 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7419 "SD", X86Movsd, v2f64x_info, fp64imm0>;
7420 defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7421 "SD", X86Movsd, v2f64x_info, fp64imm0>;
7422 defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7423 "SD", X86Movsd, v2f64x_info, fp64imm0>;
7425 //===----------------------------------------------------------------------===//
7426 // AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
7427 //===----------------------------------------------------------------------===//
7428 let Constraints = "$src1 = $dst" in {
7429 multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
7430 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
7431 // NOTE: The SDNode have the multiply operands first with the add last.
7432 // This enables commuted load patterns to be autogenerated by tablegen.
7433 let ExeDomain = _.ExeDomain in {
7434 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
7435 (ins _.RC:$src2, _.RC:$src3),
7436 OpcodeStr, "$src3, $src2", "$src2, $src3",
7437 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
7438 T8PD, EVEX_4V, Sched<[sched]>;
7440 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7441 (ins _.RC:$src2, _.MemOp:$src3),
7442 OpcodeStr, "$src3, $src2", "$src2, $src3",
7443 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
7444 T8PD, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
7445 sched.ReadAfterFold]>;
7447 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7448 (ins _.RC:$src2, _.ScalarMemOp:$src3),
7449 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
7450 !strconcat("$src2, ${src3}", _.BroadcastStr ),
7452 (_.VT (_.BroadcastLdFrag addr:$src3)),
7454 T8PD, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
7455 sched.ReadAfterFold]>;
7458 } // Constraints = "$src1 = $dst"
7460 multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7461 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
7462 let Predicates = [HasIFMA] in {
7463 defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
7464 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7466 let Predicates = [HasVLX, HasIFMA] in {
7467 defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
7468 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7469 defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
7470 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
7474 defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
7475 SchedWriteVecIMul, avx512vl_i64_info>,
7477 defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
7478 SchedWriteVecIMul, avx512vl_i64_info>,
7481 //===----------------------------------------------------------------------===//
7482 // AVX-512 Scalar convert from sign integer to float/double
7483 //===----------------------------------------------------------------------===//
7485 multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
7486 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7487 X86MemOperand x86memop, PatFrag ld_frag, string asm,
7488 string mem, list<Register> _Uses = [MXCSR],
7489 bit _mayRaiseFPException = 1> {
7490 let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
7491 mayRaiseFPException = _mayRaiseFPException in {
7492 let hasSideEffects = 0, isCodeGenOnly = 1 in {
7493 def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7494 (ins DstVT.FRC:$src1, SrcRC:$src),
7495 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7496 EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7498 def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7499 (ins DstVT.FRC:$src1, x86memop:$src),
7500 asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
7501 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7502 } // hasSideEffects = 0
7503 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7504 (ins DstVT.RC:$src1, SrcRC:$src2),
7505 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7506 [(set DstVT.RC:$dst,
7507 (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
7508 EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7510 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7511 (ins DstVT.RC:$src1, x86memop:$src2),
7512 asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7513 [(set DstVT.RC:$dst,
7514 (OpNode (DstVT.VT DstVT.RC:$src1),
7515 (ld_frag addr:$src2)))]>,
7516 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7518 def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7519 (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
7520 DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
7523 multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7524 X86FoldableSchedWrite sched, RegisterClass SrcRC,
7525 X86VectorVTInfo DstVT, string asm,
7527 let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
7528 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7529 (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7531 "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7532 [(set DstVT.RC:$dst,
7533 (OpNode (DstVT.VT DstVT.RC:$src1),
7536 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7537 def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
7538 (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
7539 DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
7542 multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
7543 X86FoldableSchedWrite sched,
7544 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7545 X86MemOperand x86memop, PatFrag ld_frag,
7546 string asm, string mem> {
7547 defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
7548 avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7549 ld_frag, asm, mem>, VEX_LIG;
7552 let Predicates = [HasAVX512] in {
7553 defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7555 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7556 XS, EVEX_CD8<32, CD8VT1>;
7557 defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7559 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7560 XS, REX_W, EVEX_CD8<64, CD8VT1>;
7561 defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7562 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
7563 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7564 defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7566 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7567 XD, REX_W, EVEX_CD8<64, CD8VT1>;
7569 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7570 (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7571 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7572 (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7574 def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
7575 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7576 def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
7577 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7578 def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
7579 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7580 def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
7581 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7583 def : Pat<(f32 (any_sint_to_fp GR32:$src)),
7584 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7585 def : Pat<(f32 (any_sint_to_fp GR64:$src)),
7586 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7587 def : Pat<(f64 (any_sint_to_fp GR32:$src)),
7588 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7589 def : Pat<(f64 (any_sint_to_fp GR64:$src)),
7590 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7592 defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7594 v4f32x_info, i32mem, loadi32,
7595 "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
7596 defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7598 v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7599 XS, REX_W, EVEX_CD8<64, CD8VT1>;
7600 defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7601 i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
7602 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7603 defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7605 v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7606 XD, REX_W, EVEX_CD8<64, CD8VT1>;
7608 def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7609 (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7610 def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7611 (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7613 def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
7614 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7615 def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
7616 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7617 def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
7618 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7619 def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
7620 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7622 def : Pat<(f32 (any_uint_to_fp GR32:$src)),
7623 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7624 def : Pat<(f32 (any_uint_to_fp GR64:$src)),
7625 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7626 def : Pat<(f64 (any_uint_to_fp GR32:$src)),
7627 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7628 def : Pat<(f64 (any_uint_to_fp GR64:$src)),
7629 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7632 //===----------------------------------------------------------------------===//
7633 // AVX-512 Scalar convert from float/double to integer
7634 //===----------------------------------------------------------------------===//
7636 multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7637 X86VectorVTInfo DstVT, SDNode OpNode,
7639 X86FoldableSchedWrite sched, string asm,
7640 string aliasStr, Predicate prd = HasAVX512> {
7641 let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
7642 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7643 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7644 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7645 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7646 let Uses = [MXCSR] in
7647 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7648 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7649 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
7650 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7652 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7653 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7654 [(set DstVT.RC:$dst, (OpNode
7655 (SrcVT.ScalarIntMemFrags addr:$src)))]>,
7656 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7657 } // Predicates = [prd]
7659 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7660 (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7661 def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7662 (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7663 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7664 (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7665 SrcVT.IntScalarMemOp:$src), 0, "att">;
7668 // Convert float/double to signed/unsigned int 32/64
7669 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7670 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7671 XS, EVEX_CD8<32, CD8VT1>;
7672 defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7673 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7674 XS, REX_W, EVEX_CD8<32, CD8VT1>;
7675 defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7676 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7677 XS, EVEX_CD8<32, CD8VT1>;
7678 defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7679 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7680 XS, REX_W, EVEX_CD8<32, CD8VT1>;
7681 defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7682 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7683 XD, EVEX_CD8<64, CD8VT1>;
7684 defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7685 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7686 XD, REX_W, EVEX_CD8<64, CD8VT1>;
7687 defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7688 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7689 XD, EVEX_CD8<64, CD8VT1>;
7690 defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7691 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7692 XD, REX_W, EVEX_CD8<64, CD8VT1>;
7694 multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
7695 X86VectorVTInfo DstVT, SDNode OpNode,
7696 X86FoldableSchedWrite sched> {
7697 let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
7698 let isCodeGenOnly = 1 in {
7699 def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
7700 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7701 [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
7702 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7703 def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
7704 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7705 [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
7706 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7708 } // Predicates = [HasAVX512]
7711 defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
7712 lrint, WriteCvtSS2I>, XS, EVEX_CD8<32, CD8VT1>;
7713 defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
7714 llrint, WriteCvtSS2I>, REX_W, XS, EVEX_CD8<32, CD8VT1>;
7715 defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
7716 lrint, WriteCvtSD2I>, XD, EVEX_CD8<64, CD8VT1>;
7717 defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
7718 llrint, WriteCvtSD2I>, REX_W, XD, EVEX_CD8<64, CD8VT1>;
7720 let Predicates = [HasAVX512] in {
7721 def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
7722 def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;
7724 def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
7725 def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
7728 // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
7729 // which produce unnecessary vmovs{s,d} instructions
7730 let Predicates = [HasAVX512] in {
7731 def : Pat<(v4f32 (X86Movss
7732 (v4f32 VR128X:$dst),
7733 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
7734 (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7736 def : Pat<(v4f32 (X86Movss
7737 (v4f32 VR128X:$dst),
7738 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
7739 (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7741 def : Pat<(v4f32 (X86Movss
7742 (v4f32 VR128X:$dst),
7743 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
7744 (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7746 def : Pat<(v4f32 (X86Movss
7747 (v4f32 VR128X:$dst),
7748 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
7749 (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7751 def : Pat<(v2f64 (X86Movsd
7752 (v2f64 VR128X:$dst),
7753 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
7754 (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7756 def : Pat<(v2f64 (X86Movsd
7757 (v2f64 VR128X:$dst),
7758 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
7759 (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7761 def : Pat<(v2f64 (X86Movsd
7762 (v2f64 VR128X:$dst),
7763 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
7764 (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7766 def : Pat<(v2f64 (X86Movsd
7767 (v2f64 VR128X:$dst),
7768 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
7769 (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7771 def : Pat<(v4f32 (X86Movss
7772 (v4f32 VR128X:$dst),
7773 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
7774 (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7776 def : Pat<(v4f32 (X86Movss
7777 (v4f32 VR128X:$dst),
7778 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
7779 (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7781 def : Pat<(v4f32 (X86Movss
7782 (v4f32 VR128X:$dst),
7783 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
7784 (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7786 def : Pat<(v4f32 (X86Movss
7787 (v4f32 VR128X:$dst),
7788 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
7789 (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7791 def : Pat<(v2f64 (X86Movsd
7792 (v2f64 VR128X:$dst),
7793 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
7794 (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7796 def : Pat<(v2f64 (X86Movsd
7797 (v2f64 VR128X:$dst),
7798 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
7799 (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7801 def : Pat<(v2f64 (X86Movsd
7802 (v2f64 VR128X:$dst),
7803 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
7804 (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7806 def : Pat<(v2f64 (X86Movsd
7807 (v2f64 VR128X:$dst),
7808 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
7809 (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7810 } // Predicates = [HasAVX512]
7812 // Convert float/double to signed/unsigned int 32/64 with truncation
7813 multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7814 X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
7815 SDNode OpNodeInt, SDNode OpNodeSAE,
7816 X86FoldableSchedWrite sched, string aliasStr,
7817 Predicate prd = HasAVX512> {
7818 let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
7819 let isCodeGenOnly = 1 in {
7820 def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7821 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7822 [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7823 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7824 def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7825 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7826 [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7827 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7830 def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7831 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7832 [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7833 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7834 let Uses = [MXCSR] in
7835 def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7836 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7837 [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7838 EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
7839 def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7840 (ins _SrcRC.IntScalarMemOp:$src),
7841 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7842 [(set _DstRC.RC:$dst,
7843 (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
7844 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7845 } // Predicates = [prd]
7847 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7848 (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7849 def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7850 (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7851 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7852 (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7853 _SrcRC.IntScalarMemOp:$src), 0, "att">;
7856 defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7857 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7858 "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7859 defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7860 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7861 "{q}">, REX_W, XS, EVEX_CD8<32, CD8VT1>;
7862 defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7863 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7864 "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7865 defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7866 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7867 "{q}">, REX_W, XD, EVEX_CD8<64, CD8VT1>;
7869 defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7870 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7871 "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7872 defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7873 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7874 "{q}">, XS,REX_W, EVEX_CD8<32, CD8VT1>;
7875 defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7876 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7877 "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7878 defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7879 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7880 "{q}">, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7882 //===----------------------------------------------------------------------===//
7883 // AVX-512 Convert form float to double and back
7884 //===----------------------------------------------------------------------===//
7886 let Uses = [MXCSR], mayRaiseFPException = 1 in
7887 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7888 X86VectorVTInfo _Src, SDNode OpNode,
7889 X86FoldableSchedWrite sched> {
7890 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7891 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7892 "$src2, $src1", "$src1, $src2",
7893 (_.VT (OpNode (_.VT _.RC:$src1),
7894 (_Src.VT _Src.RC:$src2)))>,
7895 EVEX_4V, VEX_LIG, Sched<[sched]>;
7896 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7897 (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7898 "$src2, $src1", "$src1, $src2",
7899 (_.VT (OpNode (_.VT _.RC:$src1),
7900 (_Src.ScalarIntMemFrags addr:$src2)))>,
7902 Sched<[sched.Folded, sched.ReadAfterFold]>;
7904 let isCodeGenOnly = 1, hasSideEffects = 0 in {
7905 def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7906 (ins _.FRC:$src1, _Src.FRC:$src2),
7907 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7908 EVEX_4V, VEX_LIG, Sched<[sched]>;
7910 def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7911 (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7912 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7913 EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7917 // Scalar Conversion with SAE - suppress all exceptions
7918 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7919 X86VectorVTInfo _Src, SDNode OpNodeSAE,
7920 X86FoldableSchedWrite sched> {
7921 let Uses = [MXCSR] in
7922 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7923 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7924 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7925 (_.VT (OpNodeSAE (_.VT _.RC:$src1),
7926 (_Src.VT _Src.RC:$src2)))>,
7927 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
7930 // Scalar Conversion with rounding control (RC)
7931 multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7932 X86VectorVTInfo _Src, SDNode OpNodeRnd,
7933 X86FoldableSchedWrite sched> {
7934 let Uses = [MXCSR] in
7935 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7936 (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7937 "$rc, $src2, $src1", "$src1, $src2, $rc",
7938 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7939 (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
7940 EVEX_4V, VEX_LIG, Sched<[sched]>,
7943 multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
7944 SDNode OpNode, SDNode OpNodeRnd,
7945 X86FoldableSchedWrite sched,
7946 X86VectorVTInfo _src, X86VectorVTInfo _dst,
7947 Predicate prd = HasAVX512> {
7948 let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7949 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7950 avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7951 OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>;
7955 multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
7956 SDNode OpNode, SDNode OpNodeSAE,
7957 X86FoldableSchedWrite sched,
7958 X86VectorVTInfo _src, X86VectorVTInfo _dst,
7959 Predicate prd = HasAVX512> {
7960 let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7961 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7962 avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
7963 EVEX_CD8<_src.EltSize, CD8VT1>;
7966 defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
7967 X86froundsRnd, WriteCvtSD2SS, f64x_info,
7968 f32x_info>, XD, REX_W;
7969 defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
7970 X86fpextsSAE, WriteCvtSS2SD, f32x_info,
7972 defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
7973 X86froundsRnd, WriteCvtSD2SS, f64x_info,
7974 f16x_info, HasFP16>, T_MAP5XD, REX_W;
7975 defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
7976 X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7977 f64x_info, HasFP16>, T_MAP5XS;
7978 defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
7979 X86froundsRnd, WriteCvtSD2SS, f32x_info,
7980 f16x_info, HasFP16>, T_MAP5PS;
7981 defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
7982 X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7983 f32x_info, HasFP16>, T_MAP6PS;
7985 def : Pat<(f64 (any_fpextend FR32X:$src)),
7986 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7987 Requires<[HasAVX512]>;
7988 def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
7989 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7990 Requires<[HasAVX512, OptForSize]>;
7992 def : Pat<(f32 (any_fpround FR64X:$src)),
7993 (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7994 Requires<[HasAVX512]>;
7996 def : Pat<(f32 (any_fpextend FR16X:$src)),
7997 (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
7998 Requires<[HasFP16]>;
7999 def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
8000 (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
8001 Requires<[HasFP16, OptForSize]>;
8003 def : Pat<(f64 (any_fpextend FR16X:$src)),
8004 (VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
8005 Requires<[HasFP16]>;
8006 def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
8007 (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
8008 Requires<[HasFP16, OptForSize]>;
8010 def : Pat<(f16 (any_fpround FR32X:$src)),
8011 (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
8012 Requires<[HasFP16]>;
8013 def : Pat<(f16 (any_fpround FR64X:$src)),
8014 (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
8015 Requires<[HasFP16]>;
8017 def : Pat<(v4f32 (X86Movss
8018 (v4f32 VR128X:$dst),
8019 (v4f32 (scalar_to_vector
8020 (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
8021 (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
8022 Requires<[HasAVX512]>;
8024 def : Pat<(v2f64 (X86Movsd
8025 (v2f64 VR128X:$dst),
8026 (v2f64 (scalar_to_vector
8027 (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
8028 (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
8029 Requires<[HasAVX512]>;
8031 //===----------------------------------------------------------------------===//
8032 // AVX-512 Vector convert from signed/unsigned integer to float/double
8033 // and from float/double to signed/unsigned integer
8034 //===----------------------------------------------------------------------===//
8036 multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8037 X86VectorVTInfo _Src, SDPatternOperator OpNode, SDPatternOperator MaskOpNode,
8038 X86FoldableSchedWrite sched,
8039 string Broadcast = _.BroadcastStr,
8040 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
8041 RegisterClass MaskRC = _.KRCWM,
8042 dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
8043 dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
8044 let Uses = [MXCSR], mayRaiseFPException = 1 in {
8045 defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
8047 (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
8048 (ins MaskRC:$mask, _Src.RC:$src),
8049 OpcodeStr, "$src", "$src",
8050 (_.VT (OpNode (_Src.VT _Src.RC:$src))),
8051 (vselect_mask MaskRC:$mask,
8052 (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
8054 (vselect_mask MaskRC:$mask,
8055 (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
8057 EVEX, Sched<[sched]>;
8059 defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
8061 (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
8062 (ins MaskRC:$mask, MemOp:$src),
8063 OpcodeStr#Alias, "$src", "$src",
8065 (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
8066 (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
8067 EVEX, Sched<[sched.Folded]>;
8069 defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
8070 (ins _Src.ScalarMemOp:$src),
8071 (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
8072 (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
8074 "${src}"#Broadcast, "${src}"#Broadcast,
8075 (_.VT (OpNode (_Src.VT
8076 (_Src.BroadcastLdFrag addr:$src))
8078 (vselect_mask MaskRC:$mask,
8082 (_Src.BroadcastLdFrag addr:$src)))),
8084 (vselect_mask MaskRC:$mask,
8088 (_Src.BroadcastLdFrag addr:$src)))),
8090 EVEX, EVEX_B, Sched<[sched.Folded]>;
8093 // Conversion with SAE - suppress all exceptions
8094 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8095 X86VectorVTInfo _Src, SDNode OpNodeSAE,
8096 X86FoldableSchedWrite sched> {
8097 let Uses = [MXCSR] in
8098 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8099 (ins _Src.RC:$src), OpcodeStr,
8100 "{sae}, $src", "$src, {sae}",
8101 (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
8102 EVEX, EVEX_B, Sched<[sched]>;
8105 // Conversion with rounding control (RC)
8106 multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8107 X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd,
8108 X86FoldableSchedWrite sched> {
8109 let Uses = [MXCSR] in
8110 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8111 (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
8112 "$rc, $src", "$src, $rc",
8113 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
8114 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
8117 // Similar to avx512_vcvt_fp, but uses an extload for the memory form.
8118 multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8119 X86VectorVTInfo _Src, SDPatternOperator OpNode,
8121 X86FoldableSchedWrite sched,
8122 string Broadcast = _.BroadcastStr,
8123 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
8124 RegisterClass MaskRC = _.KRCWM>
8125 : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
8126 Alias, MemOp, MaskRC,
8127 (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
8128 (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
8130 // Extend [Float to Double, Half to Float]
8131 multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
8132 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8133 X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
8134 let Predicates = [prd] in {
8135 defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info512, _src.info256,
8136 any_fpextend, fpextend, sched.ZMM>,
8137 avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
8138 X86vfpextSAE, sched.ZMM>, EVEX_V512;
8140 let Predicates = [prd, HasVLX] in {
8141 defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
8142 X86any_vfpext, X86vfpext, sched.XMM,
8143 _dst.info128.BroadcastStr,
8144 "", f64mem>, EVEX_V128;
8145 defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
8146 any_fpextend, fpextend, sched.YMM>, EVEX_V256;
8150 // Truncate [Double to Float, Float to Half]
8151 multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
8152 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8153 X86SchedWriteWidths sched, Predicate prd = HasAVX512,
8154 PatFrag bcast128 = _src.info128.BroadcastLdFrag,
8155 PatFrag loadVT128 = _src.info128.LdFrag,
8156 RegisterClass maskRC128 = _src.info128.KRCWM> {
8157 let Predicates = [prd] in {
8158 defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
8159 X86any_vfpround, X86vfpround, sched.ZMM>,
8160 avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
8161 X86vfproundRnd, sched.ZMM>, EVEX_V512;
8163 let Predicates = [prd, HasVLX] in {
8164 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
8165 null_frag, null_frag, sched.XMM,
8166 _src.info128.BroadcastStr, "{x}",
8167 f128mem, maskRC128>, EVEX_V128;
8168 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
8169 X86any_vfpround, X86vfpround,
8170 sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;
8172 // Special patterns to allow use of X86vmfpround for masking. Instruction
8173 // patterns have been disabled with null_frag.
8174 def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
8175 (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
8176 def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
8178 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
8179 def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
8181 (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;
8183 def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
8184 (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
8185 def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
8187 (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
8188 def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
8190 (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;
8192 def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
8193 (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
8194 def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
8195 (_dst.info128.VT VR128X:$src0), maskRC128:$mask),
8196 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
8197 def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
8198 _dst.info128.ImmAllZerosV, maskRC128:$mask),
8199 (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
8202 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8203 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
8204 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8205 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8206 VK2WM:$mask, VR128X:$src), 0, "att">;
8207 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
8208 "$dst {${mask}} {z}, $src}",
8209 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8210 VK2WM:$mask, VR128X:$src), 0, "att">;
8211 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8212 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
8213 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8214 "$dst {${mask}}, ${src}{1to2}}",
8215 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8216 VK2WM:$mask, f64mem:$src), 0, "att">;
8217 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8218 "$dst {${mask}} {z}, ${src}{1to2}}",
8219 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8220 VK2WM:$mask, f64mem:$src), 0, "att">;
8222 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8223 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
8224 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8225 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8226 VK4WM:$mask, VR256X:$src), 0, "att">;
8227 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8228 "$dst {${mask}} {z}, $src}",
8229 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8230 VK4WM:$mask, VR256X:$src), 0, "att">;
8231 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8232 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
8233 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8234 "$dst {${mask}}, ${src}{1to4}}",
8235 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8236 VK4WM:$mask, f64mem:$src), 0, "att">;
8237 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8238 "$dst {${mask}} {z}, ${src}{1to4}}",
8239 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8240 VK4WM:$mask, f64mem:$src), 0, "att">;
8243 defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
8244 avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
8245 REX_W, PD, EVEX_CD8<64, CD8VF>;
8246 defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
8247 avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
8248 PS, EVEX_CD8<32, CD8VH>;
8250 // Extend Half to Double
8251 multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
8252 X86SchedWriteWidths sched> {
8253 let Predicates = [HasFP16] in {
8254 defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info,
8255 any_fpextend, fpextend, sched.ZMM>,
8256 avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info,
8257 X86vfpextSAE, sched.ZMM>, EVEX_V512;
8258 def : Pat<(v8f64 (extloadv8f16 addr:$src)),
8259 (!cast<Instruction>(NAME # "Zrm") addr:$src)>;
8261 let Predicates = [HasFP16, HasVLX] in {
8262 defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info,
8263 X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "",
8265 defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info,
8266 X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "",
8271 // Truncate Double to Half
8272 multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
8273 let Predicates = [HasFP16] in {
8274 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info,
8275 X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">,
8276 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info,
8277 X86vfproundRnd, sched.ZMM>, EVEX_V512;
8279 let Predicates = [HasFP16, HasVLX] in {
8280 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag,
8281 null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8283 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag,
8284 null_frag, sched.YMM, "{1to4}", "{y}", f256mem,
8287 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8288 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8289 VR128X:$src), 0, "att">;
8290 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8291 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8292 VK2WM:$mask, VR128X:$src), 0, "att">;
8293 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8294 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8295 VK2WM:$mask, VR128X:$src), 0, "att">;
8296 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8297 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8298 i64mem:$src), 0, "att">;
8299 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8300 "$dst {${mask}}, ${src}{1to2}}",
8301 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8302 VK2WM:$mask, i64mem:$src), 0, "att">;
8303 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8304 "$dst {${mask}} {z}, ${src}{1to2}}",
8305 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8306 VK2WM:$mask, i64mem:$src), 0, "att">;
8308 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8309 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8310 VR256X:$src), 0, "att">;
8311 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8312 "$dst {${mask}}, $src}",
8313 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8314 VK4WM:$mask, VR256X:$src), 0, "att">;
8315 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8316 "$dst {${mask}} {z}, $src}",
8317 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8318 VK4WM:$mask, VR256X:$src), 0, "att">;
8319 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8320 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8321 i64mem:$src), 0, "att">;
8322 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8323 "$dst {${mask}}, ${src}{1to4}}",
8324 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8325 VK4WM:$mask, i64mem:$src), 0, "att">;
8326 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8327 "$dst {${mask}} {z}, ${src}{1to4}}",
8328 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8329 VK4WM:$mask, i64mem:$src), 0, "att">;
8331 def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
8332 (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
8333 VR512:$src), 0, "att">;
8334 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
8335 "$dst {${mask}}, $src}",
8336 (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
8337 VK8WM:$mask, VR512:$src), 0, "att">;
8338 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
8339 "$dst {${mask}} {z}, $src}",
8340 (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
8341 VK8WM:$mask, VR512:$src), 0, "att">;
8342 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
8343 (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
8344 i64mem:$src), 0, "att">;
8345 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
8346 "$dst {${mask}}, ${src}{1to8}}",
8347 (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
8348 VK8WM:$mask, i64mem:$src), 0, "att">;
8349 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
8350 "$dst {${mask}} {z}, ${src}{1to8}}",
8351 (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
8352 VK8WM:$mask, i64mem:$src), 0, "att">;
8355 defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info,
8356 avx512vl_f32_info, SchedWriteCvtPD2PS,
8357 HasFP16>, T_MAP5PD, EVEX_CD8<32, CD8VF>;
8358 defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info,
8359 avx512vl_f16_info, SchedWriteCvtPS2PD,
8360 HasFP16>, T_MAP6PD, EVEX_CD8<16, CD8VH>;
8361 defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>,
8362 REX_W, T_MAP5PD, EVEX_CD8<64, CD8VF>;
8363 defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>,
8364 T_MAP5PS, EVEX_CD8<16, CD8VQ>;
8366 let Predicates = [HasFP16, HasVLX] in {
8367 // Special patterns to allow use of X86vmfpround for masking. Instruction
8368 // patterns have been disabled with null_frag.
8369 def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))),
8370 (VCVTPD2PHZ256rr VR256X:$src)>;
8371 def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0),
8373 (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
8374 def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV,
8376 (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
8378 def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))),
8379 (VCVTPD2PHZ256rm addr:$src)>;
8380 def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0),
8382 (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8383 def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV,
8385 (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>;
8387 def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))),
8388 (VCVTPD2PHZ256rmb addr:$src)>;
8389 def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8390 (v8f16 VR128X:$src0), VK4WM:$mask),
8391 (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8392 def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8393 v8f16x_info.ImmAllZerosV, VK4WM:$mask),
8394 (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
8396 def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))),
8397 (VCVTPD2PHZ128rr VR128X:$src)>;
8398 def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0),
8400 (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8401 def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV,
8403 (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
8405 def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))),
8406 (VCVTPD2PHZ128rm addr:$src)>;
8407 def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0),
8409 (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8410 def : Pat<(X86vmfpround (loadv2f64 addr:$src), v8f16x_info.ImmAllZerosV,
8412 (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>;
8414 def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))),
8415 (VCVTPD2PHZ128rmb addr:$src)>;
8416 def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8417 (v8f16 VR128X:$src0), VK2WM:$mask),
8418 (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8419 def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8420 v8f16x_info.ImmAllZerosV, VK2WM:$mask),
8421 (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
8424 // Convert Signed/Unsigned Doubleword to Double
8425 let Uses = []<Register>, mayRaiseFPException = 0 in
8426 multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8427 SDNode MaskOpNode, SDPatternOperator OpNode128,
8428 SDNode MaskOpNode128,
8429 X86SchedWriteWidths sched> {
8430 // No rounding in this op
8431 let Predicates = [HasAVX512] in
8432 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
8433 MaskOpNode, sched.ZMM>, EVEX_V512;
8435 let Predicates = [HasVLX] in {
8436 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
8437 OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
8439 (v2f64 (OpNode128 (bc_v4i32
8441 (scalar_to_vector (loadi64 addr:$src)))))),
8442 (v2f64 (MaskOpNode128 (bc_v4i32
8444 (scalar_to_vector (loadi64 addr:$src))))))>,
8446 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
8447 MaskOpNode, sched.YMM>, EVEX_V256;
8451 // Convert Signed/Unsigned Doubleword to Float
8452 multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8453 SDNode MaskOpNode, SDNode OpNodeRnd,
8454 X86SchedWriteWidths sched> {
8455 let Predicates = [HasAVX512] in
8456 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
8457 MaskOpNode, sched.ZMM>,
8458 avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
8459 OpNodeRnd, sched.ZMM>, EVEX_V512;
8461 let Predicates = [HasVLX] in {
8462 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
8463 MaskOpNode, sched.XMM>, EVEX_V128;
8464 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
8465 MaskOpNode, sched.YMM>, EVEX_V256;
8469 // Convert Float to Signed/Unsigned Doubleword with truncation
8470 multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8472 SDNode OpNodeSAE, X86SchedWriteWidths sched> {
8473 let Predicates = [HasAVX512] in {
8474 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8475 MaskOpNode, sched.ZMM>,
8476 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
8477 OpNodeSAE, sched.ZMM>, EVEX_V512;
8479 let Predicates = [HasVLX] in {
8480 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8481 MaskOpNode, sched.XMM>, EVEX_V128;
8482 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8483 MaskOpNode, sched.YMM>, EVEX_V256;
8487 // Convert Float to Signed/Unsigned Doubleword
8488 multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8489 SDNode MaskOpNode, SDNode OpNodeRnd,
8490 X86SchedWriteWidths sched> {
8491 let Predicates = [HasAVX512] in {
8492 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8493 MaskOpNode, sched.ZMM>,
8494 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
8495 OpNodeRnd, sched.ZMM>, EVEX_V512;
8497 let Predicates = [HasVLX] in {
8498 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8499 MaskOpNode, sched.XMM>, EVEX_V128;
8500 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8501 MaskOpNode, sched.YMM>, EVEX_V256;
8505 // Convert Double to Signed/Unsigned Doubleword with truncation
8506 multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8507 SDNode MaskOpNode, SDNode OpNodeSAE,
8508 X86SchedWriteWidths sched> {
8509 let Predicates = [HasAVX512] in {
8510 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8511 MaskOpNode, sched.ZMM>,
8512 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
8513 OpNodeSAE, sched.ZMM>, EVEX_V512;
8515 let Predicates = [HasVLX] in {
8516 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8517 // memory forms of these instructions in Asm Parser. They have the same
8518 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8519 // due to the same reason.
8520 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8521 null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8523 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8524 MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8527 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8528 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8529 VR128X:$src), 0, "att">;
8530 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8531 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8532 VK2WM:$mask, VR128X:$src), 0, "att">;
8533 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8534 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8535 VK2WM:$mask, VR128X:$src), 0, "att">;
8536 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8537 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8538 f64mem:$src), 0, "att">;
8539 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8540 "$dst {${mask}}, ${src}{1to2}}",
8541 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8542 VK2WM:$mask, f64mem:$src), 0, "att">;
8543 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8544 "$dst {${mask}} {z}, ${src}{1to2}}",
8545 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8546 VK2WM:$mask, f64mem:$src), 0, "att">;
8548 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8549 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8550 VR256X:$src), 0, "att">;
8551 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8552 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8553 VK4WM:$mask, VR256X:$src), 0, "att">;
8554 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8555 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8556 VK4WM:$mask, VR256X:$src), 0, "att">;
8557 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8558 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8559 f64mem:$src), 0, "att">;
8560 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8561 "$dst {${mask}}, ${src}{1to4}}",
8562 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8563 VK4WM:$mask, f64mem:$src), 0, "att">;
8564 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8565 "$dst {${mask}} {z}, ${src}{1to4}}",
8566 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8567 VK4WM:$mask, f64mem:$src), 0, "att">;
8570 // Convert Double to Signed/Unsigned Doubleword
8571 multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8572 SDNode MaskOpNode, SDNode OpNodeRnd,
8573 X86SchedWriteWidths sched> {
8574 let Predicates = [HasAVX512] in {
8575 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8576 MaskOpNode, sched.ZMM>,
8577 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
8578 OpNodeRnd, sched.ZMM>, EVEX_V512;
8580 let Predicates = [HasVLX] in {
8581 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8582 // memory forms of these instructions in Asm Parcer. They have the same
8583 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8584 // due to the same reason.
8585 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8586 null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8588 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8589 MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8592 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8593 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
8594 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8595 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8596 VK2WM:$mask, VR128X:$src), 0, "att">;
8597 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8598 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8599 VK2WM:$mask, VR128X:$src), 0, "att">;
8600 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8601 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8602 f64mem:$src), 0, "att">;
8603 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8604 "$dst {${mask}}, ${src}{1to2}}",
8605 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8606 VK2WM:$mask, f64mem:$src), 0, "att">;
8607 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8608 "$dst {${mask}} {z}, ${src}{1to2}}",
8609 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8610 VK2WM:$mask, f64mem:$src), 0, "att">;
8612 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8613 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
8614 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8615 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8616 VK4WM:$mask, VR256X:$src), 0, "att">;
8617 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8618 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8619 VK4WM:$mask, VR256X:$src), 0, "att">;
8620 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8621 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8622 f64mem:$src), 0, "att">;
8623 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8624 "$dst {${mask}}, ${src}{1to4}}",
8625 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8626 VK4WM:$mask, f64mem:$src), 0, "att">;
8627 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8628 "$dst {${mask}} {z}, ${src}{1to4}}",
8629 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8630 VK4WM:$mask, f64mem:$src), 0, "att">;
8633 // Convert Double to Signed/Unsigned Quardword
8634 multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8635 SDNode MaskOpNode, SDNode OpNodeRnd,
8636 X86SchedWriteWidths sched> {
8637 let Predicates = [HasDQI] in {
8638 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8639 MaskOpNode, sched.ZMM>,
8640 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
8641 OpNodeRnd, sched.ZMM>, EVEX_V512;
8643 let Predicates = [HasDQI, HasVLX] in {
8644 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8645 MaskOpNode, sched.XMM>, EVEX_V128;
8646 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8647 MaskOpNode, sched.YMM>, EVEX_V256;
8651 // Convert Double to Signed/Unsigned Quardword with truncation
8652 multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8653 SDNode MaskOpNode, SDNode OpNodeRnd,
8654 X86SchedWriteWidths sched> {
8655 let Predicates = [HasDQI] in {
8656 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8657 MaskOpNode, sched.ZMM>,
8658 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
8659 OpNodeRnd, sched.ZMM>, EVEX_V512;
8661 let Predicates = [HasDQI, HasVLX] in {
8662 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8663 MaskOpNode, sched.XMM>, EVEX_V128;
8664 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8665 MaskOpNode, sched.YMM>, EVEX_V256;
8669 // Convert Signed/Unsigned Quardword to Double
8670 multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8671 SDNode MaskOpNode, SDNode OpNodeRnd,
8672 X86SchedWriteWidths sched> {
8673 let Predicates = [HasDQI] in {
8674 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
8675 MaskOpNode, sched.ZMM>,
8676 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
8677 OpNodeRnd, sched.ZMM>, EVEX_V512;
8679 let Predicates = [HasDQI, HasVLX] in {
8680 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
8681 MaskOpNode, sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
8682 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
8683 MaskOpNode, sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
8687 // Convert Float to Signed/Unsigned Quardword
8688 multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8689 SDNode MaskOpNode, SDNode OpNodeRnd,
8690 X86SchedWriteWidths sched> {
8691 let Predicates = [HasDQI] in {
8692 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8693 MaskOpNode, sched.ZMM>,
8694 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
8695 OpNodeRnd, sched.ZMM>, EVEX_V512;
8697 let Predicates = [HasDQI, HasVLX] in {
8698 // Explicitly specified broadcast string, since we take only 2 elements
8699 // from v4f32x_info source
8700 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8701 MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8702 (v2i64 (OpNode (bc_v4f32
8704 (scalar_to_vector (loadf64 addr:$src)))))),
8705 (v2i64 (MaskOpNode (bc_v4f32
8707 (scalar_to_vector (loadf64 addr:$src))))))>,
8709 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8710 MaskOpNode, sched.YMM>, EVEX_V256;
8714 // Convert Float to Signed/Unsigned Quardword with truncation
8715 multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8716 SDNode MaskOpNode, SDNode OpNodeRnd,
8717 X86SchedWriteWidths sched> {
8718 let Predicates = [HasDQI] in {
8719 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8720 MaskOpNode, sched.ZMM>,
8721 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
8722 OpNodeRnd, sched.ZMM>, EVEX_V512;
8724 let Predicates = [HasDQI, HasVLX] in {
8725 // Explicitly specified broadcast string, since we take only 2 elements
8726 // from v4f32x_info source
8727 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8728 MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8729 (v2i64 (OpNode (bc_v4f32
8731 (scalar_to_vector (loadf64 addr:$src)))))),
8732 (v2i64 (MaskOpNode (bc_v4f32
8734 (scalar_to_vector (loadf64 addr:$src))))))>,
8736 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8737 MaskOpNode, sched.YMM>, EVEX_V256;
8741 // Convert Signed/Unsigned Quardword to Float
8742 // Also Convert Signed/Unsigned Doubleword to Half
8743 multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8744 SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
8745 SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
8746 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8747 X86SchedWriteWidths sched, Predicate prd = HasDQI> {
8748 let Predicates = [prd] in {
8749 defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
8750 MaskOpNode, sched.ZMM>,
8751 avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
8752 OpNodeRnd, sched.ZMM>, EVEX_V512;
8754 let Predicates = [prd, HasVLX] in {
8755 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8756 // memory forms of these instructions in Asm Parcer. They have the same
8757 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8758 // due to the same reason.
8759 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
8760 null_frag, sched.XMM, _src.info128.BroadcastStr,
8761 "{x}", i128mem, _src.info128.KRCWM>,
8762 EVEX_V128, NotEVEX2VEXConvertible;
8763 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
8764 MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
8766 NotEVEX2VEXConvertible;
8768 // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
8769 // patterns have been disabled with null_frag.
8770 def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
8771 (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
8772 def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
8773 _src.info128.KRCWM:$mask),
8774 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
8775 def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
8776 _src.info128.KRCWM:$mask),
8777 (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;
8779 def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
8780 (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
8781 def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
8782 _src.info128.KRCWM:$mask),
8783 (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8784 def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
8785 _src.info128.KRCWM:$mask),
8786 (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;
8788 def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))),
8789 (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
8790 def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8791 (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask),
8792 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8793 def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8794 _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask),
8795 (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>;
8798 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8799 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8800 VR128X:$src), 0, "att">;
8801 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8802 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8803 VK2WM:$mask, VR128X:$src), 0, "att">;
8804 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8805 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8806 VK2WM:$mask, VR128X:$src), 0, "att">;
8807 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8808 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8809 i64mem:$src), 0, "att">;
8810 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8811 "$dst {${mask}}, ${src}{1to2}}",
8812 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8813 VK2WM:$mask, i64mem:$src), 0, "att">;
8814 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8815 "$dst {${mask}} {z}, ${src}{1to2}}",
8816 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8817 VK2WM:$mask, i64mem:$src), 0, "att">;
8819 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8820 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8821 VR256X:$src), 0, "att">;
8822 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8823 "$dst {${mask}}, $src}",
8824 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8825 VK4WM:$mask, VR256X:$src), 0, "att">;
8826 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8827 "$dst {${mask}} {z}, $src}",
8828 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8829 VK4WM:$mask, VR256X:$src), 0, "att">;
8830 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8831 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8832 i64mem:$src), 0, "att">;
8833 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8834 "$dst {${mask}}, ${src}{1to4}}",
8835 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8836 VK4WM:$mask, i64mem:$src), 0, "att">;
8837 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8838 "$dst {${mask}} {z}, ${src}{1to4}}",
8839 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8840 VK4WM:$mask, i64mem:$src), 0, "att">;
8843 defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
8844 X86any_VSintToFP, X86VSintToFP,
8845 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8847 defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
8848 X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8849 PS, EVEX_CD8<32, CD8VF>;
8851 defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
8852 X86cvttp2si, X86cvttp2siSAE,
8853 SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>;
8855 defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
8856 X86cvttp2si, X86cvttp2siSAE,
8857 SchedWriteCvtPD2DQ>,
8858 PD, REX_W, EVEX_CD8<64, CD8VF>;
8860 defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
8861 X86cvttp2ui, X86cvttp2uiSAE,
8862 SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>;
8864 defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
8865 X86cvttp2ui, X86cvttp2uiSAE,
8866 SchedWriteCvtPD2DQ>,
8867 PS, REX_W, EVEX_CD8<64, CD8VF>;
8869 defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
8870 uint_to_fp, X86any_VUintToFP, X86VUintToFP,
8871 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8873 defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
8874 uint_to_fp, X86VUintToFpRnd,
8875 SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>;
8877 defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
8878 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8879 EVEX_CD8<32, CD8VF>;
8881 defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
8882 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
8883 REX_W, EVEX_CD8<64, CD8VF>;
8885 defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
8886 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8887 PS, EVEX_CD8<32, CD8VF>;
8889 defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
8890 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W,
8891 PS, EVEX_CD8<64, CD8VF>;
8893 defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
8894 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, REX_W,
8895 PD, EVEX_CD8<64, CD8VF>;
8897 defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
8898 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8899 EVEX_CD8<32, CD8VH>;
8901 defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
8902 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W,
8903 PD, EVEX_CD8<64, CD8VF>;
8905 defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
8906 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
8907 EVEX_CD8<32, CD8VH>;
8909 defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
8910 X86cvttp2si, X86cvttp2siSAE,
8911 SchedWriteCvtPD2DQ>, REX_W,
8912 PD, EVEX_CD8<64, CD8VF>;
8914 defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
8915 X86cvttp2si, X86cvttp2siSAE,
8916 SchedWriteCvtPS2DQ>, PD,
8917 EVEX_CD8<32, CD8VH>;
8919 defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
8920 X86cvttp2ui, X86cvttp2uiSAE,
8921 SchedWriteCvtPD2DQ>, REX_W,
8922 PD, EVEX_CD8<64, CD8VF>;
8924 defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
8925 X86cvttp2ui, X86cvttp2uiSAE,
8926 SchedWriteCvtPS2DQ>, PD,
8927 EVEX_CD8<32, CD8VH>;
8929 defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
8930 sint_to_fp, X86VSintToFpRnd,
8931 SchedWriteCvtDQ2PD>, REX_W, XS, EVEX_CD8<64, CD8VF>;
8933 defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
8934 uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
8935 REX_W, XS, EVEX_CD8<64, CD8VF>;
8937 defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp,
8938 X86any_VSintToFP, X86VMSintToFP,
8939 X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8940 SchedWriteCvtDQ2PS, HasFP16>,
8941 T_MAP5PS, EVEX_CD8<32, CD8VF>;
8943 defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp,
8944 X86any_VUintToFP, X86VMUintToFP,
8945 X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8946 SchedWriteCvtDQ2PS, HasFP16>, T_MAP5XD,
8947 EVEX_CD8<32, CD8VF>;
8949 defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp,
8950 X86any_VSintToFP, X86VMSintToFP,
8951 X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8952 SchedWriteCvtDQ2PS>, REX_W, PS,
8953 EVEX_CD8<64, CD8VF>;
8955 defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp,
8956 X86any_VUintToFP, X86VMUintToFP,
8957 X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8958 SchedWriteCvtDQ2PS>, REX_W, XD,
8959 EVEX_CD8<64, CD8VF>;
8961 let Predicates = [HasVLX] in {
8962 // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8963 // patterns have been disabled with null_frag.
8964 def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8965 (VCVTPD2DQZ128rr VR128X:$src)>;
8966 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8968 (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8969 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8971 (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8973 def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8974 (VCVTPD2DQZ128rm addr:$src)>;
8975 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8977 (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8978 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8980 (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8982 def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
8983 (VCVTPD2DQZ128rmb addr:$src)>;
8984 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8985 (v4i32 VR128X:$src0), VK2WM:$mask),
8986 (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8987 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8988 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8989 (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8991 // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8992 // patterns have been disabled with null_frag.
8993 def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
8994 (VCVTTPD2DQZ128rr VR128X:$src)>;
8995 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8997 (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8998 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
9000 (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
9002 def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
9003 (VCVTTPD2DQZ128rm addr:$src)>;
9004 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
9006 (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9007 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
9009 (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
9011 def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
9012 (VCVTTPD2DQZ128rmb addr:$src)>;
9013 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
9014 (v4i32 VR128X:$src0), VK2WM:$mask),
9015 (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9016 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
9017 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
9018 (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
9020 // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
9021 // patterns have been disabled with null_frag.
9022 def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
9023 (VCVTPD2UDQZ128rr VR128X:$src)>;
9024 def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
9026 (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
9027 def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
9029 (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
9031 def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
9032 (VCVTPD2UDQZ128rm addr:$src)>;
9033 def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
9035 (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9036 def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
9038 (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
9040 def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
9041 (VCVTPD2UDQZ128rmb addr:$src)>;
9042 def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
9043 (v4i32 VR128X:$src0), VK2WM:$mask),
9044 (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9045 def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
9046 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
9047 (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
9049 // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
9050 // patterns have been disabled with null_frag.
9051 def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
9052 (VCVTTPD2UDQZ128rr VR128X:$src)>;
9053 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
9055 (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
9056 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
9058 (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
9060 def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
9061 (VCVTTPD2UDQZ128rm addr:$src)>;
9062 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
9064 (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9065 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
9067 (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
9069 def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
9070 (VCVTTPD2UDQZ128rmb addr:$src)>;
9071 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
9072 (v4i32 VR128X:$src0), VK2WM:$mask),
9073 (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9074 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
9075 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
9076 (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
9079 let Predicates = [HasDQI, HasVLX] in {
9080 def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9081 (VCVTPS2QQZ128rm addr:$src)>;
9082 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9083 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9085 (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9086 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9087 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9088 v2i64x_info.ImmAllZerosV)),
9089 (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
9091 def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9092 (VCVTPS2UQQZ128rm addr:$src)>;
9093 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9094 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9096 (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9097 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9098 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9099 v2i64x_info.ImmAllZerosV)),
9100 (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
9102 def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9103 (VCVTTPS2QQZ128rm addr:$src)>;
9104 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9105 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9107 (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9108 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9109 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9110 v2i64x_info.ImmAllZerosV)),
9111 (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
9113 def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9114 (VCVTTPS2UQQZ128rm addr:$src)>;
9115 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9116 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9118 (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9119 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9120 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9121 v2i64x_info.ImmAllZerosV)),
9122 (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
9125 let Predicates = [HasVLX] in {
9126 def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
9127 (VCVTDQ2PDZ128rm addr:$src)>;
9128 def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9129 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9131 (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9132 def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9133 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9134 v2f64x_info.ImmAllZerosV)),
9135 (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
9137 def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
9138 (VCVTUDQ2PDZ128rm addr:$src)>;
9139 def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9140 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9142 (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9143 def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9144 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9145 v2f64x_info.ImmAllZerosV)),
9146 (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
9149 //===----------------------------------------------------------------------===//
9150 // Half precision conversion instructions
9151 //===----------------------------------------------------------------------===//
9153 let Uses = [MXCSR], mayRaiseFPException = 1 in
9154 multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9155 X86MemOperand x86memop, dag ld_dag,
9156 X86FoldableSchedWrite sched> {
9157 defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
9158 (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
9159 (X86any_cvtph2ps (_src.VT _src.RC:$src)),
9160 (X86cvtph2ps (_src.VT _src.RC:$src))>,
9161 T8PD, Sched<[sched]>;
9162 defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
9163 (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
9164 (X86any_cvtph2ps (_src.VT ld_dag)),
9165 (X86cvtph2ps (_src.VT ld_dag))>,
9166 T8PD, Sched<[sched.Folded]>;
9169 multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9170 X86FoldableSchedWrite sched> {
9171 let Uses = [MXCSR] in
9172 defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
9173 (ins _src.RC:$src), "vcvtph2ps",
9174 "{sae}, $src", "$src, {sae}",
9175 (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
9176 T8PD, EVEX_B, Sched<[sched]>;
9179 let Predicates = [HasAVX512] in
9180 defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
9181 (load addr:$src), WriteCvtPH2PSZ>,
9182 avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
9183 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
9185 let Predicates = [HasVLX] in {
9186 defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
9187 (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
9188 EVEX_CD8<32, CD8VH>;
9189 defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
9190 (bitconvert (v2i64 (X86vzload64 addr:$src))),
9191 WriteCvtPH2PS>, EVEX, EVEX_V128,
9192 EVEX_CD8<32, CD8VH>;
9194 // Pattern match vcvtph2ps of a scalar i64 load.
9195 def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
9196 (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
9197 (VCVTPH2PSZ128rm addr:$src)>;
9200 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9201 X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
9202 let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9203 def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9204 (ins _src.RC:$src1, i32u8imm:$src2),
9205 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9206 [(set _dest.RC:$dst,
9207 (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
9209 let Constraints = "$src0 = $dst" in
9210 def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9211 (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9212 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
9213 [(set _dest.RC:$dst,
9214 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
9215 _dest.RC:$src0, _src.KRCWM:$mask))]>,
9216 Sched<[RR]>, EVEX_K;
9217 def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9218 (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9219 "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
9220 [(set _dest.RC:$dst,
9221 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
9222 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
9223 Sched<[RR]>, EVEX_KZ;
9224 let hasSideEffects = 0, mayStore = 1 in {
9225 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
9226 (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
9227 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9229 def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
9230 (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9231 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
9232 EVEX_K, Sched<[MR]>;
9237 multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9239 let hasSideEffects = 0, Uses = [MXCSR] in {
9240 def rrb : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9241 (ins _src.RC:$src1, i32u8imm:$src2),
9242 "vcvtps2ph\t{$src2, {sae}, $src1, $dst|$dst, $src1, {sae}, $src2}",
9243 [(set _dest.RC:$dst,
9244 (X86cvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
9245 EVEX_B, Sched<[Sched]>;
9246 let Constraints = "$src0 = $dst" in
9247 def rrbk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9248 (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9249 "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}}|$dst {${mask}}, $src1, {sae}, $src2}",
9250 [(set _dest.RC:$dst,
9251 (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
9252 _dest.RC:$src0, _src.KRCWM:$mask))]>,
9253 EVEX_B, Sched<[Sched]>, EVEX_K;
9254 def rrbkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9255 (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9256 "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, {sae}, $src2}",
9257 [(set _dest.RC:$dst,
9258 (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
9259 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
9260 EVEX_B, Sched<[Sched]>, EVEX_KZ;
9264 let Predicates = [HasAVX512] in {
9265 defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
9266 WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
9267 avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
9268 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
9270 def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
9271 (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
9274 let Predicates = [HasVLX] in {
9275 defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
9276 WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
9277 EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
9278 defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
9279 WriteCvtPS2PH, WriteCvtPS2PHSt>,
9280 EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
9282 def : Pat<(store (f64 (extractelt
9283 (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9284 (iPTR 0))), addr:$dst),
9285 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
9286 def : Pat<(store (i64 (extractelt
9287 (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9288 (iPTR 0))), addr:$dst),
9289 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
9290 def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
9291 (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
9294 // Unordered/Ordered scalar fp compare with Sae and set EFLAGS
9295 multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
9296 string OpcodeStr, Domain d,
9297 X86FoldableSchedWrite sched = WriteFComX> {
9298 let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in
9299 def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
9300 !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
9301 EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
9304 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9305 defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
9306 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9307 defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
9308 AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
9309 defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
9310 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9311 defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
9312 AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
9315 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9316 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
9317 "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9318 EVEX_CD8<32, CD8VT1>;
9319 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
9320 "ucomisd", SSEPackedDouble>, PD, EVEX,
9321 VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9322 defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
9323 "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9324 EVEX_CD8<32, CD8VT1>;
9325 defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
9326 "comisd", SSEPackedDouble>, PD, EVEX,
9327 VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9328 let isCodeGenOnly = 1 in {
9329 defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
9330 sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9331 EVEX_CD8<32, CD8VT1>;
9332 defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
9333 sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX,
9334 VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9336 defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
9337 sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9338 EVEX_CD8<32, CD8VT1>;
9339 defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
9340 sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX,
9341 VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9345 let Defs = [EFLAGS], Predicates = [HasFP16] in {
9346 defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish",
9347 SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
9348 EVEX_CD8<16, CD8VT1>;
9349 defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish",
9350 SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
9351 EVEX_CD8<16, CD8VT1>;
9352 defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16,
9353 "ucomish", SSEPackedSingle>, T_MAP5PS, EVEX,
9354 VEX_LIG, EVEX_CD8<16, CD8VT1>;
9355 defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16,
9356 "comish", SSEPackedSingle>, T_MAP5PS, EVEX,
9357 VEX_LIG, EVEX_CD8<16, CD8VT1>;
9358 let isCodeGenOnly = 1 in {
9359 defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem,
9360 sse_load_f16, "ucomish", SSEPackedSingle>,
9361 T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9363 defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem,
9364 sse_load_f16, "comish", SSEPackedSingle>,
9365 T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9369 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh
9370 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9371 X86FoldableSchedWrite sched, X86VectorVTInfo _,
9372 Predicate prd = HasAVX512> {
9373 let Predicates = [prd], ExeDomain = _.ExeDomain in {
9374 defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9375 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9376 "$src2, $src1", "$src1, $src2",
9377 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9378 EVEX_4V, VEX_LIG, Sched<[sched]>;
9379 defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9380 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9381 "$src2, $src1", "$src1, $src2",
9382 (OpNode (_.VT _.RC:$src1),
9383 (_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG,
9384 Sched<[sched.Folded, sched.ReadAfterFold]>;
9388 defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl,
9389 f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>,
9391 defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s,
9392 SchedWriteFRsqrt.Scl, f16x_info, HasFP16>,
9393 EVEX_CD8<16, CD8VT1>, T_MAP6PD;
9394 let Uses = [MXCSR] in {
9395 defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
9396 f32x_info>, EVEX_CD8<32, CD8VT1>,
9398 defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
9399 f64x_info>, REX_W, EVEX_CD8<64, CD8VT1>,
9401 defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
9402 SchedWriteFRsqrt.Scl, f32x_info>,
9403 EVEX_CD8<32, CD8VT1>, T8PD;
9404 defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
9405 SchedWriteFRsqrt.Scl, f64x_info>, REX_W,
9406 EVEX_CD8<64, CD8VT1>, T8PD;
9409 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
9410 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
9411 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9412 let ExeDomain = _.ExeDomain in {
9413 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9414 (ins _.RC:$src), OpcodeStr, "$src", "$src",
9415 (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
9417 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9418 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9420 (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
9421 Sched<[sched.Folded, sched.ReadAfterFold]>;
9422 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9423 (ins _.ScalarMemOp:$src), OpcodeStr,
9424 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9426 (_.BroadcastLdFrag addr:$src)))>,
9427 EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9431 multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
9432 X86SchedWriteWidths sched> {
9433 let Uses = [MXCSR] in {
9434 defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM,
9435 v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
9436 defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM,
9437 v8f64_info>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
9439 let Predicates = [HasFP16] in
9440 defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
9441 v32f16_info>, EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9443 // Define only if AVX512VL feature is present.
9444 let Predicates = [HasVLX], Uses = [MXCSR] in {
9445 defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9446 OpNode, sched.XMM, v4f32x_info>,
9447 EVEX_V128, EVEX_CD8<32, CD8VF>;
9448 defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9449 OpNode, sched.YMM, v8f32x_info>,
9450 EVEX_V256, EVEX_CD8<32, CD8VF>;
9451 defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9452 OpNode, sched.XMM, v2f64x_info>,
9453 EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>;
9454 defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9455 OpNode, sched.YMM, v4f64x_info>,
9456 EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>;
9458 let Predicates = [HasFP16, HasVLX] in {
9459 defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9460 OpNode, sched.XMM, v8f16x_info>,
9461 EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9462 defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9463 OpNode, sched.YMM, v16f16x_info>,
9464 EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9468 defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>;
9469 defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>;
9471 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
9472 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
9473 SDNode OpNode, SDNode OpNodeSAE,
9474 X86FoldableSchedWrite sched> {
9475 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
9476 defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9477 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9478 "$src2, $src1", "$src1, $src2",
9479 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9480 Sched<[sched]>, SIMD_EXC;
9482 defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9483 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9484 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
9485 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9486 EVEX_B, Sched<[sched]>;
9488 defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9489 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9490 "$src2, $src1", "$src1, $src2",
9491 (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
9492 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9496 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9497 SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9498 defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
9499 sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD, EVEX_4V;
9500 defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
9501 sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8PD, EVEX_4V;
9504 multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
9505 SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9506 let Predicates = [HasFP16] in
9507 defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode, OpNodeSAE, sched>,
9508 EVEX_CD8<16, CD8VT1>, T_MAP6PD, EVEX_4V;
9511 let Predicates = [HasERI] in {
9512 defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
9513 SchedWriteFRcp.Scl>;
9514 defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
9515 SchedWriteFRsqrt.Scl>;
9518 defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9519 SchedWriteFRnd.Scl>,
9520 avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9521 SchedWriteFRnd.Scl>;
9522 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
9524 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9525 SDNode OpNode, X86FoldableSchedWrite sched> {
9526 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9527 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9528 (ins _.RC:$src), OpcodeStr, "$src", "$src",
9529 (OpNode (_.VT _.RC:$src))>,
9532 defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9533 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9535 (bitconvert (_.LdFrag addr:$src))))>,
9536 Sched<[sched.Folded, sched.ReadAfterFold]>;
9538 defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9539 (ins _.ScalarMemOp:$src), OpcodeStr,
9540 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9542 (_.BroadcastLdFrag addr:$src)))>,
9543 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9546 multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9547 SDNode OpNode, X86FoldableSchedWrite sched> {
9548 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
9549 defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9550 (ins _.RC:$src), OpcodeStr,
9551 "{sae}, $src", "$src, {sae}",
9552 (OpNode (_.VT _.RC:$src))>,
9553 EVEX_B, Sched<[sched]>;
9556 multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
9557 SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9558 defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
9559 avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
9560 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
9561 defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
9562 avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
9563 T8PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
9566 multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
9567 SDNode OpNode, X86SchedWriteWidths sched> {
9568 // Define only if AVX512VL feature is present.
9569 let Predicates = [HasVLX] in {
9570 defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
9572 EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
9573 defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
9575 EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
9576 defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
9578 EVEX_V128, REX_W, T8PD, EVEX_CD8<64, CD8VF>;
9579 defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
9581 EVEX_V256, REX_W, T8PD, EVEX_CD8<64, CD8VF>;
9585 multiclass avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
9586 SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9587 let Predicates = [HasFP16] in
9588 defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>,
9589 avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>,
9590 T_MAP6PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
9591 let Predicates = [HasFP16, HasVLX] in {
9592 defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>,
9593 EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9594 defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>,
9595 EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9598 let Predicates = [HasERI] in {
9599 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
9600 SchedWriteFRsqrt>, EVEX;
9601 defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
9602 SchedWriteFRcp>, EVEX;
9603 defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
9604 SchedWriteFAdd>, EVEX;
9606 defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9608 avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9610 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
9611 SchedWriteFRnd>, EVEX;
9613 multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
9614 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9615 let ExeDomain = _.ExeDomain in
9616 defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9617 (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
9618 (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
9619 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
9622 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
9623 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9624 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9625 defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
9626 (ins _.RC:$src), OpcodeStr, "$src", "$src",
9627 (_.VT (any_fsqrt _.RC:$src)),
9628 (_.VT (fsqrt _.RC:$src))>, EVEX,
9630 defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9631 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9632 (any_fsqrt (_.VT (_.LdFrag addr:$src))),
9633 (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
9634 Sched<[sched.Folded, sched.ReadAfterFold]>;
9635 defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9636 (ins _.ScalarMemOp:$src), OpcodeStr,
9637 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9638 (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
9639 (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
9640 EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9644 let Uses = [MXCSR], mayRaiseFPException = 1 in
9645 multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
9646 X86SchedWriteSizes sched> {
9647 let Predicates = [HasFP16] in
9648 defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9649 sched.PH.ZMM, v32f16_info>,
9650 EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9651 let Predicates = [HasFP16, HasVLX] in {
9652 defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9653 sched.PH.XMM, v8f16x_info>,
9654 EVEX_V128, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9655 defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9656 sched.PH.YMM, v16f16x_info>,
9657 EVEX_V256, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9659 defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9660 sched.PS.ZMM, v16f32_info>,
9661 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
9662 defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9663 sched.PD.ZMM, v8f64_info>,
9664 EVEX_V512, REX_W, PD, EVEX_CD8<64, CD8VF>;
9665 // Define only if AVX512VL feature is present.
9666 let Predicates = [HasVLX] in {
9667 defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9668 sched.PS.XMM, v4f32x_info>,
9669 EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
9670 defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9671 sched.PS.YMM, v8f32x_info>,
9672 EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
9673 defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9674 sched.PD.XMM, v2f64x_info>,
9675 EVEX_V128, REX_W, PD, EVEX_CD8<64, CD8VF>;
9676 defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9677 sched.PD.YMM, v4f64x_info>,
9678 EVEX_V256, REX_W, PD, EVEX_CD8<64, CD8VF>;
9682 let Uses = [MXCSR] in
9683 multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
9684 X86SchedWriteSizes sched> {
9685 let Predicates = [HasFP16] in
9686 defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
9687 sched.PH.ZMM, v32f16_info>,
9688 EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9689 defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
9690 sched.PS.ZMM, v16f32_info>,
9691 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
9692 defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
9693 sched.PD.ZMM, v8f64_info>,
9694 EVEX_V512, REX_W, PD, EVEX_CD8<64, CD8VF>;
9697 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9698 X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> {
9699 let ExeDomain = _.ExeDomain, Predicates = [prd] in {
9700 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9701 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9702 "$src2, $src1", "$src1, $src2",
9703 (X86fsqrts (_.VT _.RC:$src1),
9704 (_.VT _.RC:$src2))>,
9705 Sched<[sched]>, SIMD_EXC;
9706 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9707 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9708 "$src2, $src1", "$src1, $src2",
9709 (X86fsqrts (_.VT _.RC:$src1),
9710 (_.ScalarIntMemFrags addr:$src2))>,
9711 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9712 let Uses = [MXCSR] in
9713 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9714 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
9715 "$rc, $src2, $src1", "$src1, $src2, $rc",
9716 (X86fsqrtRnds (_.VT _.RC:$src1),
9719 EVEX_B, EVEX_RC, Sched<[sched]>;
9721 let isCodeGenOnly = 1, hasSideEffects = 0 in {
9722 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9723 (ins _.FRC:$src1, _.FRC:$src2),
9724 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9725 Sched<[sched]>, SIMD_EXC;
9727 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9728 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
9729 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9730 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9734 let Predicates = [prd] in {
9735 def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
9736 (!cast<Instruction>(Name#Zr)
9737 (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
9740 let Predicates = [prd, OptForSize] in {
9741 def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
9742 (!cast<Instruction>(Name#Zm)
9743 (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
9747 multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
9748 X86SchedWriteSizes sched> {
9749 defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>,
9750 EVEX_CD8<16, CD8VT1>, EVEX_4V, T_MAP5XS;
9751 defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
9752 EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
9753 defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
9754 EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, REX_W;
9757 defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
9758 avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
9760 defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
9762 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
9763 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9764 let ExeDomain = _.ExeDomain in {
9765 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9766 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9767 "$src3, $src2, $src1", "$src1, $src2, $src3",
9768 (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9769 (i32 timm:$src3)))>,
9770 Sched<[sched]>, SIMD_EXC;
9772 let Uses = [MXCSR] in
9773 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9774 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9775 "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
9776 (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9777 (i32 timm:$src3)))>, EVEX_B,
9780 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9781 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
9783 "$src3, $src2, $src1", "$src1, $src2, $src3",
9784 (_.VT (X86RndScales _.RC:$src1,
9785 (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
9786 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9788 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
9789 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9790 (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
9791 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9792 []>, Sched<[sched]>, SIMD_EXC;
9795 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9796 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9797 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9798 []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9802 let Predicates = [HasAVX512] in {
9803 def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
9804 (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
9805 _.FRC:$src1, timm:$src2))>;
9808 let Predicates = [HasAVX512, OptForSize] in {
9809 def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
9810 (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
9811 addr:$src1, timm:$src2))>;
9815 let Predicates = [HasFP16] in
9816 defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh",
9817 SchedWriteFRnd.Scl, f16x_info>,
9818 AVX512PSIi8Base, TA, EVEX_4V,
9819 EVEX_CD8<16, CD8VT1>;
9821 defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
9822 SchedWriteFRnd.Scl, f32x_info>,
9823 AVX512AIi8Base, EVEX_4V, VEX_LIG,
9824 EVEX_CD8<32, CD8VT1>;
9826 defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
9827 SchedWriteFRnd.Scl, f64x_info>,
9828 REX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
9829 EVEX_CD8<64, CD8VT1>;
9831 multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
9832 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
9833 dag OutMask, Predicate BasePredicate> {
9834 let Predicates = [BasePredicate] in {
9835 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9836 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9837 (extractelt _.VT:$dst, (iPTR 0))))),
9838 (!cast<Instruction>("V"#OpcPrefix#r_Intk)
9839 _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
9841 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9842 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9844 (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
9845 OutMask, _.VT:$src2, _.VT:$src1)>;
9849 defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh,
9850 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info,
9851 fp16imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasFP16>;
9852 defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
9853 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
9854 fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
9855 defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
9856 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
9857 fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
9860 //===----------------------------------------------------------------------===//
9861 // Integer truncate and extend operations
9862 //===----------------------------------------------------------------------===//
9864 // PatFrags that contain a select and a truncate op. They take operands in the
9865 // same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
9866 // either to the multiclasses.
9867 def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
9868 (vselect_mask node:$mask,
9869 (trunc node:$src), node:$src0)>;
9870 def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
9871 (vselect_mask node:$mask,
9872 (X86vtruncs node:$src), node:$src0)>;
9873 def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
9874 (vselect_mask node:$mask,
9875 (X86vtruncus node:$src), node:$src0)>;
9877 multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9878 SDPatternOperator MaskNode,
9879 X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
9880 X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
9881 let ExeDomain = DestInfo.ExeDomain in {
9882 def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9883 (ins SrcInfo.RC:$src),
9884 OpcodeStr # "\t{$src, $dst|$dst, $src}",
9885 [(set DestInfo.RC:$dst,
9886 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
9887 EVEX, Sched<[sched]>;
9888 let Constraints = "$src0 = $dst" in
9889 def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9890 (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9891 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9892 [(set DestInfo.RC:$dst,
9893 (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9894 (DestInfo.VT DestInfo.RC:$src0),
9895 SrcInfo.KRCWM:$mask))]>,
9896 EVEX, EVEX_K, Sched<[sched]>;
9897 def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9898 (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9899 OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
9900 [(set DestInfo.RC:$dst,
9901 (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9902 DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
9903 EVEX, EVEX_KZ, Sched<[sched]>;
9906 let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
9907 def mr : AVX512XS8I<opc, MRMDestMem, (outs),
9908 (ins x86memop:$dst, SrcInfo.RC:$src),
9909 OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
9910 EVEX, Sched<[sched.Folded]>;
9912 def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
9913 (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9914 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
9915 EVEX, EVEX_K, Sched<[sched.Folded]>;
9916 }//mayStore = 1, hasSideEffects = 0
9919 multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9920 PatFrag truncFrag, PatFrag mtruncFrag,
9923 def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9924 (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
9925 addr:$dst, SrcInfo.RC:$src)>;
9927 def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
9928 SrcInfo.KRCWM:$mask),
9929 (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
9930 addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
9933 multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9934 SDNode OpNode256, SDNode OpNode512,
9935 SDPatternOperator MaskNode128,
9936 SDPatternOperator MaskNode256,
9937 SDPatternOperator MaskNode512,
9938 X86SchedWriteWidths sched,
9939 AVX512VLVectorVTInfo VTSrcInfo,
9940 X86VectorVTInfo DestInfoZ128,
9941 X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9942 X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9943 X86MemOperand x86memopZ, PatFrag truncFrag,
9944 PatFrag mtruncFrag, Predicate prd = HasAVX512>{
9946 let Predicates = [HasVLX, prd] in {
9947 defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched.XMM,
9948 VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9949 avx512_trunc_mr_lowering<VTSrcInfo.info128, truncFrag,
9950 mtruncFrag, NAME>, EVEX_V128;
9952 defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched.YMM,
9953 VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9954 avx512_trunc_mr_lowering<VTSrcInfo.info256, truncFrag,
9955 mtruncFrag, NAME>, EVEX_V256;
9957 let Predicates = [prd] in
9958 defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched.ZMM,
9959 VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9960 avx512_trunc_mr_lowering<VTSrcInfo.info512, truncFrag,
9961 mtruncFrag, NAME>, EVEX_V512;
9964 multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr,
9965 X86SchedWriteWidths sched, PatFrag StoreNode,
9966 PatFrag MaskedStoreNode, SDNode InVecNode,
9967 SDPatternOperator InVecMaskNode> {
9968 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
9969 InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
9970 avx512vl_i64_info, v16i8x_info, v16i8x_info,
9971 v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9972 MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
9975 multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9976 SDPatternOperator MaskNode,
9977 X86SchedWriteWidths sched, PatFrag StoreNode,
9978 PatFrag MaskedStoreNode, SDNode InVecNode,
9979 SDPatternOperator InVecMaskNode> {
9980 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9981 InVecMaskNode, InVecMaskNode, MaskNode, sched,
9982 avx512vl_i64_info, v8i16x_info, v8i16x_info,
9983 v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9984 MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
9987 multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9988 SDPatternOperator MaskNode,
9989 X86SchedWriteWidths sched, PatFrag StoreNode,
9990 PatFrag MaskedStoreNode, SDNode InVecNode,
9991 SDPatternOperator InVecMaskNode> {
9992 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9993 InVecMaskNode, MaskNode, MaskNode, sched,
9994 avx512vl_i64_info, v4i32x_info, v4i32x_info,
9995 v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9996 MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
9999 multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
10000 SDPatternOperator MaskNode,
10001 X86SchedWriteWidths sched, PatFrag StoreNode,
10002 PatFrag MaskedStoreNode, SDNode InVecNode,
10003 SDPatternOperator InVecMaskNode> {
10004 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
10005 InVecMaskNode, InVecMaskNode, MaskNode, sched,
10006 avx512vl_i32_info, v16i8x_info, v16i8x_info,
10007 v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
10008 MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
10011 multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
10012 SDPatternOperator MaskNode,
10013 X86SchedWriteWidths sched, PatFrag StoreNode,
10014 PatFrag MaskedStoreNode, SDNode InVecNode,
10015 SDPatternOperator InVecMaskNode> {
10016 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
10017 InVecMaskNode, MaskNode, MaskNode, sched,
10018 avx512vl_i32_info, v8i16x_info, v8i16x_info,
10019 v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
10020 MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
10023 multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
10024 SDPatternOperator MaskNode,
10025 X86SchedWriteWidths sched, PatFrag StoreNode,
10026 PatFrag MaskedStoreNode, SDNode InVecNode,
10027 SDPatternOperator InVecMaskNode> {
10028 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
10029 InVecMaskNode, MaskNode, MaskNode, sched,
10030 avx512vl_i16_info, v16i8x_info, v16i8x_info,
10031 v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
10032 MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
10035 defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb",
10036 SchedWriteVecTruncate, truncstorevi8,
10037 masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
10038 defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb",
10039 SchedWriteVecTruncate, truncstore_s_vi8,
10040 masked_truncstore_s_vi8, X86vtruncs,
10042 defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb",
10043 SchedWriteVecTruncate, truncstore_us_vi8,
10044 masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>;
10046 defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
10047 SchedWriteVecTruncate, truncstorevi16,
10048 masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
10049 defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs,
10050 SchedWriteVecTruncate, truncstore_s_vi16,
10051 masked_truncstore_s_vi16, X86vtruncs,
10053 defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
10054 select_truncus, SchedWriteVecTruncate,
10055 truncstore_us_vi16, masked_truncstore_us_vi16,
10056 X86vtruncus, X86vmtruncus>;
10058 defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
10059 SchedWriteVecTruncate, truncstorevi32,
10060 masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
10061 defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs,
10062 SchedWriteVecTruncate, truncstore_s_vi32,
10063 masked_truncstore_s_vi32, X86vtruncs,
10065 defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
10066 select_truncus, SchedWriteVecTruncate,
10067 truncstore_us_vi32, masked_truncstore_us_vi32,
10068 X86vtruncus, X86vmtruncus>;
10070 defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
10071 SchedWriteVecTruncate, truncstorevi8,
10072 masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
10073 defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
10074 SchedWriteVecTruncate, truncstore_s_vi8,
10075 masked_truncstore_s_vi8, X86vtruncs,
10077 defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
10078 select_truncus, SchedWriteVecTruncate,
10079 truncstore_us_vi8, masked_truncstore_us_vi8,
10080 X86vtruncus, X86vmtruncus>;
10082 defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
10083 SchedWriteVecTruncate, truncstorevi16,
10084 masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
10085 defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
10086 SchedWriteVecTruncate, truncstore_s_vi16,
10087 masked_truncstore_s_vi16, X86vtruncs,
10089 defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
10090 select_truncus, SchedWriteVecTruncate,
10091 truncstore_us_vi16, masked_truncstore_us_vi16,
10092 X86vtruncus, X86vmtruncus>;
10094 defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
10095 SchedWriteVecTruncate, truncstorevi8,
10096 masked_truncstorevi8, X86vtrunc,
10098 defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
10099 SchedWriteVecTruncate, truncstore_s_vi8,
10100 masked_truncstore_s_vi8, X86vtruncs,
10102 defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
10103 select_truncus, SchedWriteVecTruncate,
10104 truncstore_us_vi8, masked_truncstore_us_vi8,
10105 X86vtruncus, X86vmtruncus>;
10107 let Predicates = [HasAVX512, NoVLX] in {
10108 def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
10109 (v8i16 (EXTRACT_SUBREG
10110 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
10111 VR256X:$src, sub_ymm)))), sub_xmm))>;
10112 def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
10113 (v4i32 (EXTRACT_SUBREG
10114 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
10115 VR256X:$src, sub_ymm)))), sub_xmm))>;
10118 let Predicates = [HasBWI, NoVLX] in {
10119 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
10120 (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
10121 VR256X:$src, sub_ymm))), sub_xmm))>;
10124 // Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
10125 multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
10126 X86VectorVTInfo DestInfo,
10127 X86VectorVTInfo SrcInfo> {
10128 def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
10130 SrcInfo.KRCWM:$mask)),
10131 (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
10132 SrcInfo.KRCWM:$mask,
10135 def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
10136 DestInfo.ImmAllZerosV,
10137 SrcInfo.KRCWM:$mask)),
10138 (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
10142 let Predicates = [HasVLX] in {
10143 defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
10144 defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
10145 defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
10148 let Predicates = [HasAVX512] in {
10149 defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
10150 defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
10151 defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
10153 defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
10154 defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
10155 defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
10157 defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
10158 defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
10159 defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
10162 multiclass avx512_pmovx_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
10163 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
10164 X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
10165 let ExeDomain = DestInfo.ExeDomain in {
10166 defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10167 (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
10168 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
10169 EVEX, Sched<[sched]>;
10171 defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10172 (ins x86memop:$src), OpcodeStr ,"$src", "$src",
10173 (DestInfo.VT (LdFrag addr:$src))>,
10174 EVEX, Sched<[sched.Folded]>;
10178 multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
10179 SDNode OpNode, SDNode InVecNode, string ExtTy,
10180 X86SchedWriteWidths sched,
10181 PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10182 let Predicates = [HasVLX, HasBWI] in {
10183 defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v8i16x_info,
10184 v16i8x_info, i64mem, LdFrag, InVecNode>,
10185 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, WIG;
10187 defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v16i16x_info,
10188 v16i8x_info, i128mem, LdFrag, OpNode>,
10189 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, WIG;
10191 let Predicates = [HasBWI] in {
10192 defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v32i16_info,
10193 v32i8x_info, i256mem, LdFrag, OpNode>,
10194 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, WIG;
10198 multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
10199 SDNode OpNode, SDNode InVecNode, string ExtTy,
10200 X86SchedWriteWidths sched,
10201 PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10202 let Predicates = [HasVLX, HasAVX512] in {
10203 defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
10204 v16i8x_info, i32mem, LdFrag, InVecNode>,
10205 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, WIG;
10207 defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
10208 v16i8x_info, i64mem, LdFrag, InVecNode>,
10209 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, WIG;
10211 let Predicates = [HasAVX512] in {
10212 defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
10213 v16i8x_info, i128mem, LdFrag, OpNode>,
10214 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, WIG;
10218 multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
10219 SDNode InVecNode, string ExtTy,
10220 X86SchedWriteWidths sched,
10221 PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10222 let Predicates = [HasVLX, HasAVX512] in {
10223 defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
10224 v16i8x_info, i16mem, LdFrag, InVecNode>,
10225 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, WIG;
10227 defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
10228 v16i8x_info, i32mem, LdFrag, InVecNode>,
10229 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, WIG;
10231 let Predicates = [HasAVX512] in {
10232 defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
10233 v16i8x_info, i64mem, LdFrag, InVecNode>,
10234 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, WIG;
10238 multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
10239 SDNode OpNode, SDNode InVecNode, string ExtTy,
10240 X86SchedWriteWidths sched,
10241 PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
10242 let Predicates = [HasVLX, HasAVX512] in {
10243 defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
10244 v8i16x_info, i64mem, LdFrag, InVecNode>,
10245 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, WIG;
10247 defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
10248 v8i16x_info, i128mem, LdFrag, OpNode>,
10249 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, WIG;
10251 let Predicates = [HasAVX512] in {
10252 defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
10253 v16i16x_info, i256mem, LdFrag, OpNode>,
10254 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, WIG;
10258 multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
10259 SDNode OpNode, SDNode InVecNode, string ExtTy,
10260 X86SchedWriteWidths sched,
10261 PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
10262 let Predicates = [HasVLX, HasAVX512] in {
10263 defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
10264 v8i16x_info, i32mem, LdFrag, InVecNode>,
10265 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, WIG;
10267 defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
10268 v8i16x_info, i64mem, LdFrag, InVecNode>,
10269 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, WIG;
10271 let Predicates = [HasAVX512] in {
10272 defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
10273 v8i16x_info, i128mem, LdFrag, OpNode>,
10274 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, WIG;
10278 multiclass avx512_pmovx_dq<bits<8> opc, string OpcodeStr,
10279 SDNode OpNode, SDNode InVecNode, string ExtTy,
10280 X86SchedWriteWidths sched,
10281 PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
10283 let Predicates = [HasVLX, HasAVX512] in {
10284 defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
10285 v4i32x_info, i64mem, LdFrag, InVecNode>,
10286 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
10288 defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
10289 v4i32x_info, i128mem, LdFrag, OpNode>,
10290 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
10292 let Predicates = [HasAVX512] in {
10293 defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
10294 v8i32x_info, i256mem, LdFrag, OpNode>,
10295 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
10299 defm VPMOVZXBW : avx512_pmovx_bw<0x30, "vpmovzxbw", zext, zext_invec, "z", SchedWriteVecExtend>;
10300 defm VPMOVZXBD : avx512_pmovx_bd<0x31, "vpmovzxbd", zext, zext_invec, "z", SchedWriteVecExtend>;
10301 defm VPMOVZXBQ : avx512_pmovx_bq<0x32, "vpmovzxbq", zext_invec, "z", SchedWriteVecExtend>;
10302 defm VPMOVZXWD : avx512_pmovx_wd<0x33, "vpmovzxwd", zext, zext_invec, "z", SchedWriteVecExtend>;
10303 defm VPMOVZXWQ : avx512_pmovx_wq<0x34, "vpmovzxwq", zext, zext_invec, "z", SchedWriteVecExtend>;
10304 defm VPMOVZXDQ : avx512_pmovx_dq<0x35, "vpmovzxdq", zext, zext_invec, "z", SchedWriteVecExtend>;
10306 defm VPMOVSXBW: avx512_pmovx_bw<0x20, "vpmovsxbw", sext, sext_invec, "s", SchedWriteVecExtend>;
10307 defm VPMOVSXBD: avx512_pmovx_bd<0x21, "vpmovsxbd", sext, sext_invec, "s", SchedWriteVecExtend>;
10308 defm VPMOVSXBQ: avx512_pmovx_bq<0x22, "vpmovsxbq", sext_invec, "s", SchedWriteVecExtend>;
10309 defm VPMOVSXWD: avx512_pmovx_wd<0x23, "vpmovsxwd", sext, sext_invec, "s", SchedWriteVecExtend>;
10310 defm VPMOVSXWQ: avx512_pmovx_wq<0x24, "vpmovsxwq", sext, sext_invec, "s", SchedWriteVecExtend>;
10311 defm VPMOVSXDQ: avx512_pmovx_dq<0x25, "vpmovsxdq", sext, sext_invec, "s", SchedWriteVecExtend>;
10314 // Patterns that we also need any extend versions of. aext_vector_inreg
10315 // is currently legalized to zext_vector_inreg.
10316 multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
10317 // 256-bit patterns
10318 let Predicates = [HasVLX, HasBWI] in {
10319 def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
10320 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
10323 let Predicates = [HasVLX] in {
10324 def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
10325 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
10327 def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
10328 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
10331 // 512-bit patterns
10332 let Predicates = [HasBWI] in {
10333 def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
10334 (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
10336 let Predicates = [HasAVX512] in {
10337 def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
10338 (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
10339 def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
10340 (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
10342 def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
10343 (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
10345 def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
10346 (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
10350 multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
10352 AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
10353 // 128-bit patterns
10354 let Predicates = [HasVLX, HasBWI] in {
10355 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10356 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10357 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10358 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10359 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10360 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10362 let Predicates = [HasVLX] in {
10363 def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10364 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10365 def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
10366 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10368 def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
10369 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
10371 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10372 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10373 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10374 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10375 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
10376 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10378 def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10379 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10380 def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
10381 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10383 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10384 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10385 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10386 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10387 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
10388 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10390 let Predicates = [HasVLX] in {
10391 def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10392 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10393 def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
10394 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10395 def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10396 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10398 def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10399 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10400 def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
10401 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10403 def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10404 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10405 def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
10406 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10407 def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
10408 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10410 // 512-bit patterns
10411 let Predicates = [HasAVX512] in {
10412 def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10413 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10414 def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10415 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10416 def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10417 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10421 defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
10422 defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
10424 // Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
10425 // ext+trunc aggressively making it impossible to legalize the DAG to this
10426 // pattern directly.
10427 let Predicates = [HasAVX512, NoBWI] in {
10428 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
10429 (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
10430 def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
10431 (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
10434 //===----------------------------------------------------------------------===//
10435 // GATHER - SCATTER Operations
10437 // FIXME: Improve scheduling of gather/scatter instructions.
10438 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10439 X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
10440 let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
10441 ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
10442 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
10443 (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
10444 !strconcat(OpcodeStr#_.Suffix,
10445 "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
10446 []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10447 Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
10450 multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
10451 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10452 defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
10453 vy512xmem>, EVEX_V512, REX_W;
10454 defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
10455 vz512mem>, EVEX_V512, REX_W;
10456 let Predicates = [HasVLX] in {
10457 defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
10458 vx256xmem>, EVEX_V256, REX_W;
10459 defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
10460 vy256xmem>, EVEX_V256, REX_W;
10461 defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
10462 vx128xmem>, EVEX_V128, REX_W;
10463 defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10464 vx128xmem>, EVEX_V128, REX_W;
10468 multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
10469 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10470 defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
10472 defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
10474 let Predicates = [HasVLX] in {
10475 defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
10476 vy256xmem>, EVEX_V256;
10477 defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10478 vy128xmem>, EVEX_V256;
10479 defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
10480 vx128xmem>, EVEX_V128;
10481 defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10482 vx64xmem, VK2WM>, EVEX_V128;
10487 defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
10488 avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
10490 defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
10491 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
10493 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10494 X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
10496 let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
10497 hasSideEffects = 0 in
10499 def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
10500 (ins memop:$dst, MaskRC:$mask, _.RC:$src),
10501 !strconcat(OpcodeStr#_.Suffix,
10502 "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
10503 []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10504 Sched<[WriteStore]>;
10507 multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
10508 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10509 defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
10510 vy512xmem>, EVEX_V512, REX_W;
10511 defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
10512 vz512mem>, EVEX_V512, REX_W;
10513 let Predicates = [HasVLX] in {
10514 defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
10515 vx256xmem>, EVEX_V256, REX_W;
10516 defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
10517 vy256xmem>, EVEX_V256, REX_W;
10518 defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
10519 vx128xmem>, EVEX_V128, REX_W;
10520 defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10521 vx128xmem>, EVEX_V128, REX_W;
10525 multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
10526 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10527 defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
10529 defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
10531 let Predicates = [HasVLX] in {
10532 defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
10533 vy256xmem>, EVEX_V256;
10534 defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10535 vy128xmem>, EVEX_V256;
10536 defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
10537 vx128xmem>, EVEX_V128;
10538 defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10539 vx64xmem, VK2WM>, EVEX_V128;
10543 defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
10544 avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
10546 defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
10547 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
10550 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
10551 RegisterClass KRC, X86MemOperand memop> {
10552 let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
10553 def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
10554 !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
10555 EVEX, EVEX_K, Sched<[WriteLoad]>;
10558 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
10559 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10561 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
10562 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10564 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
10565 VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10567 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
10568 VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10570 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
10571 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10573 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
10574 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10576 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
10577 VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10579 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
10580 VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10582 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
10583 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10585 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
10586 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10588 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
10589 VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10591 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
10592 VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10594 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
10595 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10597 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
10598 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10600 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
10601 VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10603 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
10604 VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10606 multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr, SchedWrite Sched> {
10607 def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
10608 !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
10609 [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
10610 EVEX, Sched<[Sched]>;
10613 multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
10614 string OpcodeStr, Predicate prd> {
10615 let Predicates = [prd] in
10616 defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr, WriteVecMoveZ>, EVEX_V512;
10618 let Predicates = [prd, HasVLX] in {
10619 defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr, WriteVecMoveY>, EVEX_V256;
10620 defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr, WriteVecMoveX>, EVEX_V128;
10624 defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
10625 defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , REX_W;
10626 defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
10627 defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , REX_W;
10629 multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
10630 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
10631 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
10632 [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
10633 EVEX, Sched<[WriteMove]>;
10636 // Use 512bit version to implement 128/256 bit in case NoVLX.
10637 multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
10641 def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
10642 (_.KVT (COPY_TO_REGCLASS
10643 (!cast<Instruction>(Name#"Zrr")
10644 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
10645 _.RC:$src, _.SubRegIdx)),
10649 multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
10650 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10651 let Predicates = [prd] in
10652 defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
10655 let Predicates = [prd, HasVLX] in {
10656 defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
10658 defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
10661 let Predicates = [prd, NoVLX] in {
10662 defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
10663 defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
10667 defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
10668 avx512vl_i8_info, HasBWI>;
10669 defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
10670 avx512vl_i16_info, HasBWI>, REX_W;
10671 defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
10672 avx512vl_i32_info, HasDQI>;
10673 defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
10674 avx512vl_i64_info, HasDQI>, REX_W;
10676 // Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
10677 // is available, but BWI is not. We can't handle this in lowering because
10678 // a target independent DAG combine likes to combine sext and trunc.
10679 let Predicates = [HasDQI, NoBWI] in {
10680 def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
10681 (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
10682 def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
10683 (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
10686 let Predicates = [HasDQI, NoBWI, HasVLX] in {
10687 def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
10688 (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
10691 //===----------------------------------------------------------------------===//
10692 // AVX-512 - COMPRESS and EXPAND
10695 multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
10696 string OpcodeStr, X86FoldableSchedWrite sched> {
10697 defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
10698 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10699 (null_frag)>, AVX5128IBase,
10702 let mayStore = 1, hasSideEffects = 0 in
10703 def mr : AVX5128I<opc, MRMDestMem, (outs),
10704 (ins _.MemOp:$dst, _.RC:$src),
10705 OpcodeStr # "\t{$src, $dst|$dst, $src}",
10706 []>, EVEX_CD8<_.EltSize, CD8VT1>,
10707 Sched<[sched.Folded]>;
10709 def mrk : AVX5128I<opc, MRMDestMem, (outs),
10710 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
10711 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
10713 EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10714 Sched<[sched.Folded]>;
10717 multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10718 def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
10719 (!cast<Instruction>(Name#_.ZSuffix#mrk)
10720 addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
10722 def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10723 (!cast<Instruction>(Name#_.ZSuffix#rrk)
10724 _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10725 def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10726 (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10727 _.KRCWM:$mask, _.RC:$src)>;
10730 multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
10731 X86FoldableSchedWrite sched,
10732 AVX512VLVectorVTInfo VTInfo,
10733 Predicate Pred = HasAVX512> {
10734 let Predicates = [Pred] in
10735 defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
10736 compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10738 let Predicates = [Pred, HasVLX] in {
10739 defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
10740 compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10741 defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
10742 compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10746 // FIXME: Is there a better scheduler class for VPCOMPRESS?
10747 defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
10748 avx512vl_i32_info>, EVEX;
10749 defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
10750 avx512vl_i64_info>, EVEX, REX_W;
10751 defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
10752 avx512vl_f32_info>, EVEX;
10753 defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
10754 avx512vl_f64_info>, EVEX, REX_W;
10757 multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
10758 string OpcodeStr, X86FoldableSchedWrite sched> {
10759 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10760 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10761 (null_frag)>, AVX5128IBase,
10764 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10765 (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
10767 AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
10768 Sched<[sched.Folded, sched.ReadAfterFold]>;
10771 multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10773 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
10774 (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10775 _.KRCWM:$mask, addr:$src)>;
10777 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
10778 (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10779 _.KRCWM:$mask, addr:$src)>;
10781 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
10782 (_.VT _.RC:$src0))),
10783 (!cast<Instruction>(Name#_.ZSuffix#rmk)
10784 _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
10786 def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10787 (!cast<Instruction>(Name#_.ZSuffix#rrk)
10788 _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10789 def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10790 (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10791 _.KRCWM:$mask, _.RC:$src)>;
10794 multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
10795 X86FoldableSchedWrite sched,
10796 AVX512VLVectorVTInfo VTInfo,
10797 Predicate Pred = HasAVX512> {
10798 let Predicates = [Pred] in
10799 defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
10800 expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10802 let Predicates = [Pred, HasVLX] in {
10803 defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
10804 expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10805 defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
10806 expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10810 // FIXME: Is there a better scheduler class for VPEXPAND?
10811 defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
10812 avx512vl_i32_info>, EVEX;
10813 defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
10814 avx512vl_i64_info>, EVEX, REX_W;
10815 defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
10816 avx512vl_f32_info>, EVEX;
10817 defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
10818 avx512vl_f64_info>, EVEX, REX_W;
10820 //handle instruction reg_vec1 = op(reg_vec,imm)
10822 // op(broadcast(eltVt),imm)
10823 //all instruction created with FROUND_CURRENT
10824 multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
10825 SDPatternOperator OpNode,
10826 SDPatternOperator MaskOpNode,
10827 X86FoldableSchedWrite sched,
10828 X86VectorVTInfo _> {
10829 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10830 defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
10831 (ins _.RC:$src1, i32u8imm:$src2),
10832 OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10833 (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
10834 (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
10836 defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10837 (ins _.MemOp:$src1, i32u8imm:$src2),
10838 OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10839 (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10841 (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10842 (i32 timm:$src2))>,
10843 Sched<[sched.Folded, sched.ReadAfterFold]>;
10844 defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10845 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
10846 OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
10847 "${src1}"#_.BroadcastStr#", $src2",
10848 (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10850 (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10851 (i32 timm:$src2))>, EVEX_B,
10852 Sched<[sched.Folded, sched.ReadAfterFold]>;
10856 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
10857 multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10858 SDNode OpNode, X86FoldableSchedWrite sched,
10859 X86VectorVTInfo _> {
10860 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10861 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10862 (ins _.RC:$src1, i32u8imm:$src2),
10863 OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
10864 "$src1, {sae}, $src2",
10865 (OpNode (_.VT _.RC:$src1),
10866 (i32 timm:$src2))>,
10867 EVEX_B, Sched<[sched]>;
10870 multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
10871 AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
10872 SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
10874 let Predicates = [prd] in {
10875 defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10876 sched.ZMM, _.info512>,
10877 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
10878 sched.ZMM, _.info512>, EVEX_V512;
10880 let Predicates = [prd, HasVLX] in {
10881 defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10882 sched.XMM, _.info128>, EVEX_V128;
10883 defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10884 sched.YMM, _.info256>, EVEX_V256;
10888 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10889 // op(reg_vec2,mem_vec,imm)
10890 // op(reg_vec2,broadcast(eltVt),imm)
10891 //all instruction created with FROUND_CURRENT
10892 multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10893 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10894 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10895 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10896 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10897 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10898 (OpNode (_.VT _.RC:$src1),
10900 (i32 timm:$src3))>,
10902 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10903 (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
10904 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10905 (OpNode (_.VT _.RC:$src1),
10906 (_.VT (bitconvert (_.LdFrag addr:$src2))),
10907 (i32 timm:$src3))>,
10908 Sched<[sched.Folded, sched.ReadAfterFold]>;
10909 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10910 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10911 OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10912 "$src1, ${src2}"#_.BroadcastStr#", $src3",
10913 (OpNode (_.VT _.RC:$src1),
10914 (_.VT (_.BroadcastLdFrag addr:$src2)),
10915 (i32 timm:$src3))>, EVEX_B,
10916 Sched<[sched.Folded, sched.ReadAfterFold]>;
10920 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10921 // op(reg_vec2,mem_vec,imm)
10922 multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10923 X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
10924 X86VectorVTInfo SrcInfo>{
10925 let ExeDomain = DestInfo.ExeDomain in {
10926 defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10927 (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
10928 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10929 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10930 (SrcInfo.VT SrcInfo.RC:$src2),
10931 (i8 timm:$src3)))>,
10933 defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10934 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
10935 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10936 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10937 (SrcInfo.VT (bitconvert
10938 (SrcInfo.LdFrag addr:$src2))),
10939 (i8 timm:$src3)))>,
10940 Sched<[sched.Folded, sched.ReadAfterFold]>;
10944 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10945 // op(reg_vec2,mem_vec,imm)
10946 // op(reg_vec2,broadcast(eltVt),imm)
10947 multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10948 X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10949 avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
10951 let ExeDomain = _.ExeDomain in
10952 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10953 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10954 OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10955 "$src1, ${src2}"#_.BroadcastStr#", $src3",
10956 (OpNode (_.VT _.RC:$src1),
10957 (_.VT (_.BroadcastLdFrag addr:$src2)),
10958 (i8 timm:$src3))>, EVEX_B,
10959 Sched<[sched.Folded, sched.ReadAfterFold]>;
10962 //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10963 // op(reg_vec2,mem_scalar,imm)
10964 multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10965 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10966 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10967 defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10968 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10969 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10970 (OpNode (_.VT _.RC:$src1),
10972 (i32 timm:$src3))>,
10974 defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10975 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
10976 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10977 (OpNode (_.VT _.RC:$src1),
10978 (_.ScalarIntMemFrags addr:$src2),
10979 (i32 timm:$src3))>,
10980 Sched<[sched.Folded, sched.ReadAfterFold]>;
10984 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
10985 multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10986 SDNode OpNode, X86FoldableSchedWrite sched,
10987 X86VectorVTInfo _> {
10988 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10989 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10990 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10991 OpcodeStr, "$src3, {sae}, $src2, $src1",
10992 "$src1, $src2, {sae}, $src3",
10993 (OpNode (_.VT _.RC:$src1),
10995 (i32 timm:$src3))>,
10996 EVEX_B, Sched<[sched]>;
10999 //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
11000 multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
11001 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11002 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
11003 defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11004 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
11005 OpcodeStr, "$src3, {sae}, $src2, $src1",
11006 "$src1, $src2, {sae}, $src3",
11007 (OpNode (_.VT _.RC:$src1),
11009 (i32 timm:$src3))>,
11010 EVEX_B, Sched<[sched]>;
11013 multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
11014 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
11015 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
11016 let Predicates = [prd] in {
11017 defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
11018 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
11022 let Predicates = [prd, HasVLX] in {
11023 defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
11025 defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
11030 multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
11031 X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
11032 AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
11033 let Predicates = [Pred] in {
11034 defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
11035 SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
11037 let Predicates = [Pred, HasVLX] in {
11038 defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
11039 SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
11040 defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
11041 SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
11045 multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
11046 bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
11047 Predicate Pred = HasAVX512> {
11048 let Predicates = [Pred] in {
11049 defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
11052 let Predicates = [Pred, HasVLX] in {
11053 defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
11055 defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
11060 multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
11061 X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
11062 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
11063 let Predicates = [prd] in {
11064 defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
11065 avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
11069 multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
11070 bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
11071 SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
11072 X86SchedWriteWidths sched, Predicate prd>{
11073 defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info,
11074 opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>,
11075 AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
11076 defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
11077 opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
11078 AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
11079 defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
11080 opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
11081 AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, REX_W;
11084 defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
11085 X86VReduce, X86VReduce, X86VReduceSAE,
11086 SchedWriteFRnd, HasDQI>;
11087 defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
11088 X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
11089 SchedWriteFRnd, HasAVX512>;
11090 defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
11091 X86VGetMant, X86VGetMant, X86VGetMantSAE,
11092 SchedWriteFRnd, HasAVX512>;
11094 defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
11095 0x50, X86VRange, X86VRangeSAE,
11096 SchedWriteFAdd, HasDQI>,
11097 AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
11098 defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
11099 0x50, X86VRange, X86VRangeSAE,
11100 SchedWriteFAdd, HasDQI>,
11101 AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11103 defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
11104 f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
11105 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, REX_W;
11106 defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
11107 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
11108 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11110 defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
11111 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
11112 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, REX_W;
11113 defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
11114 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
11115 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11116 defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
11117 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>,
11118 AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;
11120 defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
11121 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
11122 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, REX_W;
11123 defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
11124 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
11125 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11126 defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info,
11127 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>,
11128 AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;
11130 multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
11131 X86FoldableSchedWrite sched,
11133 X86VectorVTInfo CastInfo,
11134 string EVEX2VEXOvrd> {
11135 let ExeDomain = _.ExeDomain in {
11136 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11137 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
11138 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11140 (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
11141 (i8 timm:$src3)))))>,
11142 Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
11143 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11144 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
11145 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11148 (CastInfo.VT (X86Shuf128 _.RC:$src1,
11149 (CastInfo.LdFrag addr:$src2),
11150 (i8 timm:$src3)))))>,
11151 Sched<[sched.Folded, sched.ReadAfterFold]>,
11152 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
11153 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11154 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
11155 OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
11156 "$src1, ${src2}"#_.BroadcastStr#", $src3",
11160 (X86Shuf128 _.RC:$src1,
11161 (_.BroadcastLdFrag addr:$src2),
11162 (i8 timm:$src3)))))>, EVEX_B,
11163 Sched<[sched.Folded, sched.ReadAfterFold]>;
11167 multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
11168 AVX512VLVectorVTInfo _,
11169 AVX512VLVectorVTInfo CastInfo, bits<8> opc,
11170 string EVEX2VEXOvrd>{
11171 let Predicates = [HasAVX512] in
11172 defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
11173 _.info512, CastInfo.info512, "">, EVEX_V512;
11175 let Predicates = [HasAVX512, HasVLX] in
11176 defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
11177 _.info256, CastInfo.info256,
11178 EVEX2VEXOvrd>, EVEX_V256;
11181 defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
11182 avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11183 defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
11184 avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
11185 defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
11186 avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11187 defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
11188 avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
11190 multiclass avx512_valign<bits<8> opc, string OpcodeStr,
11191 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11192 // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
11193 // instantiation of this class.
11194 let ExeDomain = _.ExeDomain in {
11195 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11196 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
11197 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11198 (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
11199 Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
11200 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11201 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
11202 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11203 (_.VT (X86VAlign _.RC:$src1,
11204 (bitconvert (_.LdFrag addr:$src2)),
11205 (i8 timm:$src3)))>,
11206 Sched<[sched.Folded, sched.ReadAfterFold]>,
11207 EVEX2VEXOverride<"VPALIGNRrmi">;
11209 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11210 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
11211 OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
11212 "$src1, ${src2}"#_.BroadcastStr#", $src3",
11213 (X86VAlign _.RC:$src1,
11214 (_.VT (_.BroadcastLdFrag addr:$src2)),
11215 (i8 timm:$src3))>, EVEX_B,
11216 Sched<[sched.Folded, sched.ReadAfterFold]>;
11220 multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
11221 AVX512VLVectorVTInfo _> {
11222 let Predicates = [HasAVX512] in {
11223 defm Z : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
11224 AVX512AIi8Base, EVEX_4V, EVEX_V512;
11226 let Predicates = [HasAVX512, HasVLX] in {
11227 defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
11228 AVX512AIi8Base, EVEX_4V, EVEX_V128;
11229 // We can't really override the 256-bit version so change it back to unset.
11230 let EVEX2VEXOverride = ? in
11231 defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
11232 AVX512AIi8Base, EVEX_4V, EVEX_V256;
11236 defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
11237 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11238 defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
11239 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
11242 defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
11243 SchedWriteShuffle, avx512vl_i8_info,
11244 avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
11246 // Fragments to help convert valignq into masked valignd. Or valignq/valignd
11248 def ValignqImm32XForm : SDNodeXForm<timm, [{
11249 return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
11251 def ValignqImm8XForm : SDNodeXForm<timm, [{
11252 return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
11254 def ValigndImm8XForm : SDNodeXForm<timm, [{
11255 return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
11258 multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
11259 X86VectorVTInfo From, X86VectorVTInfo To,
11260 SDNodeXForm ImmXForm> {
11261 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11263 (From.VT (OpNode From.RC:$src1, From.RC:$src2,
11266 (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
11267 To.RC:$src1, To.RC:$src2,
11268 (ImmXForm timm:$src3))>;
11270 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11272 (From.VT (OpNode From.RC:$src1, From.RC:$src2,
11275 (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
11276 To.RC:$src1, To.RC:$src2,
11277 (ImmXForm timm:$src3))>;
11279 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11281 (From.VT (OpNode From.RC:$src1,
11282 (From.LdFrag addr:$src2),
11285 (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
11286 To.RC:$src1, addr:$src2,
11287 (ImmXForm timm:$src3))>;
11289 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11291 (From.VT (OpNode From.RC:$src1,
11292 (From.LdFrag addr:$src2),
11295 (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
11296 To.RC:$src1, addr:$src2,
11297 (ImmXForm timm:$src3))>;
11300 multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
11301 X86VectorVTInfo From,
11302 X86VectorVTInfo To,
11303 SDNodeXForm ImmXForm> :
11304 avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
11305 def : Pat<(From.VT (OpNode From.RC:$src1,
11306 (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
11308 (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
11309 (ImmXForm timm:$src3))>;
11311 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11313 (From.VT (OpNode From.RC:$src1,
11315 (To.VT (To.BroadcastLdFrag addr:$src2))),
11318 (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
11319 To.RC:$src1, addr:$src2,
11320 (ImmXForm timm:$src3))>;
11322 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11324 (From.VT (OpNode From.RC:$src1,
11326 (To.VT (To.BroadcastLdFrag addr:$src2))),
11329 (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
11330 To.RC:$src1, addr:$src2,
11331 (ImmXForm timm:$src3))>;
11334 let Predicates = [HasAVX512] in {
11335 // For 512-bit we lower to the widest element type we can. So we only need
11336 // to handle converting valignq to valignd.
11337 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
11338 v16i32_info, ValignqImm32XForm>;
11341 let Predicates = [HasVLX] in {
11342 // For 128-bit we lower to the widest element type we can. So we only need
11343 // to handle converting valignq to valignd.
11344 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
11345 v4i32x_info, ValignqImm32XForm>;
11346 // For 256-bit we lower to the widest element type we can. So we only need
11347 // to handle converting valignq to valignd.
11348 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
11349 v8i32x_info, ValignqImm32XForm>;
11352 let Predicates = [HasVLX, HasBWI] in {
11353 // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
11354 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
11355 v16i8x_info, ValignqImm8XForm>;
11356 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
11357 v16i8x_info, ValigndImm8XForm>;
11360 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
11361 SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
11362 EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
11364 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
11365 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11366 let ExeDomain = _.ExeDomain in {
11367 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11368 (ins _.RC:$src1), OpcodeStr,
11370 (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
11373 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11374 (ins _.MemOp:$src1), OpcodeStr,
11376 (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
11377 EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
11378 Sched<[sched.Folded]>;
11382 multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
11383 X86FoldableSchedWrite sched, X86VectorVTInfo _> :
11384 avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
11385 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11386 (ins _.ScalarMemOp:$src1), OpcodeStr,
11387 "${src1}"#_.BroadcastStr,
11388 "${src1}"#_.BroadcastStr,
11389 (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
11390 EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
11391 Sched<[sched.Folded]>;
11394 multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11395 X86SchedWriteWidths sched,
11396 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
11397 let Predicates = [prd] in
11398 defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11401 let Predicates = [prd, HasVLX] in {
11402 defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11404 defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
11409 multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11410 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
11412 let Predicates = [prd] in
11413 defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11416 let Predicates = [prd, HasVLX] in {
11417 defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11419 defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
11424 multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
11425 SDNode OpNode, X86SchedWriteWidths sched,
11427 defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
11428 avx512vl_i64_info, prd>, REX_W;
11429 defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
11430 avx512vl_i32_info, prd>;
11433 multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
11434 SDNode OpNode, X86SchedWriteWidths sched,
11436 defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
11437 avx512vl_i16_info, prd>, WIG;
11438 defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
11439 avx512vl_i8_info, prd>, WIG;
11442 multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
11443 bits<8> opc_d, bits<8> opc_q,
11444 string OpcodeStr, SDNode OpNode,
11445 X86SchedWriteWidths sched> {
11446 defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
11448 avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
11452 defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
11455 // VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
11456 let Predicates = [HasAVX512, NoVLX] in {
11457 def : Pat<(v4i64 (abs VR256X:$src)),
11460 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
11462 def : Pat<(v2i64 (abs VR128X:$src)),
11465 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
11469 // Use 512bit version to implement 128/256 bit.
11470 multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
11471 AVX512VLVectorVTInfo _, Predicate prd> {
11472 let Predicates = [prd, NoVLX] in {
11473 def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
11475 (!cast<Instruction>(InstrStr # "Zrr")
11476 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11477 _.info256.RC:$src1,
11478 _.info256.SubRegIdx)),
11479 _.info256.SubRegIdx)>;
11481 def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
11483 (!cast<Instruction>(InstrStr # "Zrr")
11484 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11485 _.info128.RC:$src1,
11486 _.info128.SubRegIdx)),
11487 _.info128.SubRegIdx)>;
11491 defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
11492 SchedWriteVecIMul, HasCDI>;
11494 // FIXME: Is there a better scheduler class for VPCONFLICT?
11495 defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
11496 SchedWriteVecALU, HasCDI>;
11498 // VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
11499 defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
11500 defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
11502 //===---------------------------------------------------------------------===//
11503 // Counts number of ones - VPOPCNTD and VPOPCNTQ
11504 //===---------------------------------------------------------------------===//
11506 // FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
11507 defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
11508 SchedWriteVecALU, HasVPOPCNTDQ>;
11510 defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
11511 defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
11513 //===---------------------------------------------------------------------===//
11514 // Replicate Single FP - MOVSHDUP and MOVSLDUP
11515 //===---------------------------------------------------------------------===//
11517 multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
11518 X86SchedWriteWidths sched> {
11519 defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
11520 avx512vl_f32_info, HasAVX512>, XS;
11523 defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
11524 SchedWriteFShuffle>;
11525 defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
11526 SchedWriteFShuffle>;
11528 //===----------------------------------------------------------------------===//
11529 // AVX-512 - MOVDDUP
11530 //===----------------------------------------------------------------------===//
11532 multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
11533 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11534 let ExeDomain = _.ExeDomain in {
11535 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11536 (ins _.RC:$src), OpcodeStr, "$src", "$src",
11537 (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
11539 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11540 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
11541 (_.VT (_.BroadcastLdFrag addr:$src))>,
11542 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
11543 Sched<[sched.Folded]>;
11547 multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
11548 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
11549 defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
11550 VTInfo.info512>, EVEX_V512;
11552 let Predicates = [HasAVX512, HasVLX] in {
11553 defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
11554 VTInfo.info256>, EVEX_V256;
11555 defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
11556 VTInfo.info128>, EVEX_V128;
11560 multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
11561 X86SchedWriteWidths sched> {
11562 defm NAME: avx512_movddup_common<opc, OpcodeStr, sched,
11563 avx512vl_f64_info>, XD, REX_W;
11566 defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;
11568 let Predicates = [HasVLX] in {
11569 def : Pat<(v2f64 (X86VBroadcast f64:$src)),
11570 (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11572 def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11573 (v2f64 VR128X:$src0)),
11574 (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
11575 (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11576 def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11578 (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11581 //===----------------------------------------------------------------------===//
11582 // AVX-512 - Unpack Instructions
11583 //===----------------------------------------------------------------------===//
11585 let Uses = []<Register>, mayRaiseFPException = 0 in {
11586 defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
11587 SchedWriteFShuffleSizes, 0, 1>;
11588 defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
11589 SchedWriteFShuffleSizes>;
11592 defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
11593 SchedWriteShuffle, HasBWI>;
11594 defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
11595 SchedWriteShuffle, HasBWI>;
11596 defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
11597 SchedWriteShuffle, HasBWI>;
11598 defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
11599 SchedWriteShuffle, HasBWI>;
11601 defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
11602 SchedWriteShuffle, HasAVX512>;
11603 defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
11604 SchedWriteShuffle, HasAVX512>;
11605 defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
11606 SchedWriteShuffle, HasAVX512>;
11607 defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
11608 SchedWriteShuffle, HasAVX512>;
11610 //===----------------------------------------------------------------------===//
11611 // AVX-512 - Extract & Insert Integer Instructions
11612 //===----------------------------------------------------------------------===//
11614 multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11615 X86VectorVTInfo _> {
11616 def mr : AVX512Ii8<opc, MRMDestMem, (outs),
11617 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11618 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11619 [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
11621 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
11624 multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
11625 let Predicates = [HasBWI] in {
11626 def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
11627 (ins _.RC:$src1, u8imm:$src2),
11628 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11629 [(set GR32orGR64:$dst,
11630 (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
11631 EVEX, TAPD, Sched<[WriteVecExtract]>;
11633 defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
11637 multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
11638 let Predicates = [HasBWI] in {
11639 def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
11640 (ins _.RC:$src1, u8imm:$src2),
11641 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11642 [(set GR32orGR64:$dst,
11643 (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
11644 EVEX, PD, Sched<[WriteVecExtract]>;
11646 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
11647 def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
11648 (ins _.RC:$src1, u8imm:$src2),
11649 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
11650 EVEX, TAPD, Sched<[WriteVecExtract]>;
11652 defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
11656 multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
11657 RegisterClass GRC> {
11658 let Predicates = [HasDQI] in {
11659 def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
11660 (ins _.RC:$src1, u8imm:$src2),
11661 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11663 (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
11664 EVEX, TAPD, Sched<[WriteVecExtract]>;
11666 def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
11667 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11668 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11669 [(store (extractelt (_.VT _.RC:$src1),
11670 imm:$src2),addr:$dst)]>,
11671 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
11672 Sched<[WriteVecExtractSt]>;
11676 defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, WIG;
11677 defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, WIG;
11678 defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
11679 defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, REX_W;
11681 multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11682 X86VectorVTInfo _, PatFrag LdFrag,
11683 SDPatternOperator immoperator> {
11684 def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
11685 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
11686 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11688 (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
11689 EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
11692 multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
11693 X86VectorVTInfo _, PatFrag LdFrag> {
11694 let Predicates = [HasBWI] in {
11695 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11696 (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
11697 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11699 (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX_4V,
11700 Sched<[WriteVecInsert]>;
11702 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
11706 multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
11707 X86VectorVTInfo _, RegisterClass GRC> {
11708 let Predicates = [HasDQI] in {
11709 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11710 (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
11711 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11713 (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
11714 EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
11716 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
11717 _.ScalarLdFrag, imm>, TAPD;
11721 defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
11722 extloadi8>, TAPD, WIG;
11723 defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
11724 extloadi16>, PD, WIG;
11725 defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
11726 defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, REX_W;
11728 let Predicates = [HasAVX512, NoBWI] in {
11729 def : Pat<(X86pinsrb VR128:$src1,
11730 (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
11732 (VPINSRBrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
11736 let Predicates = [HasBWI] in {
11737 def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3),
11738 (VPINSRBZrr VR128:$src1, (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
11739 GR8:$src2, sub_8bit), timm:$src3)>;
11740 def : Pat<(X86pinsrb VR128:$src1,
11741 (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
11743 (VPINSRBZrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
11747 // Always select FP16 instructions if available.
11748 let Predicates = [HasBWI], AddedComplexity = -10 in {
11749 def : Pat<(f16 (load addr:$src)), (COPY_TO_REGCLASS (VPINSRWZrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16X)>;
11750 def : Pat<(store f16:$src, addr:$dst), (VPEXTRWZmr addr:$dst, (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>;
11751 def : Pat<(i16 (bitconvert f16:$src)), (EXTRACT_SUBREG (VPEXTRWZrr (v8i16 (COPY_TO_REGCLASS FR16X:$src, VR128X)), 0), sub_16bit)>;
11752 def : Pat<(f16 (bitconvert i16:$src)), (COPY_TO_REGCLASS (VPINSRWZrr (v8i16 (IMPLICIT_DEF)), (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit), 0), FR16X)>;
11755 //===----------------------------------------------------------------------===//
11756 // VSHUFPS - VSHUFPD Operations
11757 //===----------------------------------------------------------------------===//
11759 multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP>{
11760 defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
11761 SchedWriteFShuffle>,
11762 EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
11763 AVX512AIi8Base, EVEX_4V;
11766 defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, PS;
11767 defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, PD, REX_W;
11769 //===----------------------------------------------------------------------===//
11770 // AVX-512 - Byte shift Left/Right
11771 //===----------------------------------------------------------------------===//
11773 multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
11774 Format MRMm, string OpcodeStr,
11775 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11776 def ri : AVX512<opc, MRMr,
11777 (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
11778 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11779 [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
11781 def mi : AVX512<opc, MRMm,
11782 (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
11783 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11784 [(set _.RC:$dst,(_.VT (OpNode
11785 (_.VT (bitconvert (_.LdFrag addr:$src1))),
11786 (i8 timm:$src2))))]>,
11787 Sched<[sched.Folded, sched.ReadAfterFold]>;
11790 multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
11791 Format MRMm, string OpcodeStr,
11792 X86SchedWriteWidths sched, Predicate prd>{
11793 let Predicates = [prd] in
11794 defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11795 sched.ZMM, v64i8_info>, EVEX_V512;
11796 let Predicates = [prd, HasVLX] in {
11797 defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11798 sched.YMM, v32i8x_info>, EVEX_V256;
11799 defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11800 sched.XMM, v16i8x_info>, EVEX_V128;
11803 defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
11804 SchedWriteShuffle, HasBWI>,
11805 AVX512PDIi8Base, EVEX_4V, WIG;
11806 defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
11807 SchedWriteShuffle, HasBWI>,
11808 AVX512PDIi8Base, EVEX_4V, WIG;
11810 multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
11811 string OpcodeStr, X86FoldableSchedWrite sched,
11812 X86VectorVTInfo _dst, X86VectorVTInfo _src> {
11813 let isCommutable = 1 in
11814 def rr : AVX512BI<opc, MRMSrcReg,
11815 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
11816 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11817 [(set _dst.RC:$dst,(_dst.VT
11818 (OpNode (_src.VT _src.RC:$src1),
11819 (_src.VT _src.RC:$src2))))]>,
11821 def rm : AVX512BI<opc, MRMSrcMem,
11822 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
11823 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11824 [(set _dst.RC:$dst,(_dst.VT
11825 (OpNode (_src.VT _src.RC:$src1),
11826 (_src.VT (bitconvert
11827 (_src.LdFrag addr:$src2))))))]>,
11828 Sched<[sched.Folded, sched.ReadAfterFold]>;
11831 multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
11832 string OpcodeStr, X86SchedWriteWidths sched,
11834 let Predicates = [prd] in
11835 defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
11836 v8i64_info, v64i8_info>, EVEX_V512;
11837 let Predicates = [prd, HasVLX] in {
11838 defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
11839 v4i64x_info, v32i8x_info>, EVEX_V256;
11840 defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
11841 v2i64x_info, v16i8x_info>, EVEX_V128;
11845 defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
11846 SchedWritePSADBW, HasBWI>, EVEX_4V, WIG;
11848 // Transforms to swizzle an immediate to enable better matching when
11849 // memory operand isn't in the right place.
11850 def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
11851 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
11852 uint8_t Imm = N->getZExtValue();
11853 // Swap bits 1/4 and 3/6.
11854 uint8_t NewImm = Imm & 0xa5;
11855 if (Imm & 0x02) NewImm |= 0x10;
11856 if (Imm & 0x10) NewImm |= 0x02;
11857 if (Imm & 0x08) NewImm |= 0x40;
11858 if (Imm & 0x40) NewImm |= 0x08;
11859 return getI8Imm(NewImm, SDLoc(N));
11861 def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
11862 // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
11863 uint8_t Imm = N->getZExtValue();
11864 // Swap bits 2/4 and 3/5.
11865 uint8_t NewImm = Imm & 0xc3;
11866 if (Imm & 0x04) NewImm |= 0x10;
11867 if (Imm & 0x10) NewImm |= 0x04;
11868 if (Imm & 0x08) NewImm |= 0x20;
11869 if (Imm & 0x20) NewImm |= 0x08;
11870 return getI8Imm(NewImm, SDLoc(N));
11872 def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
11873 // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
11874 uint8_t Imm = N->getZExtValue();
11875 // Swap bits 1/2 and 5/6.
11876 uint8_t NewImm = Imm & 0x99;
11877 if (Imm & 0x02) NewImm |= 0x04;
11878 if (Imm & 0x04) NewImm |= 0x02;
11879 if (Imm & 0x20) NewImm |= 0x40;
11880 if (Imm & 0x40) NewImm |= 0x20;
11881 return getI8Imm(NewImm, SDLoc(N));
11883 def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
11884 // Convert a VPTERNLOG immediate by moving operand 1 to the end.
11885 uint8_t Imm = N->getZExtValue();
11886 // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
11887 uint8_t NewImm = Imm & 0x81;
11888 if (Imm & 0x02) NewImm |= 0x04;
11889 if (Imm & 0x04) NewImm |= 0x10;
11890 if (Imm & 0x08) NewImm |= 0x40;
11891 if (Imm & 0x10) NewImm |= 0x02;
11892 if (Imm & 0x20) NewImm |= 0x08;
11893 if (Imm & 0x40) NewImm |= 0x20;
11894 return getI8Imm(NewImm, SDLoc(N));
11896 def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
11897 // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
11898 uint8_t Imm = N->getZExtValue();
11899 // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
11900 uint8_t NewImm = Imm & 0x81;
11901 if (Imm & 0x02) NewImm |= 0x10;
11902 if (Imm & 0x04) NewImm |= 0x02;
11903 if (Imm & 0x08) NewImm |= 0x20;
11904 if (Imm & 0x10) NewImm |= 0x04;
11905 if (Imm & 0x20) NewImm |= 0x40;
11906 if (Imm & 0x40) NewImm |= 0x08;
11907 return getI8Imm(NewImm, SDLoc(N));
11910 multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
11911 X86FoldableSchedWrite sched, X86VectorVTInfo _,
11913 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11914 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11915 (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
11916 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11917 (OpNode (_.VT _.RC:$src1),
11920 (i8 timm:$src4)), 1, 1>,
11921 AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
11922 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11923 (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
11924 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11925 (OpNode (_.VT _.RC:$src1),
11927 (_.VT (bitconvert (_.LdFrag addr:$src3))),
11928 (i8 timm:$src4)), 1, 0>,
11929 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11930 Sched<[sched.Folded, sched.ReadAfterFold]>;
11931 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11932 (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
11933 OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
11934 "$src2, ${src3}"#_.BroadcastStr#", $src4",
11935 (OpNode (_.VT _.RC:$src1),
11937 (_.VT (_.BroadcastLdFrag addr:$src3)),
11938 (i8 timm:$src4)), 1, 0>, EVEX_B,
11939 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11940 Sched<[sched.Folded, sched.ReadAfterFold]>;
11941 }// Constraints = "$src1 = $dst"
11943 // Additional patterns for matching passthru operand in other positions.
11944 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11945 (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11947 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11948 _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11949 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11950 (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
11952 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11953 _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11955 // Additional patterns for matching zero masking with loads in other
11957 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11958 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11959 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11961 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11962 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11963 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11964 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11965 _.RC:$src2, (i8 timm:$src4)),
11967 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11968 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11970 // Additional patterns for matching masked loads with different
11972 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11973 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11974 _.RC:$src2, (i8 timm:$src4)),
11976 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11977 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11978 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11979 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11980 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11982 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11983 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11984 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11985 (OpNode _.RC:$src2, _.RC:$src1,
11986 (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
11988 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11989 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11990 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11991 (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
11992 _.RC:$src1, (i8 timm:$src4)),
11994 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11995 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11996 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11997 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11998 _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
12000 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
12001 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
12003 // Additional patterns for matching zero masking with broadcasts in other
12005 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
12006 (OpNode (_.BroadcastLdFrag addr:$src3),
12007 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
12009 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
12010 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
12011 (VPTERNLOG321_imm8 timm:$src4))>;
12012 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
12013 (OpNode _.RC:$src1,
12014 (_.BroadcastLdFrag addr:$src3),
12015 _.RC:$src2, (i8 timm:$src4)),
12017 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
12018 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
12019 (VPTERNLOG132_imm8 timm:$src4))>;
12021 // Additional patterns for matching masked broadcasts with different
12023 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
12024 (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
12025 _.RC:$src2, (i8 timm:$src4)),
12027 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
12028 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
12029 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
12030 (OpNode (_.BroadcastLdFrag addr:$src3),
12031 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
12033 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
12034 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
12035 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
12036 (OpNode _.RC:$src2, _.RC:$src1,
12037 (_.BroadcastLdFrag addr:$src3),
12038 (i8 timm:$src4)), _.RC:$src1)),
12039 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
12040 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
12041 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
12042 (OpNode _.RC:$src2,
12043 (_.BroadcastLdFrag addr:$src3),
12044 _.RC:$src1, (i8 timm:$src4)),
12046 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
12047 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
12048 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
12049 (OpNode (_.BroadcastLdFrag addr:$src3),
12050 _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
12052 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
12053 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
12056 multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
12057 AVX512VLVectorVTInfo _> {
12058 let Predicates = [HasAVX512] in
12059 defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
12060 _.info512, NAME>, EVEX_V512;
12061 let Predicates = [HasAVX512, HasVLX] in {
12062 defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
12063 _.info128, NAME>, EVEX_V128;
12064 defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
12065 _.info256, NAME>, EVEX_V256;
12069 defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
12070 avx512vl_i32_info>;
12071 defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
12072 avx512vl_i64_info>, REX_W;
12074 // Patterns to implement vnot using vpternlog instead of creating all ones
12075 // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
12076 // so that the result is only dependent on src0. But we use the same source
12077 // for all operands to prevent a false dependency.
12078 // TODO: We should maybe have a more generalized algorithm for folding to
12080 let Predicates = [HasAVX512] in {
12081 def : Pat<(v64i8 (vnot VR512:$src)),
12082 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
12083 def : Pat<(v32i16 (vnot VR512:$src)),
12084 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
12085 def : Pat<(v16i32 (vnot VR512:$src)),
12086 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
12087 def : Pat<(v8i64 (vnot VR512:$src)),
12088 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
12091 let Predicates = [HasAVX512, NoVLX] in {
12092 def : Pat<(v16i8 (vnot VR128X:$src)),
12095 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12096 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12097 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12098 (i8 15)), sub_xmm)>;
12099 def : Pat<(v8i16 (vnot VR128X:$src)),
12102 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12103 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12104 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12105 (i8 15)), sub_xmm)>;
12106 def : Pat<(v4i32 (vnot VR128X:$src)),
12109 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12110 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12111 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12112 (i8 15)), sub_xmm)>;
12113 def : Pat<(v2i64 (vnot VR128X:$src)),
12116 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12117 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12118 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12119 (i8 15)), sub_xmm)>;
12121 def : Pat<(v32i8 (vnot VR256X:$src)),
12124 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12125 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12126 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12127 (i8 15)), sub_ymm)>;
12128 def : Pat<(v16i16 (vnot VR256X:$src)),
12131 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12132 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12133 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12134 (i8 15)), sub_ymm)>;
12135 def : Pat<(v8i32 (vnot VR256X:$src)),
12138 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12139 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12140 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12141 (i8 15)), sub_ymm)>;
12142 def : Pat<(v4i64 (vnot VR256X:$src)),
12145 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12146 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12147 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12148 (i8 15)), sub_ymm)>;
12151 let Predicates = [HasVLX] in {
12152 def : Pat<(v16i8 (vnot VR128X:$src)),
12153 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12154 def : Pat<(v8i16 (vnot VR128X:$src)),
12155 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12156 def : Pat<(v4i32 (vnot VR128X:$src)),
12157 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12158 def : Pat<(v2i64 (vnot VR128X:$src)),
12159 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12161 def : Pat<(v32i8 (vnot VR256X:$src)),
12162 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12163 def : Pat<(v16i16 (vnot VR256X:$src)),
12164 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12165 def : Pat<(v8i32 (vnot VR256X:$src)),
12166 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12167 def : Pat<(v4i64 (vnot VR256X:$src)),
12168 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12171 //===----------------------------------------------------------------------===//
12172 // AVX-512 - FixupImm
12173 //===----------------------------------------------------------------------===//
12175 multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
12176 X86FoldableSchedWrite sched, X86VectorVTInfo _,
12177 X86VectorVTInfo TblVT>{
12178 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
12179 Uses = [MXCSR], mayRaiseFPException = 1 in {
12180 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12181 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12182 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12183 (X86VFixupimm (_.VT _.RC:$src1),
12185 (TblVT.VT _.RC:$src3),
12186 (i32 timm:$src4))>, Sched<[sched]>;
12187 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12188 (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
12189 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12190 (X86VFixupimm (_.VT _.RC:$src1),
12192 (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
12193 (i32 timm:$src4))>,
12194 Sched<[sched.Folded, sched.ReadAfterFold]>;
12195 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12196 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
12197 OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
12198 "$src2, ${src3}"#_.BroadcastStr#", $src4",
12199 (X86VFixupimm (_.VT _.RC:$src1),
12201 (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
12202 (i32 timm:$src4))>,
12203 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
12204 } // Constraints = "$src1 = $dst"
12207 multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
12208 X86FoldableSchedWrite sched,
12209 X86VectorVTInfo _, X86VectorVTInfo TblVT>
12210 : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
12211 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
12212 defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12213 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12214 OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
12215 "$src2, $src3, {sae}, $src4",
12216 (X86VFixupimmSAE (_.VT _.RC:$src1),
12218 (TblVT.VT _.RC:$src3),
12219 (i32 timm:$src4))>,
12220 EVEX_B, Sched<[sched]>;
12224 multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
12225 X86FoldableSchedWrite sched, X86VectorVTInfo _,
12226 X86VectorVTInfo _src3VT> {
12227 let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
12228 ExeDomain = _.ExeDomain in {
12229 defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
12230 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12231 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12232 (X86VFixupimms (_.VT _.RC:$src1),
12234 (_src3VT.VT _src3VT.RC:$src3),
12235 (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
12236 let Uses = [MXCSR] in
12237 defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
12238 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12239 OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
12240 "$src2, $src3, {sae}, $src4",
12241 (X86VFixupimmSAEs (_.VT _.RC:$src1),
12243 (_src3VT.VT _src3VT.RC:$src3),
12244 (i32 timm:$src4))>,
12245 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
12246 defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
12247 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
12248 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12249 (X86VFixupimms (_.VT _.RC:$src1),
12251 (_src3VT.VT (scalar_to_vector
12252 (_src3VT.ScalarLdFrag addr:$src3))),
12253 (i32 timm:$src4))>,
12254 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
12258 multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
12259 AVX512VLVectorVTInfo _Vec,
12260 AVX512VLVectorVTInfo _Tbl> {
12261 let Predicates = [HasAVX512] in
12262 defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
12263 _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
12264 EVEX_4V, EVEX_V512;
12265 let Predicates = [HasAVX512, HasVLX] in {
12266 defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
12267 _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
12268 EVEX_4V, EVEX_V128;
12269 defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
12270 _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
12271 EVEX_4V, EVEX_V256;
12275 defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
12276 SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
12277 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
12278 defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
12279 SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
12280 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, REX_W;
12281 defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
12282 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12283 defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
12284 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W;
12286 // Patterns used to select SSE scalar fp arithmetic instructions from
12289 // (1) a scalar fp operation followed by a blend
12291 // The effect is that the backend no longer emits unnecessary vector
12292 // insert instructions immediately after SSE scalar fp instructions
12293 // like addss or mulss.
12295 // For example, given the following code:
12296 // __m128 foo(__m128 A, __m128 B) {
12301 // Previously we generated:
12302 // addss %xmm0, %xmm1
12303 // movss %xmm1, %xmm0
12305 // We now generate:
12306 // addss %xmm1, %xmm0
12308 // (2) a vector packed single/double fp operation followed by a vector insert
12310 // The effect is that the backend converts the packed fp instruction
12311 // followed by a vector insert into a single SSE scalar fp instruction.
12313 // For example, given the following code:
12314 // __m128 foo(__m128 A, __m128 B) {
12315 // __m128 C = A + B;
12316 // return (__m128) {c[0], a[1], a[2], a[3]};
12319 // Previously we generated:
12320 // addps %xmm0, %xmm1
12321 // movss %xmm1, %xmm0
12323 // We now generate:
12324 // addss %xmm1, %xmm0
12326 // TODO: Some canonicalization in lowering would simplify the number of
12327 // patterns we have to try to match.
12328 multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
12329 string OpcPrefix, SDNode MoveNode,
12330 X86VectorVTInfo _, PatLeaf ZeroFP> {
12331 let Predicates = [HasAVX512] in {
12332 // extracted scalar math op with insert via movss
12333 def : Pat<(MoveNode
12334 (_.VT VR128X:$dst),
12335 (_.VT (scalar_to_vector
12336 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12338 (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
12339 (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
12340 def : Pat<(MoveNode
12341 (_.VT VR128X:$dst),
12342 (_.VT (scalar_to_vector
12343 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12344 (_.ScalarLdFrag addr:$src))))),
12345 (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;
12347 // extracted masked scalar math op with insert via movss
12348 def : Pat<(MoveNode (_.VT VR128X:$src1),
12350 (X86selects_mask VK1WM:$mask,
12352 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12355 (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
12356 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12357 VK1WM:$mask, _.VT:$src1,
12358 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12359 def : Pat<(MoveNode (_.VT VR128X:$src1),
12361 (X86selects_mask VK1WM:$mask,
12363 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12364 (_.ScalarLdFrag addr:$src2)),
12366 (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
12367 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12368 VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12370 // extracted masked scalar math op with insert via movss
12371 def : Pat<(MoveNode (_.VT VR128X:$src1),
12373 (X86selects_mask VK1WM:$mask,
12375 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12376 _.FRC:$src2), (_.EltVT ZeroFP)))),
12377 (!cast<I>("V"#OpcPrefix#"Zrr_Intkz")
12378 VK1WM:$mask, _.VT:$src1,
12379 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12380 def : Pat<(MoveNode (_.VT VR128X:$src1),
12382 (X86selects_mask VK1WM:$mask,
12384 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12385 (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
12386 (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12390 defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
12391 defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
12392 defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
12393 defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
12395 defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
12396 defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
12397 defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
12398 defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
12400 defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>;
12401 defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>;
12402 defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>;
12403 defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>;
12405 multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix,
12406 SDNode Move, X86VectorVTInfo _> {
12407 let Predicates = [HasAVX512] in {
12408 def : Pat<(_.VT (Move _.VT:$dst,
12409 (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
12410 (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
12414 defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
12415 defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
12416 defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>;
12418 //===----------------------------------------------------------------------===//
12419 // AES instructions
12420 //===----------------------------------------------------------------------===//
12422 multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
12423 let Predicates = [HasVLX, HasVAES] in {
12424 defm Z128 : AESI_binop_rm_int<Op, OpStr,
12425 !cast<Intrinsic>(IntPrefix),
12426 loadv2i64, 0, VR128X, i128mem>,
12427 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, WIG;
12428 defm Z256 : AESI_binop_rm_int<Op, OpStr,
12429 !cast<Intrinsic>(IntPrefix#"_256"),
12430 loadv4i64, 0, VR256X, i256mem>,
12431 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, WIG;
12433 let Predicates = [HasAVX512, HasVAES] in
12434 defm Z : AESI_binop_rm_int<Op, OpStr,
12435 !cast<Intrinsic>(IntPrefix#"_512"),
12436 loadv8i64, 0, VR512, i512mem>,
12437 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, WIG;
12440 defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
12441 defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
12442 defm VAESDEC : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
12443 defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
12445 //===----------------------------------------------------------------------===//
12446 // PCLMUL instructions - Carry less multiplication
12447 //===----------------------------------------------------------------------===//
12449 let Predicates = [HasAVX512, HasVPCLMULQDQ] in
12450 defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
12451 EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, WIG;
12453 let Predicates = [HasVLX, HasVPCLMULQDQ] in {
12454 defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
12455 EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, WIG;
12457 defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
12458 int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
12459 EVEX_CD8<64, CD8VF>, WIG;
12463 defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
12464 defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
12465 defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
12467 //===----------------------------------------------------------------------===//
12469 //===----------------------------------------------------------------------===//
12471 multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
12472 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12473 let Constraints = "$src1 = $dst",
12474 ExeDomain = VTI.ExeDomain in {
12475 defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12476 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12477 "$src3, $src2", "$src2, $src3",
12478 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
12479 T8PD, EVEX_4V, Sched<[sched]>;
12480 defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12481 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12482 "$src3, $src2", "$src2, $src3",
12483 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12484 (VTI.VT (VTI.LdFrag addr:$src3))))>,
12486 Sched<[sched.Folded, sched.ReadAfterFold]>;
12490 multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12491 X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
12492 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
12493 let Constraints = "$src1 = $dst",
12494 ExeDomain = VTI.ExeDomain in
12495 defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12496 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
12497 "${src3}"#VTI.BroadcastStr#", $src2",
12498 "$src2, ${src3}"#VTI.BroadcastStr,
12499 (OpNode VTI.RC:$src1, VTI.RC:$src2,
12500 (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12501 T8PD, EVEX_4V, EVEX_B,
12502 Sched<[sched.Folded, sched.ReadAfterFold]>;
12505 multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
12506 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12507 let Predicates = [HasVBMI2] in
12508 defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12510 let Predicates = [HasVBMI2, HasVLX] in {
12511 defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12513 defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12518 multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
12519 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12520 let Predicates = [HasVBMI2] in
12521 defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12523 let Predicates = [HasVBMI2, HasVLX] in {
12524 defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12526 defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12530 multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
12531 SDNode OpNode, X86SchedWriteWidths sched> {
12532 defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
12533 avx512vl_i16_info>, REX_W, EVEX_CD8<16, CD8VF>;
12534 defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
12535 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12536 defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
12537 avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
12540 multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
12541 SDNode OpNode, X86SchedWriteWidths sched> {
12542 defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
12543 avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
12544 REX_W, EVEX_CD8<16, CD8VF>;
12545 defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
12546 OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
12547 defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
12548 sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
12552 defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
12553 defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
12554 defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
12555 defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
12558 defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
12559 avx512vl_i8_info, HasVBMI2>, EVEX;
12560 defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
12561 avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;
12563 defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
12564 avx512vl_i8_info, HasVBMI2>, EVEX;
12565 defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
12566 avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;
12568 //===----------------------------------------------------------------------===//
12570 //===----------------------------------------------------------------------===//
12572 let Constraints = "$src1 = $dst" in
12573 multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12574 X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12575 bit IsCommutable> {
12576 let ExeDomain = VTI.ExeDomain in {
12577 defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12578 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12579 "$src3, $src2", "$src2, $src3",
12580 (VTI.VT (OpNode VTI.RC:$src1,
12581 VTI.RC:$src2, VTI.RC:$src3)),
12582 IsCommutable, IsCommutable>,
12583 EVEX_4V, T8PD, Sched<[sched]>;
12584 defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12585 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12586 "$src3, $src2", "$src2, $src3",
12587 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12588 (VTI.VT (VTI.LdFrag addr:$src3))))>,
12589 EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
12590 Sched<[sched.Folded, sched.ReadAfterFold,
12591 sched.ReadAfterFold]>;
12592 defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12593 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
12594 OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
12595 "$src2, ${src3}"#VTI.BroadcastStr,
12596 (OpNode VTI.RC:$src1, VTI.RC:$src2,
12597 (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12598 EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
12599 T8PD, Sched<[sched.Folded, sched.ReadAfterFold,
12600 sched.ReadAfterFold]>;
12604 multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
12605 X86SchedWriteWidths sched, bit IsCommutable> {
12606 let Predicates = [HasVNNI] in
12607 defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
12608 IsCommutable>, EVEX_V512;
12609 let Predicates = [HasVNNI, HasVLX] in {
12610 defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
12611 IsCommutable>, EVEX_V256;
12612 defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
12613 IsCommutable>, EVEX_V128;
12617 // FIXME: Is there a better scheduler class for VPDP?
12618 defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
12619 defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
12620 defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
12621 defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
12623 // Patterns to match VPDPWSSD from existing instructions/intrinsics.
12624 let Predicates = [HasVNNI] in {
12625 def : Pat<(v16i32 (add VR512:$src1,
12626 (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
12627 (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
12628 def : Pat<(v16i32 (add VR512:$src1,
12629 (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
12630 (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
12632 let Predicates = [HasVNNI,HasVLX] in {
12633 def : Pat<(v8i32 (add VR256X:$src1,
12634 (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
12635 (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
12636 def : Pat<(v8i32 (add VR256X:$src1,
12637 (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
12638 (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
12639 def : Pat<(v4i32 (add VR128X:$src1,
12640 (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
12641 (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
12642 def : Pat<(v4i32 (add VR128X:$src1,
12643 (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
12644 (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
12647 //===----------------------------------------------------------------------===//
12649 //===----------------------------------------------------------------------===//
12651 // FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
12652 defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
12653 avx512vl_i8_info, HasBITALG>;
12654 defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
12655 avx512vl_i16_info, HasBITALG>, REX_W;
12657 defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
12658 defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
12660 def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
12661 (X86Vpshufbitqmb node:$src1, node:$src2), [{
12662 return N->hasOneUse();
12665 multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12666 defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
12667 (ins VTI.RC:$src1, VTI.RC:$src2),
12669 "$src2, $src1", "$src1, $src2",
12670 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12671 (VTI.VT VTI.RC:$src2)),
12672 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12673 (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
12675 defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
12676 (ins VTI.RC:$src1, VTI.MemOp:$src2),
12678 "$src2, $src1", "$src1, $src2",
12679 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12680 (VTI.VT (VTI.LdFrag addr:$src2))),
12681 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12682 (VTI.VT (VTI.LdFrag addr:$src2)))>,
12683 EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
12684 Sched<[sched.Folded, sched.ReadAfterFold]>;
12687 multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12688 let Predicates = [HasBITALG] in
12689 defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
12690 let Predicates = [HasBITALG, HasVLX] in {
12691 defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
12692 defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
12696 // FIXME: Is there a better scheduler class for VPSHUFBITQMB?
12697 defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
12699 //===----------------------------------------------------------------------===//
12701 //===----------------------------------------------------------------------===//
12703 multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12704 X86SchedWriteWidths sched> {
12705 let Predicates = [HasGFNI, HasAVX512] in
12706 defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
12708 let Predicates = [HasGFNI, HasVLX] in {
12709 defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
12711 defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
12716 defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
12718 EVEX_CD8<8, CD8VF>, T8PD;
12720 multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
12721 X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12722 X86VectorVTInfo BcstVTI>
12723 : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
12724 let ExeDomain = VTI.ExeDomain in
12725 defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12726 (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
12727 OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
12728 "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
12729 (OpNode (VTI.VT VTI.RC:$src1),
12730 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
12731 (i8 timm:$src3))>, EVEX_B,
12732 Sched<[sched.Folded, sched.ReadAfterFold]>;
12735 multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12736 X86SchedWriteWidths sched> {
12737 let Predicates = [HasGFNI, HasAVX512] in
12738 defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
12739 v64i8_info, v8i64_info>, EVEX_V512;
12740 let Predicates = [HasGFNI, HasVLX] in {
12741 defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
12742 v32i8x_info, v4i64x_info>, EVEX_V256;
12743 defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
12744 v16i8x_info, v2i64x_info>, EVEX_V128;
12748 defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
12749 X86GF2P8affineinvqb, SchedWriteVecIMul>,
12750 EVEX_4V, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base;
12751 defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
12752 X86GF2P8affineqb, SchedWriteVecIMul>,
12753 EVEX_4V, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base;
12756 //===----------------------------------------------------------------------===//
12758 //===----------------------------------------------------------------------===//
12760 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
12761 Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
12762 defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
12763 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12764 "v4fmaddps", "$src3, $src2", "$src2, $src3",
12765 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12766 Sched<[SchedWriteFMA.ZMM.Folded]>;
12768 defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
12769 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12770 "v4fnmaddps", "$src3, $src2", "$src2, $src3",
12771 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12772 Sched<[SchedWriteFMA.ZMM.Folded]>;
12774 defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
12775 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12776 "v4fmaddss", "$src3, $src2", "$src2, $src3",
12777 []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12778 Sched<[SchedWriteFMA.Scl.Folded]>;
12780 defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
12781 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12782 "v4fnmaddss", "$src3, $src2", "$src2, $src3",
12783 []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12784 Sched<[SchedWriteFMA.Scl.Folded]>;
12787 //===----------------------------------------------------------------------===//
12789 //===----------------------------------------------------------------------===//
12791 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
12792 Constraints = "$src1 = $dst" in {
12793 defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
12794 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12795 "vp4dpwssd", "$src3, $src2", "$src2, $src3",
12796 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12797 Sched<[SchedWriteFMA.ZMM.Folded]>;
12799 defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
12800 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12801 "vp4dpwssds", "$src3, $src2", "$src2, $src3",
12802 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12803 Sched<[SchedWriteFMA.ZMM.Folded]>;
12806 let hasSideEffects = 0 in {
12807 let mayStore = 1, SchedRW = [WriteFStoreX] in
12808 def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
12809 let mayLoad = 1, SchedRW = [WriteFLoadX] in
12810 def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
12813 //===----------------------------------------------------------------------===//
12815 //===----------------------------------------------------------------------===//
12817 multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
12818 def rr : I<0x68, MRMSrcReg,
12819 (outs _.KRPC:$dst),
12820 (ins _.RC:$src1, _.RC:$src2),
12821 !strconcat("vp2intersect", _.Suffix,
12822 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12823 [(set _.KRPC:$dst, (X86vp2intersect
12824 _.RC:$src1, (_.VT _.RC:$src2)))]>,
12825 EVEX_4V, T8XD, Sched<[sched]>;
12827 def rm : I<0x68, MRMSrcMem,
12828 (outs _.KRPC:$dst),
12829 (ins _.RC:$src1, _.MemOp:$src2),
12830 !strconcat("vp2intersect", _.Suffix,
12831 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12832 [(set _.KRPC:$dst, (X86vp2intersect
12833 _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12834 EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>,
12835 Sched<[sched.Folded, sched.ReadAfterFold]>;
12837 def rmb : I<0x68, MRMSrcMem,
12838 (outs _.KRPC:$dst),
12839 (ins _.RC:$src1, _.ScalarMemOp:$src2),
12840 !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12841 ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12842 [(set _.KRPC:$dst, (X86vp2intersect
12843 _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
12844 EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
12845 Sched<[sched.Folded, sched.ReadAfterFold]>;
12848 multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
12849 let Predicates = [HasAVX512, HasVP2INTERSECT] in
12850 defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;
12852 let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
12853 defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
12854 defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
12858 defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
12859 defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, REX_W;
12861 multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
12862 X86SchedWriteWidths sched,
12863 AVX512VLVectorVTInfo _SrcVTInfo,
12864 AVX512VLVectorVTInfo _DstVTInfo,
12865 SDNode OpNode, Predicate prd,
12866 bit IsCommutable = 0> {
12867 let Predicates = [prd] in
12868 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
12869 _SrcVTInfo.info512, _DstVTInfo.info512,
12870 _SrcVTInfo.info512, IsCommutable>,
12871 EVEX_V512, EVEX_CD8<32, CD8VF>;
12872 let Predicates = [HasVLX, prd] in {
12873 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
12874 _SrcVTInfo.info256, _DstVTInfo.info256,
12875 _SrcVTInfo.info256, IsCommutable>,
12876 EVEX_V256, EVEX_CD8<32, CD8VF>;
12877 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
12878 _SrcVTInfo.info128, _DstVTInfo.info128,
12879 _SrcVTInfo.info128, IsCommutable>,
12880 EVEX_V128, EVEX_CD8<32, CD8VF>;
12884 let ExeDomain = SSEPackedSingle in
12885 defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
12886 SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
12887 avx512vl_f32_info, avx512vl_bf16_info,
12888 X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
12890 // Truncate Float to BFloat16
12891 multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
12892 X86SchedWriteWidths sched> {
12893 let ExeDomain = SSEPackedSingle in {
12894 let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
12895 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16bf16x_info, v16f32_info,
12896 X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
12898 let Predicates = [HasBF16, HasVLX] in {
12899 let Uses = []<Register>, mayRaiseFPException = 0 in {
12900 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v4f32x_info,
12901 null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
12903 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v8f32x_info,
12904 X86cvtneps2bf16, X86cvtneps2bf16,
12905 sched.YMM, "{1to8}", "{y}">, EVEX_V256;
12907 } // Predicates = [HasBF16, HasVLX]
12908 } // ExeDomain = SSEPackedSingle
12910 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12911 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
12913 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12914 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
12915 f128mem:$src), 0, "intel">;
12916 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12917 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
12919 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12920 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
12921 f256mem:$src), 0, "intel">;
12924 defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
12925 SchedWriteCvtPD2PS>, T8XS,
12926 EVEX_CD8<32, CD8VF>;
12928 let Predicates = [HasBF16, HasVLX] in {
12929 // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
12930 // patterns have been disabled with null_frag.
12931 def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
12932 (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12933 def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8bf16 VR128X:$src0),
12935 (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
12936 def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8bf16x_info.ImmAllZerosV,
12938 (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
12940 def : Pat<(v8bf16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
12941 (VCVTNEPS2BF16Z128rm addr:$src)>;
12942 def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8bf16 VR128X:$src0),
12944 (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12945 def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8bf16x_info.ImmAllZerosV,
12947 (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
12949 def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32
12950 (X86VBroadcastld32 addr:$src)))),
12951 (VCVTNEPS2BF16Z128rmb addr:$src)>;
12952 def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12953 (v8bf16 VR128X:$src0), VK4WM:$mask),
12954 (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12955 def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12956 v8bf16x_info.ImmAllZerosV, VK4WM:$mask),
12957 (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
12959 def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (v4f32 VR128X:$src))),
12960 (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12961 def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (loadv4f32 addr:$src))),
12962 (VCVTNEPS2BF16Z128rm addr:$src)>;
12964 def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (v8f32 VR256X:$src))),
12965 (VCVTNEPS2BF16Z256rr VR256X:$src)>;
12966 def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src))),
12967 (VCVTNEPS2BF16Z256rm addr:$src)>;
12969 def : Pat<(v8bf16 (X86VBroadcastld16 addr:$src)),
12970 (VPBROADCASTWZ128rm addr:$src)>;
12971 def : Pat<(v16bf16 (X86VBroadcastld16 addr:$src)),
12972 (VPBROADCASTWZ256rm addr:$src)>;
12974 def : Pat<(v8bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12975 (VPBROADCASTWZ128rr VR128X:$src)>;
12976 def : Pat<(v16bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12977 (VPBROADCASTWZ256rr VR128X:$src)>;
12979 def : Pat<(v8bf16 (X86vfpround (v8f32 VR256X:$src))),
12980 (VCVTNEPS2BF16Z256rr VR256X:$src)>;
12981 def : Pat<(v8bf16 (X86vfpround (loadv8f32 addr:$src))),
12982 (VCVTNEPS2BF16Z256rm addr:$src)>;
12984 // TODO: No scalar broadcast due to we don't support legal scalar bf16 so far.
12987 let Predicates = [HasBF16] in {
12988 def : Pat<(v32bf16 (X86VBroadcastld16 addr:$src)),
12989 (VPBROADCASTWZrm addr:$src)>;
12991 def : Pat<(v32bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12992 (VPBROADCASTWZrr VR128X:$src)>;
12994 def : Pat<(v16bf16 (X86vfpround (v16f32 VR512:$src))),
12995 (VCVTNEPS2BF16Zrr VR512:$src)>;
12996 def : Pat<(v16bf16 (X86vfpround (loadv16f32 addr:$src))),
12997 (VCVTNEPS2BF16Zrm addr:$src)>;
12998 // TODO: No scalar broadcast due to we don't support legal scalar bf16 so far.
13001 let Constraints = "$src1 = $dst" in {
13002 multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
13003 X86FoldableSchedWrite sched,
13004 X86VectorVTInfo _, X86VectorVTInfo src_v> {
13005 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13006 (ins src_v.RC:$src2, src_v.RC:$src3),
13007 OpcodeStr, "$src3, $src2", "$src2, $src3",
13008 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
13009 EVEX_4V, Sched<[sched]>;
13011 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13012 (ins src_v.RC:$src2, src_v.MemOp:$src3),
13013 OpcodeStr, "$src3, $src2", "$src2, $src3",
13014 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
13015 (src_v.LdFrag addr:$src3)))>, EVEX_4V,
13016 Sched<[sched.Folded, sched.ReadAfterFold]>;
13018 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13019 (ins src_v.RC:$src2, f32mem:$src3),
13021 !strconcat("${src3}", _.BroadcastStr,", $src2"),
13022 !strconcat("$src2, ${src3}", _.BroadcastStr),
13023 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
13024 (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
13025 EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
13028 } // Constraints = "$src1 = $dst"
13030 multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
13031 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
13032 AVX512VLVectorVTInfo src_v, Predicate prd> {
13033 let Predicates = [prd] in {
13034 defm Z : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
13035 src_v.info512>, EVEX_V512;
13037 let Predicates = [HasVLX, prd] in {
13038 defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
13039 src_v.info256>, EVEX_V256;
13040 defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
13041 src_v.info128>, EVEX_V128;
13045 let ExeDomain = SSEPackedSingle in
13046 defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
13047 avx512vl_f32_info, avx512vl_bf16_info,
13048 HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
13050 //===----------------------------------------------------------------------===//
13052 //===----------------------------------------------------------------------===//
13054 let Predicates = [HasFP16] in {
13055 // Move word ( r/m16) to Packed word
13056 def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
13057 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveFromGpr]>;
13058 def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src),
13059 "vmovw\t{$src, $dst|$dst, $src}",
13061 (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>,
13062 T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>;
13064 def : Pat<(f16 (bitconvert GR16:$src)),
13065 (f16 (COPY_TO_REGCLASS
13067 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
13069 def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))),
13070 (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
13071 def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))),
13072 (VMOVW2SHrr GR32:$src)>;
13073 // FIXME: We should really find a way to improve these patterns.
13074 def : Pat<(v8i32 (X86vzmovl
13075 (insert_subvector undef,
13076 (v4i32 (scalar_to_vector
13077 (and GR32:$src, 0xffff))),
13079 (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
13080 def : Pat<(v16i32 (X86vzmovl
13081 (insert_subvector undef,
13082 (v4i32 (scalar_to_vector
13083 (and GR32:$src, 0xffff))),
13085 (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
13087 def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))),
13088 (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
13090 // AVX 128-bit movw instruction write zeros in the high 128-bit part.
13091 def : Pat<(v8i16 (X86vzload16 addr:$src)),
13092 (VMOVWrm addr:$src)>;
13093 def : Pat<(v16i16 (X86vzload16 addr:$src)),
13094 (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
13096 // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
13097 def : Pat<(v32i16 (X86vzload16 addr:$src)),
13098 (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
13100 def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))),
13101 (VMOVWrm addr:$src)>;
13102 def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (i32 (zextloadi16 addr:$src))))),
13103 (VMOVWrm addr:$src)>;
13104 def : Pat<(v8i32 (X86vzmovl
13105 (insert_subvector undef,
13106 (v4i32 (scalar_to_vector
13107 (i32 (zextloadi16 addr:$src)))),
13109 (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
13110 def : Pat<(v16i32 (X86vzmovl
13111 (insert_subvector undef,
13112 (v4i32 (scalar_to_vector
13113 (i32 (zextloadi16 addr:$src)))),
13115 (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
13117 // Move word from xmm register to r/m16
13118 def VMOVSH2Wrr : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
13119 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveToGpr]>;
13120 def VMOVWmr : AVX512<0x7E, MRMDestMem, (outs),
13121 (ins i16mem:$dst, VR128X:$src),
13122 "vmovw\t{$src, $dst|$dst, $src}",
13123 [(store (i16 (extractelt (v8i16 VR128X:$src),
13124 (iPTR 0))), addr:$dst)]>,
13125 T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>;
13127 def : Pat<(i16 (bitconvert FR16X:$src)),
13128 (i16 (EXTRACT_SUBREG
13129 (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)),
13131 def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
13132 (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>;
13134 // Allow "vmovw" to use GR64
13135 let hasSideEffects = 0 in {
13136 def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
13137 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
13138 def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
13139 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>;
13143 // Convert 16-bit float to i16/u16
13144 multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13145 SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13146 AVX512VLVectorVTInfo _Dst,
13147 AVX512VLVectorVTInfo _Src,
13148 X86SchedWriteWidths sched> {
13149 let Predicates = [HasFP16] in {
13150 defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
13151 OpNode, MaskOpNode, sched.ZMM>,
13152 avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512,
13153 OpNodeRnd, sched.ZMM>, EVEX_V512;
13155 let Predicates = [HasFP16, HasVLX] in {
13156 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
13157 OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
13158 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
13159 OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
13163 // Convert 16-bit float to i16/u16 truncate
13164 multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13165 SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13166 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src,
13167 X86SchedWriteWidths sched> {
13168 let Predicates = [HasFP16] in {
13169 defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
13170 OpNode, MaskOpNode, sched.ZMM>,
13171 avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512,
13172 OpNodeRnd, sched.ZMM>, EVEX_V512;
13174 let Predicates = [HasFP16, HasVLX] in {
13175 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
13176 OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
13177 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
13178 OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
13182 defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt,
13183 X86cvtp2UIntRnd, avx512vl_i16_info,
13184 avx512vl_f16_info, SchedWriteCvtPD2DQ>,
13185 T_MAP5PS, EVEX_CD8<16, CD8VF>;
13186 defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp,
13187 X86VUintToFpRnd, avx512vl_f16_info,
13188 avx512vl_i16_info, SchedWriteCvtPD2DQ>,
13189 T_MAP5XD, EVEX_CD8<16, CD8VF>;
13190 defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si,
13191 X86cvttp2si, X86cvttp2siSAE,
13192 avx512vl_i16_info, avx512vl_f16_info,
13193 SchedWriteCvtPD2DQ>, T_MAP5PD, EVEX_CD8<16, CD8VF>;
13194 defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui,
13195 X86cvttp2ui, X86cvttp2uiSAE,
13196 avx512vl_i16_info, avx512vl_f16_info,
13197 SchedWriteCvtPD2DQ>, T_MAP5PS, EVEX_CD8<16, CD8VF>;
13198 defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int,
13199 X86cvtp2IntRnd, avx512vl_i16_info,
13200 avx512vl_f16_info, SchedWriteCvtPD2DQ>,
13201 T_MAP5PD, EVEX_CD8<16, CD8VF>;
13202 defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp,
13203 X86VSintToFpRnd, avx512vl_f16_info,
13204 avx512vl_i16_info, SchedWriteCvtPD2DQ>,
13205 T_MAP5XS, EVEX_CD8<16, CD8VF>;
13207 // Convert Half to Signed/Unsigned Doubleword
13208 multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13209 SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13210 X86SchedWriteWidths sched> {
13211 let Predicates = [HasFP16] in {
13212 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
13213 MaskOpNode, sched.ZMM>,
13214 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info,
13215 OpNodeRnd, sched.ZMM>, EVEX_V512;
13217 let Predicates = [HasFP16, HasVLX] in {
13218 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
13219 MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
13220 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
13221 MaskOpNode, sched.YMM>, EVEX_V256;
13225 // Convert Half to Signed/Unsigned Doubleword with truncation
13226 multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13227 SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13228 X86SchedWriteWidths sched> {
13229 let Predicates = [HasFP16] in {
13230 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
13231 MaskOpNode, sched.ZMM>,
13232 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info,
13233 OpNodeRnd, sched.ZMM>, EVEX_V512;
13235 let Predicates = [HasFP16, HasVLX] in {
13236 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
13237 MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
13238 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
13239 MaskOpNode, sched.YMM>, EVEX_V256;
13244 defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
13245 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
13246 EVEX_CD8<16, CD8VH>;
13247 defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt,
13248 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PS,
13249 EVEX_CD8<16, CD8VH>;
13251 defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si,
13252 X86cvttp2si, X86cvttp2siSAE,
13253 SchedWriteCvtPS2DQ>, T_MAP5XS,
13254 EVEX_CD8<16, CD8VH>;
13256 defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
13257 X86cvttp2ui, X86cvttp2uiSAE,
13258 SchedWriteCvtPS2DQ>, T_MAP5PS,
13259 EVEX_CD8<16, CD8VH>;
13261 // Convert Half to Signed/Unsigned Quardword
13262 multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13263 SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13264 X86SchedWriteWidths sched> {
13265 let Predicates = [HasFP16] in {
13266 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
13267 MaskOpNode, sched.ZMM>,
13268 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
13269 OpNodeRnd, sched.ZMM>, EVEX_V512;
13271 let Predicates = [HasFP16, HasVLX] in {
13272 // Explicitly specified broadcast string, since we take only 2 elements
13273 // from v8f16x_info source
13274 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
13275 MaskOpNode, sched.XMM, "{1to2}", "", f32mem>,
13277 // Explicitly specified broadcast string, since we take only 4 elements
13278 // from v8f16x_info source
13279 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
13280 MaskOpNode, sched.YMM, "{1to4}", "", f64mem>,
13285 // Convert Half to Signed/Unsigned Quardword with truncation
13286 multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13287 SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13288 X86SchedWriteWidths sched> {
13289 let Predicates = [HasFP16] in {
13290 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
13291 MaskOpNode, sched.ZMM>,
13292 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
13293 OpNodeRnd, sched.ZMM>, EVEX_V512;
13295 let Predicates = [HasFP16, HasVLX] in {
13296 // Explicitly specified broadcast string, since we take only 2 elements
13297 // from v8f16x_info source
13298 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
13299 MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128;
13300 // Explicitly specified broadcast string, since we take only 4 elements
13301 // from v8f16x_info source
13302 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
13303 MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256;
13307 defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
13308 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
13309 EVEX_CD8<16, CD8VQ>;
13311 defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
13312 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
13313 EVEX_CD8<16, CD8VQ>;
13315 defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
13316 X86cvttp2si, X86cvttp2siSAE,
13317 SchedWriteCvtPS2DQ>, T_MAP5PD,
13318 EVEX_CD8<16, CD8VQ>;
13320 defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
13321 X86cvttp2ui, X86cvttp2uiSAE,
13322 SchedWriteCvtPS2DQ>, T_MAP5PD,
13323 EVEX_CD8<16, CD8VQ>;
13325 // Convert Signed/Unsigned Quardword to Half
13326 multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13327 SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13328 X86SchedWriteWidths sched> {
13329 // we need "x"/"y"/"z" suffixes in order to distinguish between 128, 256 and
13330 // 512 memory forms of these instructions in Asm Parcer. They have the same
13331 // dest type - 'v8f16x_info'. We also specify the broadcast string explicitly
13332 // due to the same reason.
13333 let Predicates = [HasFP16] in {
13334 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode,
13335 MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
13336 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
13337 OpNodeRnd, sched.ZMM>, EVEX_V512;
13339 let Predicates = [HasFP16, HasVLX] in {
13340 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
13341 null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
13343 EVEX_V128, NotEVEX2VEXConvertible;
13344 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
13345 null_frag, null_frag, sched.YMM, "{1to4}", "{y}",
13347 EVEX_V256, NotEVEX2VEXConvertible;
13350 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
13351 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
13352 VR128X:$src), 0, "att">;
13353 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
13354 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
13355 VK2WM:$mask, VR128X:$src), 0, "att">;
13356 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
13357 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
13358 VK2WM:$mask, VR128X:$src), 0, "att">;
13359 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
13360 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
13361 i64mem:$src), 0, "att">;
13362 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
13363 "$dst {${mask}}, ${src}{1to2}}",
13364 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
13365 VK2WM:$mask, i64mem:$src), 0, "att">;
13366 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
13367 "$dst {${mask}} {z}, ${src}{1to2}}",
13368 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
13369 VK2WM:$mask, i64mem:$src), 0, "att">;
13371 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
13372 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
13373 VR256X:$src), 0, "att">;
13374 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
13375 "$dst {${mask}}, $src}",
13376 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
13377 VK4WM:$mask, VR256X:$src), 0, "att">;
13378 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
13379 "$dst {${mask}} {z}, $src}",
13380 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
13381 VK4WM:$mask, VR256X:$src), 0, "att">;
13382 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
13383 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
13384 i64mem:$src), 0, "att">;
13385 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
13386 "$dst {${mask}}, ${src}{1to4}}",
13387 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
13388 VK4WM:$mask, i64mem:$src), 0, "att">;
13389 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
13390 "$dst {${mask}} {z}, ${src}{1to4}}",
13391 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
13392 VK4WM:$mask, i64mem:$src), 0, "att">;
13394 def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
13395 (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
13396 VR512:$src), 0, "att">;
13397 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
13398 "$dst {${mask}}, $src}",
13399 (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
13400 VK8WM:$mask, VR512:$src), 0, "att">;
13401 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
13402 "$dst {${mask}} {z}, $src}",
13403 (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
13404 VK8WM:$mask, VR512:$src), 0, "att">;
13405 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
13406 (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
13407 i64mem:$src), 0, "att">;
13408 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
13409 "$dst {${mask}}, ${src}{1to8}}",
13410 (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
13411 VK8WM:$mask, i64mem:$src), 0, "att">;
13412 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
13413 "$dst {${mask}} {z}, ${src}{1to8}}",
13414 (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
13415 VK8WM:$mask, i64mem:$src), 0, "att">;
13418 defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
13419 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5PS,
13420 EVEX_CD8<64, CD8VF>;
13422 defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
13423 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5XD,
13424 EVEX_CD8<64, CD8VF>;
13426 // Convert half to signed/unsigned int 32/64
13427 defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
13428 X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>,
13429 T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13430 defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
13431 X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>,
13432 T_MAP5XS, REX_W, EVEX_CD8<16, CD8VT1>;
13433 defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
13434 X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>,
13435 T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13436 defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
13437 X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>,
13438 T_MAP5XS, REX_W, EVEX_CD8<16, CD8VT1>;
13440 defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
13441 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13442 "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13443 defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
13444 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13445 "{q}", HasFP16>, REX_W, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13446 defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
13447 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13448 "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13449 defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
13450 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13451 "{q}", HasFP16>, T_MAP5XS, REX_W, EVEX_CD8<16, CD8VT1>;
13453 let Predicates = [HasFP16] in {
13454 defm VCVTSI2SHZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
13455 v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
13456 T_MAP5XS, EVEX_CD8<32, CD8VT1>;
13457 defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
13458 v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
13459 T_MAP5XS, REX_W, EVEX_CD8<64, CD8VT1>;
13460 defm VCVTUSI2SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
13461 v8f16x_info, i32mem, loadi32,
13462 "cvtusi2sh","l">, T_MAP5XS, EVEX_CD8<32, CD8VT1>;
13463 defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
13464 v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
13465 T_MAP5XS, REX_W, EVEX_CD8<64, CD8VT1>;
13466 def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13467 (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
13469 def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13470 (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
13473 def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
13474 (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13475 def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
13476 (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13478 def : Pat<(f16 (any_sint_to_fp GR32:$src)),
13479 (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13480 def : Pat<(f16 (any_sint_to_fp GR64:$src)),
13481 (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13483 def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
13484 (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13485 def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
13486 (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13488 def : Pat<(f16 (any_uint_to_fp GR32:$src)),
13489 (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13490 def : Pat<(f16 (any_uint_to_fp GR64:$src)),
13491 (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13493 // Patterns used for matching vcvtsi2sh intrinsic sequences from clang
13494 // which produce unnecessary vmovsh instructions
13495 def : Pat<(v8f16 (X86Movsh
13496 (v8f16 VR128X:$dst),
13497 (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
13498 (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13500 def : Pat<(v8f16 (X86Movsh
13501 (v8f16 VR128X:$dst),
13502 (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
13503 (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13505 def : Pat<(v8f16 (X86Movsh
13506 (v8f16 VR128X:$dst),
13507 (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
13508 (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13510 def : Pat<(v8f16 (X86Movsh
13511 (v8f16 VR128X:$dst),
13512 (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
13513 (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13515 def : Pat<(v8f16 (X86Movsh
13516 (v8f16 VR128X:$dst),
13517 (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
13518 (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13520 def : Pat<(v8f16 (X86Movsh
13521 (v8f16 VR128X:$dst),
13522 (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
13523 (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13525 def : Pat<(v8f16 (X86Movsh
13526 (v8f16 VR128X:$dst),
13527 (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
13528 (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13530 def : Pat<(v8f16 (X86Movsh
13531 (v8f16 VR128X:$dst),
13532 (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
13533 (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13534 } // Predicates = [HasFP16]
13536 let Predicates = [HasFP16, HasVLX] in {
13537 // Special patterns to allow use of X86VMSintToFP for masking. Instruction
13538 // patterns have been disabled with null_frag.
13539 def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
13540 (VCVTQQ2PHZ256rr VR256X:$src)>;
13541 def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13543 (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13544 def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13546 (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13548 def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
13549 (VCVTQQ2PHZ256rm addr:$src)>;
13550 def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13552 (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13553 def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13555 (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13557 def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13558 (VCVTQQ2PHZ256rmb addr:$src)>;
13559 def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13560 (v8f16 VR128X:$src0), VK4WM:$mask),
13561 (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13562 def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13563 v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13564 (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13566 def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
13567 (VCVTQQ2PHZ128rr VR128X:$src)>;
13568 def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13570 (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13571 def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13573 (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13575 def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
13576 (VCVTQQ2PHZ128rm addr:$src)>;
13577 def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13579 (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13580 def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13582 (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13584 def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13585 (VCVTQQ2PHZ128rmb addr:$src)>;
13586 def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13587 (v8f16 VR128X:$src0), VK2WM:$mask),
13588 (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13589 def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13590 v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13591 (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
13593 // Special patterns to allow use of X86VMUintToFP for masking. Instruction
13594 // patterns have been disabled with null_frag.
13595 def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
13596 (VCVTUQQ2PHZ256rr VR256X:$src)>;
13597 def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13599 (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13600 def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13602 (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13604 def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
13605 (VCVTUQQ2PHZ256rm addr:$src)>;
13606 def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13608 (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13609 def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13611 (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13613 def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13614 (VCVTUQQ2PHZ256rmb addr:$src)>;
13615 def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13616 (v8f16 VR128X:$src0), VK4WM:$mask),
13617 (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13618 def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13619 v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13620 (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13622 def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
13623 (VCVTUQQ2PHZ128rr VR128X:$src)>;
13624 def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13626 (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13627 def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13629 (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13631 def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
13632 (VCVTUQQ2PHZ128rm addr:$src)>;
13633 def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13635 (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13636 def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13638 (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13640 def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13641 (VCVTUQQ2PHZ128rmb addr:$src)>;
13642 def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13643 (v8f16 VR128X:$src0), VK2WM:$mask),
13644 (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13645 def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13646 v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13647 (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
13650 let Constraints = "@earlyclobber $dst, $src1 = $dst" in {
13651 multiclass avx512_cfmaop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> {
13652 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13653 (ins _.RC:$src2, _.RC:$src3),
13654 OpcodeStr, "$src3, $src2", "$src2, $src3",
13655 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX_4V;
13657 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13658 (ins _.RC:$src2, _.MemOp:$src3),
13659 OpcodeStr, "$src3, $src2", "$src2, $src3",
13660 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX_4V;
13662 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13663 (ins _.RC:$src2, _.ScalarMemOp:$src3),
13664 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr),
13665 (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX_4V;
13667 } // Constraints = "@earlyclobber $dst, $src1 = $dst"
13669 multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
13670 X86VectorVTInfo _> {
13671 let Constraints = "@earlyclobber $dst, $src1 = $dst" in
13672 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13673 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
13674 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
13675 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>,
13676 EVEX_4V, EVEX_B, EVEX_RC;
13680 multiclass avx512_cfmaop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, bit IsCommutable> {
13681 let Predicates = [HasFP16] in {
13682 defm Z : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>,
13683 avx512_cfmaop_round<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
13684 EVEX_V512, Sched<[WriteFMAZ]>;
13686 let Predicates = [HasVLX, HasFP16] in {
13687 defm Z256 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v8f32x_info, IsCommutable>, EVEX_V256, Sched<[WriteFMAY]>;
13688 defm Z128 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v4f32x_info, IsCommutable>, EVEX_V128, Sched<[WriteFMAX]>;
13692 multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13693 SDNode MaskOpNode, SDNode OpNodeRnd, bit IsCommutable> {
13694 let Predicates = [HasFP16] in {
13695 defm Z : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
13696 WriteFMAZ, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>,
13697 avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, WriteFMAZ, v16f32_info,
13698 "", "@earlyclobber $dst">, EVEX_V512;
13700 let Predicates = [HasVLX, HasFP16] in {
13701 defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
13702 WriteFMAY, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V256;
13703 defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
13704 WriteFMAX, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V128;
13709 let Uses = [MXCSR] in {
13710 defm VFMADDCPH : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>,
13711 T_MAP6XS, EVEX_CD8<32, CD8VF>;
13712 defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>,
13713 T_MAP6XD, EVEX_CD8<32, CD8VF>;
13715 defm VFMULCPH : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc,
13716 x86vfmulcRnd, 1>, T_MAP6XS, EVEX_CD8<32, CD8VF>;
13717 defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc,
13718 x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6XD, EVEX_CD8<32, CD8VF>;
13722 multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
13723 bit IsCommutable> {
13724 let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
13725 defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13726 (ins VR128X:$src2, VR128X:$src3), OpcodeStr,
13727 "$src3, $src2", "$src2, $src3",
13728 (v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
13729 Sched<[WriteFMAX]>;
13730 defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst),
13731 (ins VR128X:$src2, ssmem:$src3), OpcodeStr,
13732 "$src3, $src2", "$src2, $src3",
13733 (v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
13734 Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
13735 defm rb : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13736 (ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
13737 "$rc, $src3, $src2", "$src2, $src3, $rc",
13738 (v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,
13739 EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
13743 multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13744 SDNode OpNodeRnd, bit IsCommutable> {
13745 let Predicates = [HasFP16] in {
13746 defm rr : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13747 (ins VR128X:$src1, VR128X:$src2), OpcodeStr,
13748 "$src2, $src1", "$src1, $src2",
13749 (v4f32 (OpNode VR128X:$src1, VR128X:$src2)),
13750 IsCommutable, IsCommutable, IsCommutable,
13751 X86selects, "@earlyclobber $dst">, Sched<[WriteFMAX]>;
13752 defm rm : AVX512_maskable<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
13753 (ins VR128X:$src1, ssmem:$src2), OpcodeStr,
13754 "$src2, $src1", "$src1, $src2",
13755 (v4f32 (OpNode VR128X:$src1, (sse_load_f32 addr:$src2))),
13756 0, 0, 0, X86selects, "@earlyclobber $dst">,
13757 Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
13758 defm rrb : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13759 (ins VR128X:$src1, VR128X:$src2, AVX512RC:$rc), OpcodeStr,
13760 "$rc, $src2, $src1", "$src1, $src2, $rc",
13761 (OpNodeRnd (v4f32 VR128X:$src1), (v4f32 VR128X:$src2), (i32 timm:$rc)),
13762 0, 0, 0, X86selects, "@earlyclobber $dst">,
13763 EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
13767 let Uses = [MXCSR] in {
13768 defm VFMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>,
13769 T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;
13770 defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>,
13771 T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;
13773 defm VFMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>,
13774 T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
13775 defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>,
13776 T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;