//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// Group template arguments that can be derived from the vector type (EltNum x
// EltVT). These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types, in which case NumElts is 1.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  RegisterClass RC = rc;
  ValueType EltVT = eltvt;
  int NumElts = numelts;

  // Corresponding mask register class.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Corresponding write-mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // The mask VT.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // VTName is a string name for the vector VT. For vector types it will be
  // v # NumElts # EltVT, so for a vector of 8 elements of i32 it will be
  // v8i32. It is a little more complex for scalar types, where NumElts = 1:
  // in that case we build v4f32 or v2f64.
  string VTName = "v" # !if (!eq (NumElts, 1),
                        !if (!eq (EltVT.Size, 32), 4,
                        !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;

  // The vector VT.
  ValueType VT = !cast<ValueType>(VTName);

  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits, e.g. 32 for v16i32.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types.
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of RC in bits, e.g. 512 for VR512.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
  // FP scalar memory operand for intrinsics - ssmem/sdmem.
  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));

  // Load patterns.
  // Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
  //       due to load promotion during legalization.
  PatFrag LdFrag = !cast<PatFrag>("load" #
                                  !if (!eq (TypeVariantName, "i"),
                                        !if (!eq (Size, 128), "v2i64",
                                        !if (!eq (Size, 256), "v4i64",
                                        !if (!eq (Size, 512), "v8i64",
                                        VTName))), VTName));

  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" #
                                         !if (!eq (TypeVariantName, "i"),
                                               !if (!eq (Size, 128), "v2i64",
                                               !if (!eq (Size, 256), "v4i64",
                                               !if (!eq (Size, 512), "v8i64",
                                               VTName))), VTName));

  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);

  ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
                                          !cast<ComplexPattern>("sse_load_f32"),
                                          !if (!eq (EltTypeName, "f64"),
                                               !cast<ComplexPattern>("sse_load_f64"),
                                               ?));

  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format. This is only
  // defined for NumElts <= 8.
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);

  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                     SSEPackedInt));

  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);

  // A vector type of the same width with element type i64. This is used to
  // create patterns for logic ops.
  ValueType i64VT = !cast<ValueType>("v" # !srl(Size, 6) # "i64");

  // A vector type of the same width with element type i32. This is used to
  // create the canonical constant zero node ImmAllZerosV.
  ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
  dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));

  string ZSuffix = !if (!eq (Size, 128), "Z128",
                   !if (!eq (Size, 256), "Z256", "Z"));
}
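
// For illustration, the fields this template derives for one of the
// instantiations below: v16i32_info (NumElts = 16, EltVT = i32) yields
// VTName = "v16i32", KRC = VK16, KRCWM = VK16WM, KVT = v16i1, Size = 512,
// MemOp = i512mem, LdFrag = loadv8i64 (integer VTs use the i64 load frags),
// BroadcastStr = "{1to16}", ExeDomain = SSEPackedInt and ZSuffix = "Z".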
def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;

// "x" in v32i8x_info means RC = VR256X
def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;

def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;

// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows us to use the same masking
// logic.
def i32x_info  : X86VectorVTInfo<1, i32, GR32, "si">;
def i64x_info  : X86VectorVTInfo<1, i64, GR64, "sq">;
def f32x_info  : X86VectorVTInfo<1, f32, VR128X, "ss">;
def f64x_info  : X86VectorVTInfo<1, f64, VR128X, "sd">;

class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}

def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
                                             v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
                                             v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
                                             v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
                                             v2i64x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
                                             v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
                                             v2f64x_info>;

class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
                       ValueType _vt> {
  RegisterClass KRC = _krc;
  RegisterClass KRCWM = _krcwm;
  ValueType KVT = _vt;
}

def v1i1_info  : X86KVectorVTInfo<VK1,  VK1WM,  v1i1>;
def v2i1_info  : X86KVectorVTInfo<VK2,  VK2WM,  v2i1>;
def v4i1_info  : X86KVectorVTInfo<VK4,  VK4WM,  v4i1>;
def v8i1_info  : X86KVectorVTInfo<VK8,  VK8WM,  v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;

// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> {
  let isCommutable = IsCommutable in
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>,
              EVEX_K {
      // In case of the 3src subclass this is overridden with a let.
      string Constraints = MaskingConstraint;
    }

  // Zero-masking does not add any restrictions to the commute-operands
  // transformation, so it is OK to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                       "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                       ZeroMaskingPattern>,
              EVEX_KZ;
}
// Common base class of AVX512_maskable and AVX512_maskable_3src.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDNode Select = vselect,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable, IsKZCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                                 dag Outs, dag Ins, string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskRHS,
                                 bit IsCommutable = 0, bit IsKCommutable = 0,
                                 SDNode Select = vselect> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                              (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                          [(set _.RC:$dst,
                              (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                          "$src0 = $dst", IsCommutable, IsKCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDNode Select = vselect> :
   AVX512_maskable_common<O, F, _, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                          Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                          IsKZCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins, string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS,
                                  bit IsCommutable = 0> :
   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                   RHS, IsCommutable, 0, IsCommutable, X86selects>;
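
// For example, a scalar FP op defined through this multiclass is masked with
// X86selects rather than vselect, since only element 0 is predicated, as in
//   vaddss xmm1 {k1}, xmm2, xmm3
// where the upper elements are not affected by the mask.
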
// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDNode Select = vselect,
                                bit MaskOnly = 0> :
   AVX512_maskable_common<O, F, _, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          !if(MaskOnly, (null_frag), RHS),
                          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                          Select, "", IsCommutable, IsKCommutable>;

// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
   AVX512_maskable_common<O, F, OutVT, Outs,
                          !con((ins InVT.RC:$src1), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                          (vselect InVT.KRCWM:$mask, RHS,
                           (bitconvert InVT.RC:$src1)),
                          vselect, "", IsCommutable>;

multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS,
                                     bit IsCommutable = 0,
                                     bit IsKCommutable = 0,
                                     bit MaskOnly = 0> :
   AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                        IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                        X86selects, MaskOnly>;

multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "$src0 = $dst">;

multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "">;

// Instructions with a mask that put the result in a mask register,
// like "compare" and "vptest".
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      list<dag> Pattern,
                                      list<dag> MaskingPattern,
                                      bit IsCommutable = 0> {
  let isCommutable = IsCommutable in
    def NAME: AVX512<O, F, Outs, Ins,
                     OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                   "$dst, "#IntelSrcAsm#"}",
                     Pattern>;

    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>, EVEX_K;
}

multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      dag RHS, dag MaskingRHS,
                                      bit IsCommutable = 0> :
   AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                              AttSrcAsm, IntelSrcAsm,
                              [(set _.KRC:$dst, RHS)],
                              [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;

multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag Ins, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, bit IsCommutable = 0> :
   AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                              !con((ins _.KRCWM:$mask), Ins),
                              OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                              (and _.KRCWM:$mask, RHS), IsCommutable>;
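
// For example, a masked compare built from this multiclass matches asm such as
//   vcmpltps k2 {k1}, zmm0, zmm1
// where the incoming mask k1 is folded in as an AND on the result, so
// masked-off bits of k2 are zeroed rather than preserved.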

multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
                                   dag Outs, dag Ins, string OpcodeStr,
                                   string AttSrcAsm, string IntelSrcAsm> :
   AVX512_maskable_custom_cmp<O, F, Outs,
                              Ins, !con((ins _.KRCWM:$mask), Ins), OpcodeStr,
                              AttSrcAsm, IntelSrcAsm, [], []>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable_logic<bits<8> O, Format F, X86VectorVTInfo _,
                                 dag Outs, dag Ins, string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskedRHS,
                                 bit IsCommutable = 0, SDNode Select = vselect> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                                (Select _.KRCWM:$mask, MaskedRHS, _.RC:$src0))],
                          [(set _.RC:$dst,
                                (Select _.KRCWM:$mask, MaskedRHS,
                                        _.ImmAllZerosV))],
                          "$src0 = $dst", IsCommutable>;


// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}

// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all-ones and all-zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                      (v16i32 immAllOnesV),
                                                      (v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                           (bc_v8i64 (v16i32 immAllOnesV)),
                                           (bc_v8i64 (v16i32 immAllZerosV))))]>;
}

let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}

// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fpimm0)]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//===----------------------------------------------------------------------===//

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;

    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                               (From.VT (bitconvert (From.LdFrag addr:$src2))),
                               (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                               (From.VT (bitconvert (From.LdFrag addr:$src2))),
                               (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;

multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                     X86VectorVTInfo To, PatFrag vinsert_insert,
                                     SDNodeXForm INSERT_get_vinsert_imm,
                                     list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vinsert_insert:$ins
                     (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                     To.RC:$src1, From.RC:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;

    def : Pat<(vinsert_insert:$ins
                     (To.VT To.RC:$src1),
                     (From.VT (bitconvert (From.LdFrag addr:$src2))),
                     (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                     To.RC:$src1, addr:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}

multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   null_frag, vinsert128_insert, sched>,
                                   VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 null_frag, vinsert256_insert, sched>,
                                 EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}

// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
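
// The records above match assembly such as (illustrative)
//   vinsertf32x4 zmm0 {k1}, zmm0, xmm1, 3
//   vinserti64x4 zmm0, zmm0, ymm1, 1
// with the immediate selecting the destination subvector lane.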

// Codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen patterns with the alternative types: insert VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen patterns with the alternative types: insert VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: insert VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(Cast.VT
               (vselect Cast.KRCWM:$mask,
                        (bitconvert
                         (vinsert_insert:$ins (To.VT To.RC:$src1),
                                              (From.VT From.RC:$src2),
                                              (iPTR imm))),
                        Cast.RC:$src0)),
              (!cast<Instruction>(InstrStr#"rrk")
               Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
               (INSERT_get_vinsert_imm To.RC:$ins))>;
    def : Pat<(Cast.VT
               (vselect Cast.KRCWM:$mask,
                        (bitconvert
                         (vinsert_insert:$ins (To.VT To.RC:$src1),
                                              (From.VT
                                               (bitconvert
                                                (From.LdFrag addr:$src2))),
                                              (iPTR imm))),
                        Cast.RC:$src0)),
              (!cast<Instruction>(InstrStr#"rmk")
               Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
               (INSERT_get_vinsert_imm To.RC:$ins))>;

    def : Pat<(Cast.VT
               (vselect Cast.KRCWM:$mask,
                        (bitconvert
                         (vinsert_insert:$ins (To.VT To.RC:$src1),
                                              (From.VT From.RC:$src2),
                                              (iPTR imm))),
                        Cast.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#"rrkz")
               Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
               (INSERT_get_vinsert_imm To.RC:$ins))>;
    def : Pat<(Cast.VT
               (vselect Cast.KRCWM:$mask,
                        (bitconvert
                         (vinsert_insert:$ins (To.VT To.RC:$src1),
                                              (From.VT
                                               (bitconvert
                                                (From.LdFrag addr:$src2))),
                                              (iPTR imm))),
                        Cast.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#"rmkz")
               Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
               (INSERT_get_vinsert_imm To.RC:$ins))>;
  }
}

defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

// vinsertps - insert f32 to XMM
let ExeDomain = SSEPackedSingle in {
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          imm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//===----------------------------------------------------------------------===//

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                             addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                         (ins To.MemOp:$dst, To.KRCWM:$mask,
                              From.RC:$src1, u8imm:$idx),
                         "vextract" # To.EltTypeName # "x" # To.NumElts #
                              "\t{$idx, $src1, $dst {${mask}}|"
                              "$dst {${mask}}, $src1, $idx}", []>,
                         EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;

// Codegen patterns for the alternative types.
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                      X86VectorVTInfo To, PatFrag vextract_extract,
                                      SDNodeXForm EXTRACT_get_vextract_imm,
                                      list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                          From.RC:$src1,
                          (EXTRACT_get_vextract_imm To.RC:$ext)))>;
    def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                                                   (iPTR imm))), addr:$dst),
              (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
                          (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}

multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   vextract128_extract, SchedRR, SchedMR>,
                                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                                   X86VectorVTInfo< 8, EltVT64, VR512>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   vextract256_extract, SchedRR, SchedMR>,
                                       VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}

// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
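
// The records above match assembly such as (illustrative)
//   vextractf32x4 xmm0 {k1} {z}, zmm1, 2
//   vextracti64x4 ymm0, zmm1, 1
// with the immediate selecting which subvector lane is extracted.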

// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC256.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: extract VEC256 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;


// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [NoVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                                (bitconvert
                                 (To.VT (vextract_extract:$ext
                                         (From.VT From.RC:$src), (iPTR imm)))),
                                Cast.RC:$src0)),
              (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                        Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                        (EXTRACT_get_vextract_imm To.RC:$ext)))>;

    def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                                (bitconvert
                                 (To.VT (vextract_extract:$ext
                                         (From.VT From.RC:$src), (iPTR imm)))),
                                Cast.ImmAllZerosV)),
              (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                        Cast.KRCWM:$mask, From.RC:$src,
                        (EXTRACT_get_vextract_imm To.RC:$ext)))>;
  }
}

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
                              v8f32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
                              v4f64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

// vextractps - extract 32 bits from XMM
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
                      (ins VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
                      EVEX, VEX_WIG, Sched<[WriteVecExtract]>;

def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
                      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                               addr:$dst)]>,
                      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
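
// For example (illustrative asm), the register form extracts one dword into
// a GPR and the memory form stores it directly:
//   vextractps eax, xmm0, 2
//   vextractps dword ptr [rdi], xmm0, 2
// The EVEX_CD8<32, CD8VT1> tag scales an 8-bit displacement by 4 bytes for
// the memory form's compressed disp8 encoding.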

//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//===---------------------------------------------------------------------===//

// Broadcast with a scalar argument.
multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
                                   string Name,
                                   X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#r)
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rk)
             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rkz)
             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}

// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     string Name,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast> {
  let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in {
    defm r : AVX512_maskable_split<opc, MRMSrcReg, MaskInfo,
                                   (outs MaskInfo.RC:$dst),
                                   (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
                                   (MaskInfo.VT
                                    (bitconvert
                                     (DestInfo.VT
                                      (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))),
                                   (MaskInfo.VT
                                    (bitconvert
                                     (DestInfo.VT
                                      (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))))>,
                                   T8PD, EVEX, Sched<[SchedRR]>;

    defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo,
                                   (outs MaskInfo.RC:$dst),
                                   (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                                   (MaskInfo.VT
                                    (bitconvert
                                     (DestInfo.VT (UnmaskedOp
                                      (SrcInfo.ScalarLdFrag addr:$src))))),
                                   (MaskInfo.VT
                                    (bitconvert
                                     (DestInfo.VT (X86VBroadcast
                                      (SrcInfo.ScalarLdFrag addr:$src)))))>,
                                   T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>,
                                   Sched<[SchedRM]>;
  }

  def : Pat<(MaskInfo.VT
             (bitconvert
              (DestInfo.VT (UnmaskedOp
               (SrcInfo.VT (scalar_to_vector
                (SrcInfo.ScalarLdFrag addr:$src))))))),
            (!cast<Instruction>(Name#MaskInfo.ZSuffix#m) addr:$src)>;
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                              (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#mk)
             MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>;
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                              (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#MaskInfo.ZSuffix#mkz)
             MaskInfo.KRCWM:$mask, addr:$src)>;
}

// Helper class to force mask and broadcast result to same type.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo> :
  avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo>;

multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128>,
             avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                     _.info128>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                        _.info128>,
                EVEX_V256;
  }
}

multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128>,
             avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                     _.info128>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                        _.info128>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info128, _.info128>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
                                        _.info128>,
                EVEX_V128;
  }
}

defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                           avx512vl_f32_info>;
defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                           avx512vl_f64_info>, VEX_W1X;

multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  let ExeDomain = _.ExeDomain in
    defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins SrcRC:$src),
                             "vpbroadcast"##_.Suffix, "$src", "$src",
                             (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX,
                             Sched<[SchedRR]>;
}

multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                                      X86VectorVTInfo _, SDPatternOperator OpNode,
                                      RegisterClass SrcRC, SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
    defm r : AVX512_maskable_custom<opc, MRMSrcReg,
                                    (outs _.RC:$dst), (ins GR32:$src),
                                    !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                                    !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                                    "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
                                    "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;

  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#r)
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}

multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                                         AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                                         RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
                                        OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
                                           _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
                                           _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}

multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}

defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;
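
// The records above cover GPR-source broadcasts, e.g. (illustrative asm)
//   vpbroadcastd zmm0 {k1} {z}, eax
// The byte/word variants route GR8/GR16 values through a GR32 INSERT_SUBREG
// because the instructions themselves encode a 32-bit register source.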

// Provide aliases for broadcast from the same register class that
// automatically does the extract.
multiclass avx512_int_broadcast_rm_lowering<string Name,
                                            X86VectorVTInfo DestInfo,
                                            X86VectorVTInfo SrcInfo,
                                            X86VectorVTInfo ExtInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#"r")
             (ExtInfo.VT (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm)))>;
}

multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                      AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                 WriteShuffle256Ld, _.info512, _.info128>,
             avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info256, _.info128>,
                                 EVEX_V512;
    // Defined separately to avoid redefinition.
    defm Z_Alt : avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info512, _.info128>;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128>,
                avx512_int_broadcast_rm_lowering<NAME, _.info256, _.info256, _.info128>,
                                 EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
                                    WriteShuffleXLd, _.info128, _.info128>,
                                 EVEX_V128;
  }
}

defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                               avx512vl_i8_info, HasBWI>;
defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                               avx512vl_i16_info, HasBWI>;
defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                               avx512vl_i32_info, HasAVX512>;
defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                               avx512vl_i64_info, HasAVX512>, VEX_W1X;
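
// The records above cover element broadcasts from XMM or memory, e.g.
// (illustrative asm)
//   vpbroadcastb zmm0, xmm1
//   vpbroadcastq zmm0 {k1}, qword ptr [rax]
// plus lowering patterns that broadcast from a YMM/ZMM source by first
// taking its low XMM subregister.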

multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                      X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                            (_Dst.VT (X86SubVBroadcast
                              (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
                            Sched<[SchedWriteShuffle.YMM.Folded]>,
                            AVX5128IBase, EVEX;
}

// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns so that we only use the DQ instructions when masking
// is available.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                                         X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in
    defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                              (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                              (null_frag),
                              (_Dst.VT (X86SubVBroadcast
                                (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
                              Sched<[SchedWriteShuffle.YMM.Folded]>,
                              AVX5128IBase, EVEX;
}

let Predicates = [HasAVX512] in {
  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
  def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZm addr:$src)>;
}

let Predicates = [HasVLX] in {
  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZ128m addr:$src)>;
  def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZ256m addr:$src)>;
}
let Predicates = [HasVLX, HasBWI] in {
  // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
  // This means we'll encounter truncated i32 loads; match that here.
  def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
  def : Pat<(v8i16 (X86VBroadcast
                    (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast
                     (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
//

defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                                                  v16i32_info, v4i32x_info>,
                                                  EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                                                  v16f32_info, v4f32x_info>,
                                                  EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                                                  v8i64_info, v4i64x_info>, VEX_W,
                                                  EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
                                                  v8f64_info, v4f64x_info>, VEX_W,
                                                  EVEX_V512, EVEX_CD8<64, CD8VT4>;
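
// Illustrative semantics (comment only): the subvector broadcasts replicate
// a 128- or 256-bit memory block across the destination, e.g.
//   vbroadcasti32x4 zmm0, xmmword ptr [rdi]  ; four copies of a 128-bit block
//   vbroadcastf64x4 zmm1, ymmword ptr [rsi]  ; two copies of a 256-bit block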

let Predicates = [HasAVX512] in {
def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (bc_v16i16 (loadv4i64 addr:$src)))),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
          (VBROADCASTI64X4rm addr:$src)>;

// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
          (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v4f64 VR256X:$src), 1)>;
def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
          (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v8f32 VR256X:$src), 1)>;
def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v4i64 VR256X:$src), 1)>;
def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v8i32 VR256X:$src), 1)>;
def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v16i16 VR256X:$src), 1)>;
def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v32i8 VR256X:$src), 1)>;

def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4rm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   (bc_v16f32 (v16i32 immAllZerosV))),
          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   (v16i32 immAllZerosV)),
          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
                   (bc_v8f64 (v16i32 immAllZerosV))),
          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
                   (bc_v8i64 (v16i32 immAllZerosV))),
          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
                   VR512:$src0),
          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
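
// Illustrative note: the vselect patterns above exist because masking is
// legalized on the *bitcast* result type. A masked v16f32 select over a
// v8f64 subvector broadcast, for example, still folds to a single
//   vbroadcastf32x4 zmm0 {k1} {z}, xmmword ptr [rdi]
// rather than a broadcast followed by a separate blend.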

let Predicates = [HasVLX] in {
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                                                      v8i32x_info, v4i32x_info>,
                                                      EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                                                      v8f32x_info, v4f32x_info>,
                                                      EVEX_V256, EVEX_CD8<32, CD8VT4>;

def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4Z256rm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   (bc_v8f32 (v8i32 immAllZerosV))),
          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   (v8i32 immAllZerosV)),
          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;

// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v2f64 VR128X:$src), 1)>;
def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v4f32 VR128X:$src), 1)>;
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v2i64 VR128X:$src), 1)>;
def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v4i32 VR128X:$src), 1)>;
def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v8i16 VR128X:$src), 1)>;
def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v16i8 VR128X:$src), 1)>;
}

let Predicates = [HasVLX, HasDQI] in {
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                                                         v4i64x_info, v2i64x_info>, VEX_W1X,
                                                         EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                                                         v4f64x_info, v2f64x_info>, VEX_W1X,
                                                         EVEX_V256, EVEX_CD8<64, CD8VT2>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   (bc_v4f64 (v8i32 immAllZerosV))),
          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   (bc_v4i64 (v8i32 immAllZerosV))),
          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   VR256X:$src0),
          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}

let Predicates = [HasDQI] in {
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                                                     v8i64_info, v2i64x_info>, VEX_W,
                                                     EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                                                     v16i32_info, v8i32x_info>,
                                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                                                     v8f64_info, v2f64x_info>, VEX_W,
                                                     EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                                                     v16f32_info, v8f32x_info>,
                                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                   (bc_v16f32 (v16i32 immAllZerosV))),
          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                   (v16i32 immAllZerosV)),
          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   (bc_v8f64 (v16i32 immAllZerosV))),
          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   (bc_v8i64 (v16i32 immAllZerosV))),
          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   VR512:$src0),
          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}

multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
    defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info512,
                                          _Src.info512, _Src.info128, null_frag>,
                                          EVEX_V512;
  let Predicates = [HasDQI, HasVLX] in
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info256,
                                          _Src.info256, _Src.info128, null_frag>,
                                          EVEX_V256;
}

multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
           avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {

  let Predicates = [HasDQI, HasVLX] in
    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
                                          WriteShuffleXLd, _Dst.info128,
                                          _Src.info128, _Src.info128, null_frag>,
                                          EVEX_V128;
}

defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                                                     avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                                                    avx512vl_f32_info, avx512vl_f64_info>;

let Predicates = [HasVLX] in {
def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))),
          (VBROADCASTSSZ256r (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>;
def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))),
          (VBROADCASTSDZ256r (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>;
}

def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
          (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))>;
def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
          (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>;

def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
          (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))>;
def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
          (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>;
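
// Illustrative note: a same-type broadcast such as
// (v8f64 (X86VBroadcast (v8f64 VR512:$src))) only needs element 0, so the
// patterns above extract the XMM subregister and emit, e.g.
//   vbroadcastsd zmm0, xmm1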

//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST MASK TO VECTOR REGISTER
//---
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, RegisterClass KRC> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
                      EVEX, Sched<[WriteShuffle]>;
}

multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
  let Predicates = [HasCDI] in
    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
  let Predicates = [HasCDI, HasVLX] in {
    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
  }
}

defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                             avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                             avx512vl_i64_info, VK8>, VEX_W;
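
// Illustrative semantics (comment only): these CDI instructions broadcast
// the mask register itself into every element, zero-extended, e.g.
//   vpbroadcastmw2d zmm0, k1   ; each dword of zmm0 = zext(k1)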

//===----------------------------------------------------------------------===//
// -- VPERMI2 - 3 source operands form --
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0 in {
  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched]>;

  let mayLoad = 1 in
  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
           (ins _.RC:$src2, _.MemOp:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
                  (_.VT (bitconvert (_.LdFrag addr:$src3))))), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, mayLoad = 1 in
  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr),
            (_.VT (X86VPermt2 _.RC:$src2,
                   IdxVT.RC:$src1, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
            AVX5128IBase, EVEX_4V, EVEX_B,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
             avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                                 ShuffleMask.info128>,
                   avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                    ShuffleMask.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                                 ShuffleMask.info256>,
                   avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                    ShuffleMask.info256>, EVEX_V256;
  }
}

multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  let Predicates = [Prd] in
    defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                             Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
    defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                                 Idx.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                                 Idx.info256>, EVEX_V256;
  }
}

defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
                   avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                   VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                   EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
                   avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
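
// Illustrative semantics for the two-source permutes above: the operand
// tied to $dst supplies the indices, and the concatenation of the other
// two operands forms one logical table. For 512-bit vpermi2d, roughly:
//   for i in 0..15: k = idx[i] & 31;
//                   dst[i] = (k < 16) ? src2[k] : src3[k - 16];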

// Extra patterns to deal with extra bitcasts due to passthru and index being
// different types on the fp versions.
multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
                                  X86VectorVTInfo IdxVT,
                                  X86VectorVTInfo CastVT> {
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                           (X86VPermt2 (_.VT _.RC:$src2),
                                       (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
                           (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
                                                _.RC:$src2, _.RC:$src3)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                           (X86VPermt2 _.RC:$src2,
                                       (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
                                       (_.LdFrag addr:$src3)),
                           (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
                                                _.RC:$src2, addr:$src3)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                           (X86VPermt2 _.RC:$src2,
                                       (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
                                       (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
                           (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
                                                 _.RC:$src2, addr:$src3)>;
}

// TODO: Should we add more casts? The vXi64 case is common due to ABI.
defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;

// VPERMT2
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins IdxVT.RC:$src2, _.RC:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched]>;

  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins IdxVT.RC:$src2, _.MemOp:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
                  (bitconvert (_.LdFrag addr:$src3)))), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr),
            (_.VT (X86VPermt2 _.RC:$src1,
                   IdxVT.RC:$src2, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
            AVX5128IBase, EVEX_4V, EVEX_B,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
             avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                                 ShuffleMask.info128>,
                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                    ShuffleMask.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                                 ShuffleMask.info256>,
                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                    ShuffleMask.info256>, EVEX_V256;
  }
}

multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
  let Predicates = [Prd] in
    defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                             Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
    defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                                 Idx.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                                 Idx.info256>, EVEX_V256;
  }
}

defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
                   avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                   VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                   EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
                   avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
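
// Illustrative note: vpermt2* and vpermi2* compute the same shuffle; they
// differ only in which operand is tied to (and clobbered by) the
// destination, e.g.
//   vpermi2d zmm1, zmm2, zmm3   ; zmm1 holds the indices and is overwritten
//   vpermt2d zmm1, zmm2, zmm3   ; zmm2 holds the indices, zmm1 is overwritten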

//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
//

multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
             EVEX_4V, Sched<[sched]>;
  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(OpcodeStr,
              "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
              []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
               (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
               !strconcat(OpcodeStr,
               "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
               []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
  let mayLoad = 1 in {
  def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
              !strconcat(OpcodeStr,
              "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
              []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
               (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
               !strconcat(OpcodeStr,
               "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
               []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
               Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
  }
  }
}
multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let mayLoad = 1, hasSideEffects = 0 in {
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
               (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
               !strconcat(OpcodeStr,
               "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
               "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
               EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
               Sched<[sched.Folded, sched.ReadAfterFold]>;

  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
                (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
                !strconcat(OpcodeStr,
                "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
                "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
                EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;

  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
              (ins _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr,
              "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
              "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
              EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in
    defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
             EVEX_V512;

  let Predicates = [HasBWI, HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
                              avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
                              avx512vl_f64_info>, VEX_W;
defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
                              avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
                              avx512vl_i64_info>, VEX_W;
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
                              avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
                              avx512vl_i16_info>, VEX_W;
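
// Illustrative semantics (comment only): blend-with-mask selects per
// element between the two sources under the writemask, e.g.
//   vblendmps zmm0 {k1}, zmm1, zmm2   ; zmm0[i] = k1[i] ? zmm2[i] : zmm1[i]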

//===----------------------------------------------------------------------===//
// Compare Instructions
//===----------------------------------------------------------------------===//

// avx512_cmp_scalar - AVX512 CMPSS and CMPSD

multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
                             X86FoldableSchedWrite sched> {
  defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                      (outs _.KRC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                      "vcmp${cc}"#_.Suffix,
                      "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              imm:$cc)>, EVEX_4V, Sched<[sched]>;

  defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                      (outs _.KRC:$dst),
                      (ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc),
                      "vcmp${cc}"#_.Suffix,
                      "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
                              imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                       (outs _.KRC:$dst),
                       (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                       "vcmp${cc}"#_.Suffix,
                       "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                       (OpNodeRnd (_.VT _.RC:$src1),
                                  (_.VT _.RC:$src2),
                                  imm:$cc,
                                  (i32 FROUND_NO_EXC))>,
                       EVEX_4V, EVEX_B, Sched<[sched]>;
  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    defm  rri_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                         (outs _.KRC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                         "vcmp"#_.Suffix,
                         "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V,
                         Sched<[sched]>, NotMemoryFoldable;
    let mayLoad = 1 in
    defm  rmi_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
                         (outs _.KRC:$dst),
                         (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                         "vcmp"#_.Suffix,
                         "$cc, $src2, $src1", "$src1, $src2, $cc">,
                         EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;

    defm  rrb_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                         (outs _.KRC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                         "vcmp"#_.Suffix,
                         "$cc, {sae}, $src2, $src1", "$src1, $src2, {sae}, $cc">,
                         EVEX_4V, EVEX_B, Sched<[sched]>, NotMemoryFoldable;
  } // let isAsmParserOnly = 1, hasSideEffects = 0

  let isCodeGenOnly = 1 in {
    let isCommutable = 1 in
    def rr : AVX512Ii8<0xC2, MRMSrcReg,
                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
                !strconcat("vcmp${cc}", _.Suffix,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                          _.FRC:$src2,
                                          imm:$cc))]>,
                EVEX_4V, Sched<[sched]>;
    def rm : AVX512Ii8<0xC2, MRMSrcMem,
                (outs _.KRC:$dst),
                (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
                !strconcat("vcmp${cc}", _.Suffix,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                          (_.ScalarLdFrag addr:$src2),
                                          imm:$cc))]>,
                EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

let Predicates = [HasAVX512] in {
  let ExeDomain = SSEPackedSingle in
  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd,
                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
  let ExeDomain = SSEPackedDouble in
  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd,
                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
}

multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _,
                              bit IsCommutable> {
  let isCommutable = IsCommutable in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))]>,
             EVEX_4V, Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                                       (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
             EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = IsCommutable in
  def rrk : AVX512BI<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))]>,
              EVEX_4V, EVEX_K, Sched<[sched]>;
  def rmk : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (OpNode (_.VT _.RC:$src1),
                                             (_.VT (bitconvert
                                                    (_.LdFrag addr:$src2))))))]>,
              EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  bit IsCommutable> :
           avx512_icmp_packed<opc, OpcodeStr, OpNode, sched, _, IsCommutable> {
  def rmb : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
                         "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                                        (X86VBroadcast (_.ScalarLdFrag addr:$src2))))]>,
              EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmbk : AVX512BI<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                   _.ScalarMemOp:$src2),
               !strconcat(OpcodeStr,
                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                      (OpNode (_.VT _.RC:$src1),
                                              (X86VBroadcast
                                               (_.ScalarLdFrag addr:$src2)))))]>,
               EVEX_4V, EVEX_K, EVEX_B,
               Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                                 X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
                                 bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.ZMM,
                              VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.YMM,
                                   VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.XMM,
                                   VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     PatFrag OpNode, X86SchedWriteWidths sched,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.ZMM,
                                  VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.YMM,
                                       VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.XMM,
                                       VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}

// This fragment treats X86cmpm as commutable to help match loads in both
// operands for PCMPEQ.
def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2),
                           (X86setcc_commute node:$src1, node:$src2, SETEQ)>;
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
                         (setcc node:$src1, node:$src2, SETGT)>;

// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
// increase the pattern complexity the way an immediate would.
let AddedComplexity = 2 in {
// FIXME: Is there a better scheduler class for VPCMP?
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c,
                      SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c,
                      SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c,
                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c,
                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
                      SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
                      SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
}
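
// Illustrative use of the compares above: the result is a mask register,
// which typically feeds a masked op or a kmov, e.g.
//   vpcmpeqd k1, zmm0, zmm1
//   vpcmpgtq k2 {k1}, zmm2, zmm3   ; k2 = k1 & (zmm2 > zmm3)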

multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
                          PatFrag CommFrag, X86FoldableSchedWrite sched,
                          X86VectorVTInfo _, string Name> {
  let isCommutable = 1 in
  def rri : AVX512AIi8<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
             !strconcat("vpcmp${cc}", Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
                                                (_.VT _.RC:$src2),
                                                cond)))]>,
             EVEX_4V, Sched<[sched]>;
  def rmi : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
             !strconcat("vpcmp${cc}", Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (_.KVT
                                (Frag:$cc
                                 (_.VT _.RC:$src1),
                                 (_.VT (bitconvert (_.LdFrag addr:$src2))),
                                 cond)))]>,
             EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = 1 in
  def rrik : AVX512AIi8<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                  AVX512ICC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                         "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT (Frag:$cc (_.VT _.RC:$src1),
                                                      (_.VT _.RC:$src2),
                                                      cond))))]>,
              EVEX_4V, EVEX_K, Sched<[sched]>;
  def rmik : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                  AVX512ICC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                         "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT
                                      (Frag:$cc
                                       (_.VT _.RC:$src1),
                                       (_.VT (bitconvert
                                              (_.LdFrag addr:$src2))),
                                       cond))))]>,
              EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512AIi8<opc, MRMSrcReg,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
               !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
                          "$dst, $src1, $src2, $cc}"), []>,
               EVEX_4V, Sched<[sched]>, NotMemoryFoldable;
    let mayLoad = 1 in
    def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
               !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
                          "$dst, $src1, $src2, $cc}"), []>,
               EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
    def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                   u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2, $cc}"), []>,
               EVEX_4V, EVEX_K, Sched<[sched]>, NotMemoryFoldable;
    let mayLoad = 1 in
    def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                   u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2, $cc}"), []>,
               EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>,
               NotMemoryFoldable;
  }

  def : Pat<(_.KVT (CommFrag:$cc (bitconvert (_.LdFrag addr:$src2)),
                                 (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag:$cc (bitconvert (_.LdFrag addr:$src2)),
                                      (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (CommFrag.OperandTransform $cc))>;
}

multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
                              PatFrag CommFrag, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name> :
           avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched, _, Name> {
  def rmib : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                  AVX512ICC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                         "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
                         "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
              [(set _.KRC:$dst, (_.KVT (Frag:$cc
                                        (_.VT _.RC:$src1),
                                        (X86VBroadcast
                                         (_.ScalarLdFrag addr:$src2)),
                                        cond)))]>,
              EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmibk : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                   _.ScalarMemOp:$src2, AVX512ICC:$cc),
               !strconcat("vpcmp${cc}", Suffix,
                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                      (_.KVT (Frag:$cc
                                              (_.VT _.RC:$src1),
                                              (X86VBroadcast
                                               (_.ScalarLdFrag addr:$src2)),
                                              cond))))]>,
               EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
    def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                   u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
                          "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
               EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>,
               NotMemoryFoldable;
    def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                   _.ScalarMemOp:$src2, u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
               EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>,
               NotMemoryFoldable;
  }

  def : Pat<(_.KVT (CommFrag:$cc (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                                 (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag:$cc (X86VBroadcast
                                       (_.ScalarLdFrag addr:$src2)),
                                      (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (CommFrag.OperandTransform $cc))>;
}

multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
                             PatFrag CommFrag, X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.ZMM,
                          VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.YMM,
                               VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.XMM,
                               VTInfo.info128, NAME>, EVEX_V128;
  }
}

multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
                                 PatFrag CommFrag, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.ZMM,
                              VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.YMM,
                                   VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.XMM,
                                   VTInfo.info128, NAME>, EVEX_V128;
  }
}

def X86pcmpm_imm : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;

// Swapped operand version of the above.
def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  SSECC = X86::getSwappedVPCMPImm(SSECC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;

def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                       (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Same as above, but commutes the immediate. Used for load folding.
def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                               (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                        (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Same as above, but commutes the immediate. Used for load folding.
def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;
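
// For reference (Intel SDM): the VPCMP/VPCMPU immediate encodes the
// predicate as 0=EQ, 1=LT, 2=LE, 3=FALSE, 4=NE, 5=NLT(>=), 6=NLE(>),
// 7=TRUE; getVPCMPImmForCond/getSwappedVPCMPImm map ISD condition codes
// onto (and commute within) this encoding.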

// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_commute,
                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                                EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_commute,
                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                                 EVEX_CD8<8, CD8VF>;

defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_commute,
                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                                VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_commute,
                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                                 VEX_W, EVEX_CD8<16, CD8VF>;

defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_commute,
                                    SchedWriteVecALU, avx512vl_i32_info,
                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_commute,
                                     SchedWriteVecALU, avx512vl_i32_info,
                                     HasAVX512>, EVEX_CD8<32, CD8VF>;

defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_commute,
                                    SchedWriteVecALU, avx512vl_i64_info,
                                    HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_commute,
                                     SchedWriteVecALU, avx512vl_i64_info,
                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;

multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
                              string Name> {
  defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                   (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                   "vcmp${cc}"#_.Suffix,
                   "$src2, $src1", "$src1, $src2",
                   (X86cmpm (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            imm:$cc), 1>,
                   Sched<[sched]>;

  defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                   (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
                   "vcmp${cc}"#_.Suffix,
                   "$src2, $src1", "$src1, $src2",
                   (X86cmpm (_.VT _.RC:$src1),
                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
                            imm:$cc)>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                   (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
                   "vcmp${cc}"#_.Suffix,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (X86cmpm (_.VT _.RC:$src1),
                            (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                            imm:$cc)>,
                   EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                       (outs _.KRC:$dst),
                       (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                       "vcmp"#_.Suffix,
                       "$cc, $src2, $src1", "$src1, $src2, $cc">,
                       Sched<[sched]>, NotMemoryFoldable;

    let mayLoad = 1 in {
      defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
                         (outs _.KRC:$dst),
                         (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
                         "vcmp"#_.Suffix,
                         "$cc, $src2, $src1", "$src1, $src2, $cc">,
                         Sched<[sched.Folded, sched.ReadAfterFold]>,
                         NotMemoryFoldable;

      defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
                          (outs _.KRC:$dst),
                          (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                          "vcmp"#_.Suffix,
                          "$cc, ${src2}"##_.BroadcastStr##", $src1",
                          "$src1, ${src2}"##_.BroadcastStr##", $cc">,
                          EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>,
                          NotMemoryFoldable;
    }
  }

  // Patterns for selecting with loads in other operand.
  def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
                     CommutableCMPCC:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
                                                      imm:$cc)>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm (_.LdFrag addr:$src2),
                                         (_.VT _.RC:$src1),
                                         CommutableCMPCC:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
                                                       _.RC:$src1, addr:$src2,
                                                       imm:$cc)>;

  def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                     (_.VT _.RC:$src1), CommutableCMPCC:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
                                                       imm:$cc)>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm (X86VBroadcast
                                          (_.ScalarLdFrag addr:$src2)),
                                         (_.VT _.RC:$src1),
                                         CommutableCMPCC:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
                                                        _.RC:$src1, addr:$src2,
                                                        imm:$cc)>;
}

multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // Comparison code form (VCMP[EQ/LT/LE/...]).
  defm  rrib  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                    (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                    "vcmp${cc}"#_.Suffix,
                    "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                    (X86cmpmRnd (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                imm:$cc,
                                (i32 FROUND_NO_EXC))>,
                    EVEX_B, Sched<[sched]>;

  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                        (outs _.KRC:$dst),
                        (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                        "vcmp"#_.Suffix,
                        "$cc, {sae}, $src2, $src1",
                        "$src1, $src2, {sae}, $cc">,
                        EVEX_B, Sched<[sched]>, NotMemoryFoldable;
  }
}

multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
                avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
  }
}

defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
              AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
              AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

// Patterns to select fp compares with load as first operand.
let Predicates = [HasAVX512] in {
  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
                            CommutableCMPCC:$cc)),
            (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>;

  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
                            CommutableCMPCC:$cc)),
            (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>;
}
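
// Illustrative note: CommutableCMPCC restricts the load-commuting patterns
// above to the truly commutative FP predicates (EQ/UNORD/NE/ORD), so a
// compare whose load ended up in operand 0 can still fold, e.g.
//   vcmpeqss k1, xmm1, dword ptr [rdi]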

// ----------------------------------------------------------------
// FPClass
// Handle fpclass instruction:  mask = op(reg_scalar, imm)
//                                     op(mem_scalar, imm)
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 Predicate prd> {
  let Predicates = [prd], ExeDomain = _.ExeDomain in {
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
                                             (i32 imm:$src2)))]>,
                    Sched<[sched]>;
    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     OpcodeStr##_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                           (OpNode (_.VT _.RC:$src1),
                                                   (i32 imm:$src2))))]>,
                     EVEX_K, Sched<[sched]>;
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                          (OpNode _.ScalarIntMemCPat:$src1,
                                  (i32 imm:$src2)))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
                     OpcodeStr##_.Suffix##
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                           (OpNode _.ScalarIntMemCPat:$src1,
                                                   (i32 imm:$src2))))]>,
                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle fpclass instruction: mask = fpclass(reg_vec, reg_vec, imm)
//                                    fpclass(reg_vec, mem_vec, imm)
//                                    fpclass(reg_vec, broadcast(eltVt), imm)
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 string mem, string broadcast>{
  let ExeDomain = _.ExeDomain in {
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
                                             (i32 imm:$src2)))]>,
                    Sched<[sched]>;
    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     OpcodeStr##_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                           (OpNode (_.VT _.RC:$src1),
                                                   (i32 imm:$src2))))]>,
                     EVEX_K, Sched<[sched]>;
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##mem#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(OpNode
                                      (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                      (i32 imm:$src2)))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
                     OpcodeStr##_.Suffix##mem#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst, (and _.KRCWM:$mask, (OpNode
                                            (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                            (i32 imm:$src2))))]>,
                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                     OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
                     _.BroadcastStr##", $dst|$dst, ${src1}"
                     ##_.BroadcastStr##", $src2}",
                     [(set _.KRC:$dst,(OpNode
                                       (_.VT (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src1))),
                                       (i32 imm:$src2)))]>,
                     EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
                      _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
                      _.BroadcastStr##", $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask, (OpNode
                                            (_.VT (X86VBroadcast
                                                   (_.ScalarLdFrag addr:$src1))),
                                            (i32 imm:$src2))))]>,
                      EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
2769 multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2770 bits<8> opc, SDNode OpNode,
2771 X86SchedWriteWidths sched, Predicate prd,
2773 let Predicates = [prd] in {
2774 defm Z : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.ZMM,
2775 _.info512, "{z}", broadcast>, EVEX_V512;
2777 let Predicates = [prd, HasVLX] in {
2778 defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.XMM,
2779 _.info128, "{x}", broadcast>, EVEX_V128;
2780 defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.YMM,
2781 _.info256, "{y}", broadcast>, EVEX_V256;

multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
                                 bits<8> opcScalar, SDNode VecOpNode,
                                 SDNode ScalarOpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
                                      VecOpNode, sched, prd, "{l}">,
                                      EVEX_CD8<32, CD8VF>;
  defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
                                      VecOpNode, sched, prd, "{q}">,
                                      EVEX_CD8<64, CD8VF>, VEX_W;
  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
                                   sched.Scl, f32x_info, prd>,
                                   EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
                                   sched.Scl, f64x_info, prd>,
                                   EVEX_CD8<64, CD8VT1>, VEX_W;
}

defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass,
                                      X86Vfpclasss, SchedWriteFCmp, HasDQI>,
                                      AVX512AIi8Base, EVEX;
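
// For reference (illustrative only, AT&T syntax), the forms defined above
// assemble as, e.g.:
//   vfpclassps $0x81, %zmm1, %k0               ; rr: test each f32 for SNaN/QNaN
//   vfpclassps $0x81, (%rax){1to16}, %k0 {%k2} ; rmbk: broadcast form, masked
// The imm8 selects which FP classes to test for (QNaN, +/-0, +/-Inf, denormal,
// negative finite, SNaN); the bit assignments are fixed by the ISA, not here.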

//===----------------------------------------------------------------------===//
// Mask register copy, including
//   - copy between mask registers
//   - load/store mask registers
//   - copy from GPR to mask register and vice versa
//===----------------------------------------------------------------------===//
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                           string OpcodeStr, RegisterClass KRC,
                           ValueType vvt, X86MemOperand x86memop> {
  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
  def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
             Sched<[WriteMove]>;
  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(set KRC:$dst, (vvt (load addr:$src)))]>,
             Sched<[WriteLoad]>;
  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(store KRC:$src, addr:$dst)]>,
             Sched<[WriteStore]>;
}

multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                               string OpcodeStr,
                               RegisterClass KRC, RegisterClass GRC> {
  let hasSideEffects = 0 in {
    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
               Sched<[WriteMove]>;
    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
               Sched<[WriteMove]>;
  }
}

let Predicates = [HasDQI] in
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, PD;

let Predicates = [HasAVX512] in
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, PS;

let Predicates = [HasBWI] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32mem>,
               VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               XD;
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               XD, VEX_W;
}
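
// For reference (illustrative only, AT&T syntax), the defms above produce:
//   kmovw %eax, %k1        ; kr: GPR -> mask
//   kmovw %k1, %eax        ; rk: mask -> GPR
//   kmovw (%rsp), %k1      ; km: load mask
//   kmovw %k1, (%rsp)      ; mk: store mask
// kmovb/kmovd/kmovq follow the same shapes, gated on DQI/BWI as above.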

// GR from/to mask register
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;

def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;

def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (KMOVWrk VK16:$src)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (COPY_TO_REGCLASS VK16:$src, GR32)>;

def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (COPY_TO_REGCLASS VK8:$src, GR32)>;

def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
          (COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
          (COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
          (COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
          (COPY_TO_REGCLASS VK64:$src, GR64)>;
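
// Note: the bitconverts above are lowered to register-class copies, not real
// code; the mask value already lives in the low bits of a k register (or a
// GPR), so no instruction is emitted unless the register allocator has to
// materialize a cross-class copy, which then becomes a KMOV.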

let Predicates = [HasDQI] in {
  def : Pat<(store VK1:$src, addr:$dst),
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;

  def : Pat<(v1i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
}

let Predicates = [HasAVX512] in {
  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
              (COPY_TO_REGCLASS GR32:$src, maskRC)>;

    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
  }

  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
  defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
  defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
  defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;

  def : Pat<(insert_subvector (v16i1 immAllZerosV),
                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
            (COPY_TO_REGCLASS
             (KMOVWkr (AND32ri8
                       (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                       (i32 1))), VK16)>;
}

// Mask unary operation
// - KNOT
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            X86FoldableSchedWrite sched, Predicate prd> {
  let Predicates = [prd] in
  def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(set KRC:$dst, (OpNode KRC:$src))]>,
             Sched<[sched]>;
}

multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode,
                                X86FoldableSchedWrite sched> {
  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                            sched, HasDQI>, VEX, PD;
  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                            sched, HasAVX512>, VEX, PS;
  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                            sched, HasBWI>, VEX, PD, VEX_W;
  defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                            sched, HasBWI>, VEX, PS, VEX_W;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;

// KNL does not support KMOVB; an 8-bit mask has to be promoted to 16 bits.
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(vnot VK8:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;

def : Pat<(vnot VK4:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;

// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC, SDPatternOperator OpNode,
                             X86FoldableSchedWrite sched, Predicate prd,
                             bit IsCommutable> {
  let Predicates = [prd], isCommutable = IsCommutable in
  def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
             !strconcat(OpcodeStr,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
             Sched<[sched]>;
}

multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode,
                                 X86FoldableSchedWrite sched, bit IsCommutable,
                                 Predicate prdW = HasAVX512> {
  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                             sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                             sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
}

def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
// These nodes use 'vnot' instead of 'not' to support vectors.
def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
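
// Note the operand order of the andn fragments: (vandn x, y) == (~x & y),
// which matches KANDN's own semantics (dest = ~src1 & src2), so the PatFrag
// maps onto the instruction without any commuting.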

multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
                            Instruction Inst> {
  // With AVX512F, the 8-bit mask is promoted to a 16-bit mask;
  // with the DQI set, v8i1 is legal and the KxxxB instructions are used.
  let Predicates = [NoDQI] in
  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
            (COPY_TO_REGCLASS
             (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                   (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;

  // All types smaller than 8 bits require conversion anyway.
  def : Pat<(OpNode VK1:$src1, VK1:$src2),
            (COPY_TO_REGCLASS (Inst
                               (COPY_TO_REGCLASS VK1:$src1, VK16),
                               (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
            (COPY_TO_REGCLASS (Inst
                               (COPY_TO_REGCLASS VK2:$src1, VK16),
                               (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
            (COPY_TO_REGCLASS (Inst
                               (COPY_TO_REGCLASS VK4:$src1, VK16),
                               (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}

defm : avx512_binop_pat<and,   and,  KANDWrr>;
defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
defm : avx512_binop_pat<or,    or,   KORWrr>;
defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
defm : avx512_binop_pat<xor,   xor,  KXORWrr>;

multiclass avx512_mask_unpck<string Suffix, RegisterClass KRC, ValueType VT,
                             RegisterClass KRCSrc, X86FoldableSchedWrite sched,
                             Predicate prd> {
  let Predicates = [prd] in {
    let hasSideEffects = 0 in
    def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
               (ins KRC:$src1, KRC:$src2),
               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               VEX_4V, VEX_L, Sched<[sched]>;

    def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
              (!cast<Instruction>(NAME##rr)
               (COPY_TO_REGCLASS KRCSrc:$src2, KRC),
               (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>;
  }
}

defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8,  WriteShuffle, HasAVX512>, PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, WriteShuffle, HasBWI>, PS;
defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, WriteShuffle, HasBWI>, PS, VEX_W;
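
// Note: kunpck* places the *second* instruction source in the low half of
// the result (e.g. for kunpckbw, dest[7:0] = src2[7:0] and
// dest[15:8] = src1[7:0]), which is why the concat_vectors pattern above
// swaps the operands so that $src1 ends up in the low lanes.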

multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode, X86FoldableSchedWrite sched,
                              Predicate prd> {
  let Predicates = [prd], Defs = [EFLAGS] in
  def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
             [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
             Sched<[sched]>;
}

multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                Predicate prdW = HasAVX512> {
  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
           VEX, PD;
  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
           VEX, PS;
  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
           VEX, PS, VEX_W;
  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
           VEX, PD, VEX_W;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
defm KTEST   : avx512_mask_testop_w<0x99, "ktest",   X86ktest,   SchedWriteVecLogic.XMM, HasDQI>;
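
// For reference: kortest sets ZF when (src1 | src2) is all zeros and CF when
// it is all ones, so "kortestw %k1, %k1" is the usual way to branch on an
// all-zero (or all-one) mask; ktest (DQI) performs the analogous test on the
// AND / ANDN of its operands.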

multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                               SDNode OpNode, X86FoldableSchedWrite sched> {
  let Predicates = [HasAVX512] in
  def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
               !strconcat(OpcodeStr,
                          "\t{$imm, $src, $dst|$dst, $src, $imm}"),
               [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>,
               Sched<[sched]>;
}

multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode, X86FoldableSchedWrite sched> {
  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                               sched>, VEX, TAPD, VEX_W;
  let Predicates = [HasDQI] in
  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                               sched>, VEX, TAPD;
  let Predicates = [HasBWI] in {
    defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                                 sched>, VEX, TAPD, VEX_W;
    defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                                 sched>, VEX, TAPD;
  }
}

defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
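
// For reference (illustrative only): "kshiftrw $4, %k1, %k2" shifts the
// 16-bit mask right by an immediate; a shift amount at or beyond the mask
// width yields an all-zero mask.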

// Patterns for comparing 128/256-bit integer vectors using a 512-bit instruction.
multiclass avx512_icmp_packed_no_vlx_lowering<PatFrag Frag, string InstStr,
                                              X86VectorVTInfo Narrow,
                                              X86VectorVTInfo Wide> {
  def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
                              (Narrow.VT Narrow.RC:$src2))),
            (COPY_TO_REGCLASS
             (!cast<Instruction>(InstStr#"Zrr")
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
             Narrow.KRC)>;

  def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                             (Frag (Narrow.VT Narrow.RC:$src1),
                                   (Narrow.VT Narrow.RC:$src2)))),
            (COPY_TO_REGCLASS
             (!cast<Instruction>(InstStr#"Zrrk")
              (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
             Narrow.KRC)>;
}

// Patterns for comparing 128/256-bit integer vectors using a 512-bit instruction.
multiclass avx512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag,
                                                 string InstStr,
                                                 X86VectorVTInfo Narrow,
                                                 X86VectorVTInfo Wide> {
  def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                  (Narrow.VT Narrow.RC:$src2), cond)),
            (COPY_TO_REGCLASS
             (!cast<Instruction>(InstStr##Zrri)
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
              (Frag.OperandTransform $cc)), Narrow.KRC)>;

  def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                             (Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                                   (Narrow.VT Narrow.RC:$src2),
                                                   cond)))),
            (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
                               (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
                               (Frag.OperandTransform $cc)), Narrow.KRC)>;
}

// Same as above, but for fp types which don't use PatFrags.
multiclass avx512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
                                                X86VectorVTInfo Narrow,
                                                X86VectorVTInfo Wide> {
  def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
                                (Narrow.VT Narrow.RC:$src2), imm:$cc)),
            (COPY_TO_REGCLASS
             (!cast<Instruction>(InstStr##Zrri)
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
              imm:$cc), Narrow.KRC)>;

  def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                             (OpNode (Narrow.VT Narrow.RC:$src1),
                                     (Narrow.VT Narrow.RC:$src2), imm:$cc))),
            (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
                               (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
                               imm:$cc), Narrow.KRC)>;
}
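
// A sketch of the lowering the three multiclasses above implement: on a
// target without VLX, a 128/256-bit compare is widened by inserting both
// operands into the low lanes of a 512-bit register (upper lanes undef),
// running the 512-bit compare, and then reinterpreting only the low bits of
// the wide mask result, which are the only bits the narrow type defines.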

let Predicates = [HasAVX512, NoVLX] in {
  // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
  // increase the pattern complexity the way an immediate would.
  let AddedComplexity = 2 in {
    defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpgtm,   "VPCMPGTD", v8i32x_info, v16i32_info>;
    defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v8i32x_info, v16i32_info>;

    defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpgtm,   "VPCMPGTD", v4i32x_info, v16i32_info>;
    defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v4i32x_info, v16i32_info>;

    defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpgtm,   "VPCMPGTQ", v4i64x_info, v8i64_info>;
    defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v4i64x_info, v8i64_info>;

    defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpgtm,   "VPCMPGTQ", v2i64x_info, v8i64_info>;
    defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v2i64x_info, v8i64_info>;
  }

  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm,  "VPCMPD",  v8i32x_info, v16i32_info>;
  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm,  "VPCMPD",  v4i32x_info, v16i32_info>;
  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm,  "VPCMPQ",  v4i64x_info, v8i64_info>;
  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm,  "VPCMPQ",  v2i64x_info, v8i64_info>;
  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUQ", v2i64x_info, v8i64_info>;

  defm : avx512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v8f32x_info, v16f32_info>;
  defm : avx512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v4f32x_info, v16f32_info>;
  defm : avx512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v4f64x_info, v8f64_info>;
  defm : avx512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v2f64x_info, v8f64_info>;
}

let Predicates = [HasBWI, NoVLX] in {
  // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
  // increase the pattern complexity the way an immediate would.
  let AddedComplexity = 2 in {
    defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpgtm,   "VPCMPGTB", v32i8x_info, v64i8_info>;
    defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v32i8x_info, v64i8_info>;

    defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpgtm,   "VPCMPGTB", v16i8x_info, v64i8_info>;
    defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v16i8x_info, v64i8_info>;

    defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpgtm,   "VPCMPGTW", v16i16x_info, v32i16_info>;
    defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v16i16x_info, v32i16_info>;

    defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpgtm,   "VPCMPGTW", v8i16x_info, v32i16_info>;
    defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v8i16x_info, v32i16_info>;
  }

  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm,  "VPCMPB",  v32i8x_info, v64i8_info>;
  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUB", v32i8x_info, v64i8_info>;

  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm,  "VPCMPB",  v16i8x_info, v64i8_info>;
  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUB", v16i8x_info, v64i8_info>;

  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm,  "VPCMPW",  v16i16x_info, v32i16_info>;
  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUW", v16i16x_info, v32i16_info>;

  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm,  "VPCMPW",  v8i16x_info, v32i16_info>;
  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUW", v8i16x_info, v32i16_info>;
}

// Mask setting all 0s or 1s
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
  let Predicates = [HasAVX512] in
  let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
      SchedRW = [WriteZero] in
  def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                 [(set KRC:$dst, (VT Val))]>;
}

multiclass avx512_mask_setop_w<PatFrag Val> {
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
  defm D : avx512_mask_setop<VK32, v32i1, Val>;
  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}

defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
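
// KSET0*/KSET1* are pseudos; after register allocation they are expanded
// (in expandPostRAPseudos) into a self-referencing mask op, e.g. KSET0W
// becomes "kxorw %kN, %kN, %kN" and KSET1W becomes "kxnorw %kN, %kN, %kN"
// (a sketch; the exact expansion lives in X86InstrInfo.cpp).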

// With AVX-512 only, the 8-bit mask is promoted to a 16-bit mask.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
}

// Patterns for kmask insert_subvector/extract_subvector to/from index=0
multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
                                             RegisterClass RC, ValueType VT> {
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;

  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}

defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
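
// These lowerings are free: a k register always holds the mask value in its
// low bits, so extracting or inserting at index 0 merely reinterprets the
// same physical register under a narrower or wider register class.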

//===----------------------------------------------------------------------===//
// AVX-512 - Aligned and unaligned load and store
//===----------------------------------------------------------------------===//

multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
                       X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                       bit NoRMPattern = 0,
                       SDPatternOperator SelectOprr = vselect> {
  let hasSideEffects = 0 in {
  let isMoveReg = 1 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
                    _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                 "${dst} {${mask}} {z}, $src}"),
                      [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                              (_.VT _.RC:$src),
                                              _.ImmAllZerosV)))], _.ExeDomain>,
                      EVEX, EVEX_KZ, Sched<[Sched.RR]>;

  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoRMPattern, [],
                        [(set _.RC:$dst,
                          (_.VT (bitconvert (ld_frag addr:$src))))]),
                    _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;

  let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
    def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                       (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
                       !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                  "${dst} {${mask}}, $src1}"),
                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                               (_.VT _.RC:$src1),
                                               (_.VT _.RC:$src0))))], _.ExeDomain>,
                       EVEX, EVEX_K, Sched<[Sched.RR]>;
    def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                       (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
                       !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                  "${dst} {${mask}}, $src1}"),
                       [(set _.RC:$dst, (_.VT
                                         (vselect _.KRCWM:$mask,
                                          (_.VT (bitconvert (ld_frag addr:$src1))),
                                          (_.VT _.RC:$src0))))], _.ExeDomain>,
                       EVEX, EVEX_K, Sched<[Sched.RM]>;
  }
  def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.MemOp:$src),
                      OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
                      "${dst} {${mask}} {z}, $src}",
                      [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
                                              (_.VT (bitconvert (ld_frag addr:$src))),
                                              _.ImmAllZerosV)))],
                      _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
  }
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix##rmk) _.RC:$src0,
             _.KRCWM:$mask, addr:$ptr)>;
}

multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo _, Predicate prd,
                                 X86SchedWriteMoveLSWidths Sched,
                                 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
                       _.info512.AlignedLdFrag, masked_load_aligned512,
                       Sched.ZMM, "", NoRMPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
                            _.info256.AlignedLdFrag, masked_load_aligned256,
                            Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
    defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
                            _.info128.AlignedLdFrag, masked_load_aligned128,
                            Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
  }
}

multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                          AVX512VLVectorVTInfo _, Predicate prd,
                          X86SchedWriteMoveLSWidths Sched,
                          string EVEX2VEXOvrd, bit NoRMPattern = 0,
                          SDPatternOperator SelectOprr = vselect> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
                       masked_load_unaligned, Sched.ZMM, "",
                       NoRMPattern, SelectOprr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
                            masked_load_unaligned, Sched.YMM, EVEX2VEXOvrd#"Y",
                            NoRMPattern, SelectOprr>, EVEX_V256;
    defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
                            masked_load_unaligned, Sched.XMM, EVEX2VEXOvrd,
                            NoRMPattern, SelectOprr>, EVEX_V128;
  }
}

multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                        bit NoMRPattern = 0> {
  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  let isMoveReg = 1 in
  def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
                        OpcodeStr # "\t{$src, $dst|$dst, $src}",
                        [], _.ExeDomain>, EVEX,
                        FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
                        EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                         (ins _.KRCWM:$mask, _.RC:$src),
                         OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
                         "${dst} {${mask}}, $src}",
                         [], _.ExeDomain>, EVEX, EVEX_K,
                         FoldGenData<BaseName#_.ZSuffix#rrk>,
                         Sched<[Sched.RR]>;
  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                          (ins _.KRCWM:$mask, _.RC:$src),
                          OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                          "${dst} {${mask}} {z}, $src}",
                          [], _.ExeDomain>, EVEX, EVEX_KZ,
                          FoldGenData<BaseName#_.ZSuffix#rrkz>,
                          Sched<[Sched.RR]>;
  }

  let hasSideEffects = 0, mayStore = 1 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoMRPattern, [],
                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
  def mrk : AVX512PI<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
                     OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                     [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
                     NotMemoryFoldable;

  def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
           (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
            _.KRCWM:$mask, _.RC:$src)>;

  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
                   _.RC:$dst, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
}

multiclass avx512_store_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo _, Predicate prd,
                           X86SchedWriteMoveLSWidths Sched,
                           string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
                        masked_store_unaligned, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
                             masked_store_unaligned, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
                             masked_store_unaligned, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}

multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _, Predicate prd,
                                  X86SchedWriteMoveLSWidths Sched,
                                  string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
                        masked_store_aligned512, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
                             masked_store_aligned256, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
                             masked_store_aligned128, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}

defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               PS, EVEX_CD8<32, CD8VF>;

defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPS">,
               PS, EVEX_CD8<32, CD8VF>;

defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA", 1>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA", 1>,
                 PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA">,
                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA">,
                 PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                               SchedWriteVecMoveLS, "VMOVDQU", 1>,
                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
                XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 XD, VEX_W, EVEX_CD8<16, CD8VF>;

defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                 SchedWriteVecMoveLS, "VMOVDQU">,
                 XS, VEX_W, EVEX_CD8<64, CD8VF>;
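
// Note: for full-register moves the element-size suffix only matters when a
// write-mask is applied, e.g. (illustrative, AT&T syntax):
//   vmovdqa32 %zmm0, %zmm1 {%k1}   ; merges/zeros in 32-bit elements
//   vmovdqa64 %zmm0, %zmm1 {%k1}   ; merges/zeros in 64-bit elements
// Unmasked, vmovdqa32 and vmovdqa64 move the same 512 bits.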

// Special instructions to help with spilling when we don't have VLX. We need
// to load or store from a ZMM register instead. These are converted in
// expandPostRAPseudos.
let isReMaterializable = 1, canFoldAsLoad = 1,
    isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
  def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                              "", []>, Sched<[WriteFLoadX]>;
  def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                              "", []>, Sched<[WriteFLoadY]>;
  def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                              "", []>, Sched<[WriteFLoadX]>;
  def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                              "", []>, Sched<[WriteFLoadY]>;
}

let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
  def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                              "", []>, Sched<[WriteFStoreX]>;
  def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                              "", []>, Sched<[WriteFStoreY]>;
  def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                              "", []>, Sched<[WriteFStoreX]>;
  def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                              "", []>, Sched<[WriteFStoreY]>;
}

def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
                                            VK8), VR512:$src)>;

def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;

// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
                          (bc_v8i64 (v16i32 immAllZerosV)),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;

multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
                              X86VectorVTInfo Wide> {
  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.RC:$src0)),
            (EXTRACT_SUBREG
             (Wide.VT
              (!cast<Instruction>(InstrStr#"rrk")
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
             Narrow.SubRegIdx)>;

  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.ImmAllZerosV)),
            (EXTRACT_SUBREG
             (Wide.VT
              (!cast<Instruction>(InstrStr#"rrkz")
               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
             Narrow.SubRegIdx)>;
}

// Patterns for handling selects of 128/256-bit vectors when VLX isn't
// available. Use a 512-bit operation and extract.
let Predicates = [HasAVX512, NoVLX] in {
  defm : mask_move_lowering<"VMOVAPSZ",   v4f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
  defm : mask_move_lowering<"VMOVAPSZ",   v8f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;

  defm : mask_move_lowering<"VMOVAPDZ",   v2f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
  defm : mask_move_lowering<"VMOVAPDZ",   v4f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}

let Predicates = [HasBWI, NoVLX] in {
  defm : mask_move_lowering<"VMOVDQU8Z",  v16i8x_info,  v64i8_info>;
  defm : mask_move_lowering<"VMOVDQU8Z",  v32i8x_info,  v64i8_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info,  v32i16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
}

let Predicates = [HasAVX512] in {
  // 512-bit store.
  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
}

let Predicates = [HasVLX] in {
  // 128-bit store.
  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;

  // 256-bit store.
  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}

multiclass masked_move_for_extract<string InstrStr, X86VectorVTInfo From,
                                   X86VectorVTInfo To, X86VectorVTInfo Cast> {
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (extract_subvector
                                       (From.VT From.RC:$src), (iPTR 0)))),
                              Cast.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask,
                      (To.VT (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx))))>;

  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (extract_subvector
                                       (From.VT From.RC:$src), (iPTR 0)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask,
                      (To.VT (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx))))>;
}

let Predicates = [HasVLX] in {
  // A masked extract from the first 128-bits of a 256-bit vector can be
  // implemented with a masked move.
  defm : masked_move_for_extract<"VMOVDQA64Z128", v4i64x_info,  v2i64x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z128", v8i32x_info,  v4i32x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z128", v16i16x_info, v8i16x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z128", v32i8x_info,  v16i8x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128", v4i64x_info,  v2i64x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128", v8i32x_info,  v4i32x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128", v16i16x_info, v8i16x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128", v32i8x_info,  v16i8x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVAPDZ128",   v4f64x_info,  v2f64x_info, v2f64x_info>;
  defm : masked_move_for_extract<"VMOVAPDZ128",   v8f32x_info,  v4f32x_info, v2f64x_info>;
  defm : masked_move_for_extract<"VMOVAPSZ128",   v4f64x_info,  v2f64x_info, v4f32x_info>;
  defm : masked_move_for_extract<"VMOVAPSZ128",   v8f32x_info,  v4f32x_info, v4f32x_info>;

  // A masked extract from the first 128-bits of a 512-bit vector can be
  // implemented with a masked move.
  defm : masked_move_for_extract<"VMOVDQA64Z128", v8i64_info,  v2i64x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z128", v16i32_info, v4i32x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z128", v32i16_info, v8i16x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z128", v64i8_info,  v16i8x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128", v8i64_info,  v2i64x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128", v16i32_info, v4i32x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128", v32i16_info, v8i16x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128", v64i8_info,  v16i8x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVAPDZ128",   v8f64_info,  v2f64x_info, v2f64x_info>;
  defm : masked_move_for_extract<"VMOVAPDZ128",   v16f32_info, v4f32x_info, v2f64x_info>;
  defm : masked_move_for_extract<"VMOVAPSZ128",   v8f64_info,  v2f64x_info, v4f32x_info>;
  defm : masked_move_for_extract<"VMOVAPSZ128",   v16f32_info, v4f32x_info, v4f32x_info>;

  // A masked extract from the first 256-bits of a 512-bit vector can be
  // implemented with a masked move.
  defm : masked_move_for_extract<"VMOVDQA64Z256", v8i64_info,  v4i64x_info,  v4i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z256", v16i32_info, v8i32x_info,  v4i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z256", v32i16_info, v16i16x_info, v4i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z256", v64i8_info,  v32i8x_info,  v4i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z256", v8i64_info,  v4i64x_info,  v8i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z256", v16i32_info, v8i32x_info,  v8i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z256", v32i16_info, v16i16x_info, v8i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z256", v64i8_info,  v32i8x_info,  v8i32x_info>;
  defm : masked_move_for_extract<"VMOVAPDZ256",   v8f64_info,  v4f64x_info,  v4f64x_info>;
  defm : masked_move_for_extract<"VMOVAPDZ256",   v16f32_info, v8f32x_info,  v4f64x_info>;
  defm : masked_move_for_extract<"VMOVAPSZ256",   v8f64_info,  v4f64x_info,  v8f32x_info>;
  defm : masked_move_for_extract<"VMOVAPSZ256",   v16f32_info, v8f32x_info,  v8f32x_info>;
}

// Move Int Doubleword to Packed Double Int
//
let ExeDomain = SSEPackedInt in {
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector GR32:$src)))]>,
                      EVEX, Sched<[WriteVecMoveFromGpr]>;
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v2i64 (scalar_to_vector GR64:$src)))]>,
                      EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
                      (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}", []>,
                      EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
let isCodeGenOnly = 1 in {
def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set FR64X:$dst, (bitconvert GR64:$src))]>,
                      EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>,
                      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (bitconvert FR64X:$src))]>,
                      EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(store (i64 (bitconvert FR64X:$src)), addr:$dst)]>,
                      EVEX, VEX_W, Sched<[WriteVecStore]>,
                      EVEX_CD8<64, CD8VT1>;
}
} // ExeDomain = SSEPackedInt

// Move Int Doubleword to Single Scalar
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
                      EVEX, Sched<[WriteVecMoveFromGpr]>;

def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))]>,
                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

// Move doubleword from xmm register to r/m32
//
let ExeDomain = SSEPackedInt in {
def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
                                        (iPTR 0)))]>,
                      EVEX, Sched<[WriteVecMoveToGpr]>;
def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
                      (ins i32mem:$dst, VR128X:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(store (i32 (extractelt (v4i32 VR128X:$src),
                                    (iPTR 0))), addr:$dst)]>,
                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt

// Move quadword from xmm1 register to r/m64
//
let ExeDomain = SSEPackedInt in {
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
                                         (iPTR 0)))]>,
                       PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
                       Requires<[HasAVX512]>;

let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
                       EVEX, VEX_W, Sched<[WriteVecStore]>,
                       Requires<[HasAVX512, In64BitMode]>;

def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
                      (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
                              addr:$dst)]>,
                      EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
                      Sched<[WriteVecStore]>, Requires<[HasAVX512]>;

let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
                             (ins VR128X:$src),
                             "vmovq\t{$src, $dst|$dst, $src}", []>,
                             EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
} // ExeDomain = SSEPackedInt

def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;

// Move Scalar Single to Double Int
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
                            (ins FR32X:$src),
                            "vmovd\t{$src, $dst|$dst, $src}",
                            [(set GR32:$dst, (bitconvert FR32X:$src))]>,
                            EVEX, Sched<[WriteVecMoveToGpr]>;
def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
                            (ins i32mem:$dst, FR32X:$src),
                            "vmovd\t{$src, $dst|$dst, $src}",
                            [(store (i32 (bitconvert FR32X:$src)), addr:$dst)]>,
                            EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

// Move Quadword Int to Packed Quadword Int
//
let ExeDomain = SSEPackedInt in {
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
                              (ins i64mem:$src),
                              "vmovq\t{$src, $dst|$dst, $src}",
                              [(set VR128X:$dst,
                                (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
                              EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
} // ExeDomain = SSEPackedInt

// Allow "vmovd" with a 64-bit GPR operand, but always print "vmovq".
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;

//===----------------------------------------------------------------------===//
// AVX-512 MOVSS, MOVSD
//===----------------------------------------------------------------------===//

multiclass avx512_move_scalar<string asm, SDNode OpNode,
                              X86VectorVTInfo _> {
  let Predicates = [HasAVX512, OptForSize] in
  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2),
                    !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
                    _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                      !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
                                 "$dst {${mask}} {z}, $src1, $src2}"),
                      [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                              (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                              _.ImmAllZerosV)))],
                      _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
                     (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                     !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
                                "$dst {${mask}}, $src1, $src2}"),
                     [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                             (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                             (_.VT _.RC:$src0))))],
                     _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
  let canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
                    !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                    [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
                    _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  let mayLoad = 1, hasSideEffects = 0 in {
    let Constraints = "$src0 = $dst" in
    def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
                       (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
                       !strconcat(asm, "\t{$src, $dst {${mask}}|",
                                  "$dst {${mask}}, $src}"),
                       [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
    def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
                        (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
                        !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
                                   "$dst {${mask}} {z}, $src}"),
                        [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
  }
  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
                   !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                   [(store _.FRC:$src, addr:$dst)], _.ExeDomain>,
                   EVEX, Sched<[WriteFStore]>;
  let mayStore = 1, hasSideEffects = 0 in
  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
                    (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
                    !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
                    [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
                    NotMemoryFoldable;
}

defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
               VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;

defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
               VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;

multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
                                       PatLeaf ZeroFP, X86VectorVTInfo _> {

  def : Pat<(_.VT (OpNode _.RC:$src0,
                          (_.VT (scalar_to_vector
                                 (_.EltVT (X86selects VK1WM:$mask,
                                                      (_.EltVT _.FRC:$src1),
                                                      (_.EltVT _.FRC:$src2))))))),
            (!cast<Instruction>(InstrStr#rrk)
             (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
             VK1WM:$mask,
             (_.VT _.RC:$src0),
             (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;

  def : Pat<(_.VT (OpNode _.RC:$src0,
                          (_.VT (scalar_to_vector
                                 (_.EltVT (X86selects VK1WM:$mask,
                                                      (_.EltVT _.FRC:$src1),
                                                      (_.EltVT ZeroFP))))))),
            (!cast<Instruction>(InstrStr#rrkz)
             VK1WM:$mask,
             (_.VT _.RC:$src0),
             (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
}

multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                        dag Mask, RegisterClass MaskRC> {

  def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                            (_.info128.VT _.info128.RC:$src),
                            (iPTR 0))), addr:$dst, Mask),
            (!cast<Instruction>(InstrStr#mrk) addr:$dst,
             (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
             (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
}

multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask, RegisterClass MaskRC,
                                               SubRegIndex subreg> {

  def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                            (_.info128.VT _.info128.RC:$src),
                            (iPTR 0))), addr:$dst, Mask),
            (!cast<Instruction>(InstrStr#mrk) addr:$dst,
             (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
             (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
}

// This matches the more recent codegen from clang that avoids emitting a
// 512-bit masked store directly. Codegen will widen a 128-bit masked store to
// 512 bits on AVX512F-only targets.
multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
                                                AVX512VLVectorVTInfo _,
                                                dag Mask512, dag Mask128,
                                                RegisterClass MaskRC,
                                                SubRegIndex subreg> {
  // AVX512F pattern.
  def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                            (_.info128.VT _.info128.RC:$src),
                            (iPTR 0))), addr:$dst, Mask512),
            (!cast<Instruction>(InstrStr#mrk) addr:$dst,
             (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
             (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;

  // AVX512VL pattern.
  def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
            (!cast<Instruction>(InstrStr#mrk) addr:$dst,
             (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
             (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
}
multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                       dag Mask, RegisterClass MaskRC> {

  def : Pat<(_.info128.VT (extract_subvector
                           (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                          (_.info512.VT (bitconvert
                                                         (v16i32 immAllZerosV))))),
                           (iPTR 0))),
            (!cast<Instruction>(InstrStr#rmkz)
                        (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                        addr:$srcAddr)>;

  def : Pat<(_.info128.VT (extract_subvector
                           (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                          (_.info512.VT (insert_subvector undef,
                                                         (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                                                         (iPTR 0))))),
                           (iPTR 0))),
            (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                        (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                        addr:$srcAddr)>;
}

multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask, RegisterClass MaskRC,
                                              SubRegIndex subreg> {

  def : Pat<(_.info128.VT (extract_subvector
                           (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                          (_.info512.VT (bitconvert
                                                         (v16i32 immAllZerosV))))),
                           (iPTR 0))),
            (!cast<Instruction>(InstrStr#rmkz)
                        (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                        addr:$srcAddr)>;

  def : Pat<(_.info128.VT (extract_subvector
                           (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                          (_.info512.VT (insert_subvector undef,
                                                         (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                                                         (iPTR 0))))),
                           (iPTR 0))),
            (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                        (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                        addr:$srcAddr)>;
}

// This matches the more recent codegen from clang that avoids emitting a
// 512-bit masked load directly. Codegen will widen a 128-bit masked load to
// 512 bits on AVX512F-only targets.
multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask512, dag Mask128,
                                               RegisterClass MaskRC,
                                               SubRegIndex subreg> {
  // AVX512F patterns.
  def : Pat<(_.info128.VT (extract_subvector
                           (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                                          (_.info512.VT (bitconvert
                                                         (v16i32 immAllZerosV))))),
                           (iPTR 0))),
            (!cast<Instruction>(InstrStr#rmkz)
                        (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                        addr:$srcAddr)>;

  def : Pat<(_.info128.VT (extract_subvector
                           (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                                          (_.info512.VT (insert_subvector undef,
                                                         (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                                                         (iPTR 0))))),
                           (iPTR 0))),
            (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                        (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                        addr:$srcAddr)>;

  // AVX512VL patterns.
  def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                           (_.info128.VT (bitconvert (v4i32 immAllZerosV))))),
            (!cast<Instruction>(InstrStr#rmkz)
                        (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                        addr:$srcAddr)>;

  def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                           (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
            (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                        (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                        addr:$srcAddr)>;
}

defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;

defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;

defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;
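
// A hedged note on provenance: masked loads/stores of this shape are what
// clang typically emits for the masked scalar intrinsics (e.g. the
// _mm_mask_load_ss / _mm_mask_store_ss family), with the mask arriving in a
// GPR of varying width - hence the GR32/GR16/GR8 variants of the mask dags
// instantiated above.
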
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
                             (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
                             VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
                             (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
                             (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
                             (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
                             VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
                             (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fpimm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
                             (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
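
// In the four patterns above only the low lane of the 128-bit result is read
// back out (via the COPY_TO_REGCLASS to FR32X/FR64X), so the vector operand
// that merely supplies upper lanes can be an IMPLICIT_DEF rather than a real
// value.
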
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            []>, XS, EVEX_4V, VEX_LIG,
                            FoldGenData<"VMOVSSZrr">,
                            Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
                                  VR128X:$src1, VR128X:$src2),
                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
                               "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
                             FoldGenData<"VMOVSSZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                              "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
                                "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
                              FoldGenData<"VMOVSSZrrkz">,
                              Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            []>, XD, EVEX_4V, VEX_LIG, VEX_W,
                            FoldGenData<"VMOVSDZrr">,
                            Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
                                  VR128X:$src1, VR128X:$src2),
                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
                               "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
                             VEX_W, FoldGenData<"VMOVSDZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
                                   VR128X:$src2),
                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
                                "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
                              VEX_W, FoldGenData<"VMOVSDZrrkz">,
                              Sched<[SchedWriteFShuffle.XMM]>;
}

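// The "_REV" defs above are the MRMDestReg (store-form, opcode 0x11) encoding
// of the same register-to-register moves; they exist so both encodings can be
// disassembled, and the ".s" mnemonic aliases below let the assembler request
// that alternate encoding explicitly.
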
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
                  "$dst {${mask}}, $src1, $src2}",
                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                  "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
                  "$dst {${mask}}, $src1, $src2}",
                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                  "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;

let Predicates = [HasAVX512, OptForSize] in {
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
              (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
              (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
              (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
              (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))), sub_xmm)>;
}

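// Using SET0 + VMOVSS/SD here keeps code size down when optimizing for size:
// unlike the blend instructions used below for speed, the moves carry no
// immediate byte.
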
// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
// VMOVSS/SD. Unfortunately, this loses the ability to use XMM16-31.
let Predicates = [HasAVX512, OptForSpeed] in {
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
                     (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
                     (i8 1))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                     (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
                     (i8 3))), sub_xmm)>;

  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VBLENDPDrri (v2f64 (V_SET0)),
                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)),
                     (i8 1))), sub_xmm)>;
  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VPBLENDWrri (v2i64 (V_SET0)),
                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)),
                     (i8 0xf))), sub_xmm)>;
}

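// VPBLENDW selects 16-bit lanes, so immediate 0x3 keeps the low two words
// (the low i32) and 0xf keeps the low four words (the low i64) from $src,
// with all remaining lanes taken from the zero vector.
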
let Predicates = [HasAVX512] in {

  // MOVSSrm zeros the high parts of the register; represent this
  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
  def : Pat<(v4f32 (X86vzload addr:$src)),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;

  // MOVSDrm zeros the high parts of the register; represent this
  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzload addr:$src)),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                   (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
                   (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // Represent the same patterns above but in the form they appear for
  // 512-bit types
  def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
                    (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v16f32 (X86vzmovl (insert_subvector undef,
                    (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v16f32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzmovl (insert_subvector undef,
                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
                   (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // Extract and store.
  def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
}

let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                  (ins VR128X:$src),
                                  "vmovq\t{$src, $dst|$dst, $src}",
                                  [(set VR128X:$dst, (v2i64 (X86vzmovl
                                                     (v2i64 VR128X:$src))))]>,
                                  EVEX, VEX_W;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIZrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIZrr GR64:$src)>;

  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
                   (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOV64toPQIZrr GR64:$src)), sub_xmm)>;

  def : Pat<(v8i64 (X86vzmovl (insert_subvector undef,
                   (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOV64toPQIZrr GR64:$src)), sub_xmm)>;

  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v4i32 (X86vzload addr:$src)),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
            (VMOVQI2PQIZrm addr:$src)>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
            (VMOVZPQILo2PQIZrr VR128X:$src)>;
  def : Pat<(v2i64 (X86vzload addr:$src)),
            (VMOVQI2PQIZrm addr:$src)>;
  def : Pat<(v4i64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                   (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrr GR32:$src)), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
                    (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrr GR32:$src)), sub_xmm)>;

  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
  def : Pat<(v16i32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v8i64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
}

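// SUBREG_TO_REG with a 0 operand asserts that the upper bits of the wider
// register are already zero, so the widened vzload/vzmovl results above need
// no extra zeroing instruction.
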
//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//

def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
                            (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
                            [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
                            EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;

let Predicates = [HasVLX] in {
  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
                                 (ins i256mem:$src),
                                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                                 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                                 EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
                                 (ins i128mem:$src),
                                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                                 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
                                 EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        X86SchedWriteMoveLS Sched,
                        PatFrag st_frag = alignednontemporalstore> {
  let SchedRW = [Sched.MR], AddedComplexity = 400 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}

multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo VTInfo,
                           X86SchedWriteMoveLSWidths Sched> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
  }
}

defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
                                SchedWriteVecMoveLSNT>, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
                                SchedWriteFMoveLSNT>, PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
                                SchedWriteFMoveLSNT>, PS;

let Predicates = [HasAVX512], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;

  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
}

let Predicates = [HasVLX], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
}

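// Note that there is no FP non-temporal load instruction; the FP
// alignednontemporalload patterns above reuse the integer VMOVNTDQA forms.
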
//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//===----------------------------------------------------------------------===//

multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                            IsCommutable>, AVX512BIBase, EVEX_4V,
                            Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1,
                                          (bitconvert (_.LdFrag addr:$src2))))>,
                            AVX512BIBase, EVEX_4V,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> :
           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                             "${src2}"##_.BroadcastStr##", $src1",
                             "$src1, ${src2}"##_.BroadcastStr,
                             (_.VT (OpNode _.RC:$src1,
                                           (X86VBroadcast
                                            (_.ScalarLdFrag addr:$src2))))>,
                             AVX512BIBase, EVEX_4V, EVEX_B,
                             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

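// The rmb form is the embedded-broadcast variant (EVEX_B): the scalar memory
// operand is broadcast to every element, printed with the {1toN} syntax, e.g.:
//   vpaddd (%rax){1to16}, %zmm1, %zmm0
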
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo,
                              X86SchedWriteWidths sched, Predicate prd,
                              bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                           IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
                                sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
                                sched.XMM, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo,
                               X86SchedWriteWidths sched, Predicate prd,
                               bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                            IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
                                 sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
                                 sched.XMM, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                                  sched, prd, IsCommutable>,
                                  VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}

multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
                                 VEX_WIG;
}

multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
                                 VEX_WIG;
}

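// Only the dword/qword wrappers build on avx512_binop_rmb_vl: EVEX embedded
// broadcast exists only for 32- and 64-bit elements, so the byte/word
// wrappers use the plain avx512_binop_rm_vl.
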
multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
                                IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
                                IsCommutable>;
}

multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
                                IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
                                IsCommutable>;
}

multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    sched, HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                                    sched, HasBWI, IsCommutable>;
}

multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            SDNode OpNode, X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode
                                      (_Src.VT _Src.RC:$src1),
                                      (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                             (bitconvert (_Src.LdFrag addr:$src2))))>,
                            AVX512BIBase, EVEX_4V,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                             (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"##_Brdct.BroadcastStr##", $src1",
                             "$src1, ${src2}"##_Brdct.BroadcastStr,
                             (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                              (_Brdct.VT (X86VBroadcast
                                                          (_Brdct.ScalarLdFrag addr:$src2))))))>,
                             AVX512BIBase, EVEX_4V, EVEX_B,
                             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

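// avx512_binop_rm2 handles instructions whose source and destination element
// types differ; _Brdct describes the memory element for the broadcast form,
// which can differ from both (e.g. vpmultishiftqb operates on bytes but
// broadcasts a qword).
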
defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                    SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                    SchedWriteVecALU, 0>;
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
                                    SchedWriteVecALU, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
                                     SchedWriteVecALU, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
                                     SchedWriteVecALU, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SchedWriteVecIMul, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
                                    HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
                                     HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
                                      SchedWriteVecIMul, HasBWI, 1>, T8PD;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
                                   SchedWriteVecALU, HasBWI, 1>;
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
                                    SchedWriteVecIMul, HasAVX512, 1>, T8PD;
defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
                                     SchedWriteVecIMul, HasAVX512, 1>;

multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched,
                            AVX512VLVectorVTInfo _SrcVTInfo,
                            AVX512VLVectorVTInfo _DstVTInfo,
                            SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                 _SrcVTInfo.info512, _DstVTInfo.info512,
                                 v8i64_info, IsCommutable>,
                                 EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      v4i64x_info, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      v2i64x_info, IsCommutable>,
                                      EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
  }
}

defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
                                       avx512vl_i8_info, avx512vl_i8_info,
                                       X86multishift, HasVBMI, 0>, T8PD;

multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
                            X86FoldableSchedWrite sched> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                             (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"##_Src.BroadcastStr##", $src1",
                             "$src1, ${src2}"##_Src.BroadcastStr,
                             (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                              (_Src.VT (X86VBroadcast
                                                        (_Src.ScalarLdFrag addr:$src2))))))>,
                             EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
                             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, X86VectorVTInfo _Src,
                           X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode
                                      (_Src.VT _Src.RC:$src1),
                                      (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                             (bitconvert (_Src.LdFrag addr:$src2))))>,
                            EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
                                v32i16_info, SchedWriteShuffle.ZMM>,
                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info, SchedWriteShuffle.YMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
                                      v16i16x_info, SchedWriteShuffle.YMM>,
                                      EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info, SchedWriteShuffle.XMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
                                      v8i16x_info, SchedWriteShuffle.XMM>,
                                      EVEX_V128;
  }
}

multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
                                SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
                                     v32i8x_info, SchedWriteShuffle.YMM>,
                                     EVEX_V256, VEX_WIG;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
                                     v16i8x_info, SchedWriteShuffle.XMM>,
                                     EVEX_V128, VEX_WIG;
  }
}

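// The i16->i8 packs have no rmb form: with 16-bit source elements there is no
// embedded-broadcast encoding to instantiate.
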
multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                         SDNode OpNode, AVX512VLVectorVTInfo _Src,
                         AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
                                _Dst.info512, SchedWriteVecIMul.ZMM,
                                IsCommutable>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
                                     _Dst.info256, SchedWriteVecIMul.YMM,
                                     IsCommutable>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
                                     _Dst.info128, SchedWriteVecIMul.XMM,
                                     IsCommutable>, EVEX_V128;
  }
}

defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;

defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                                avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                              avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;

defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

// PMULLQ: Use the 512-bit version to implement 128/256-bit in the NoVLX case.
let Predicates = [HasDQI, NoVLX] in {
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
}

multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
                (Instr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
                (Instr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
}

let Predicates = [HasAVX512, NoVLX] in {
  defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
  defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
  defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;
  defm : avx512_min_max_lowering<VPMINSQZrr, smin>;
}

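// Widening through IMPLICIT_DEF is safe for these lowerings: the upper lanes
// of the 512-bit operation compute garbage, but EXTRACT_SUBREG reads back
// only the low 128/256 bits corresponding to the original operation.
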
//===----------------------------------------------------------------------===//
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//

// OpNodeMsk is the OpNode to use when element size is important. OpNode will
// be set to null_frag for 32-bit elements.
multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr,
                           SDPatternOperator OpNode,
                           SDNode OpNodeMsk, X86FoldableSchedWrite sched,
                           X86VectorVTInfo _, bit IsCommutable = 0> {
  let hasSideEffects = 0 in
  defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                  "$src2, $src1", "$src1, $src2",
                                  (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
                                                   (bitconvert (_.VT _.RC:$src2)))),
                                  (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                                                              _.RC:$src2)))),
                                  IsCommutable>, AVX512BIBase, EVEX_4V,
                                  Sched<[sched]>;

  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                                  "$src2, $src1", "$src1, $src2",
                                  (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
                                                   (bitconvert (_.LdFrag addr:$src2)))),
                                  (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                                                    (bitconvert (_.LdFrag addr:$src2))))))>,
                                  AVX512BIBase, EVEX_4V,
                                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// OpNodeMsk is the OpNode to use when element size is important, so it is
// used for all of the broadcast patterns.
multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr,
                            SDPatternOperator OpNode,
                            SDNode OpNodeMsk, X86FoldableSchedWrite sched, X86VectorVTInfo _,
                            bit IsCommutable = 0> :
           avx512_logic_rm<opc, OpcodeStr, OpNode, OpNodeMsk, sched, _,
                           IsCommutable> {
  defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                                   "${src2}"##_.BroadcastStr##", $src1",
                                   "$src1, ${src2}"##_.BroadcastStr,
                                   (_.i64VT (OpNodeMsk _.RC:$src1,
                                             (bitconvert
                                              (_.VT (X86VBroadcast
                                                     (_.ScalarLdFrag addr:$src2)))))),
                                   (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                                             (bitconvert
                                              (_.VT (X86VBroadcast
                                                     (_.ScalarLdFrag addr:$src2))))))))>,
                                   AVX512BIBase, EVEX_4V, EVEX_B,
                                   Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr,
                               SDPatternOperator OpNode,
                               SDNode OpNodeMsk, X86SchedWriteWidths sched,
                               AVX512VLVectorVTInfo VTInfo,
                               bit IsCommutable = 0> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.ZMM,
                            VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.YMM,
                                 VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.XMM,
                                 VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 bit IsCommutable = 0> {
  defm Q : avx512_logic_rmb_vl<opc_q, OpcodeStr#"q", OpNode, OpNode, sched,
                               avx512vl_i64_info, IsCommutable>,
                               VEX_W, EVEX_CD8<64, CD8VF>;
  defm D : avx512_logic_rmb_vl<opc_d, OpcodeStr#"d", null_frag, OpNode, sched,
                               avx512vl_i32_info, IsCommutable>,
                               EVEX_CD8<32, CD8VF>;
}

defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                   SchedWriteVecLogic, 1>;
defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                  SchedWriteVecLogic, 1>;
defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                   SchedWriteVecLogic, 1>;
defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SchedWriteVecLogic>;

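// The DAG canonicalizes vector logic ops to the i64 vector types, so only the
// "q" variants carry a plain pattern; the "d" variants (null_frag) still
// matter for masking and for 32-bit embedded broadcast, which is why
// OpNodeMsk is threaded through separately.
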
//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
//===----------------------------------------------------------------------===//

multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                            SDNode OpNode, SDNode VecNode,
                            X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                       (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                       "$src2, $src1", "$src1, $src2",
                                       (_.VT (VecNode _.RC:$src1, _.RC:$src2,
                                                      (i32 FROUND_CURRENT)))>,
                                       Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                       (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                                       "$src2, $src1", "$src1, $src2",
                                       (_.VT (VecNode _.RC:$src1,
                                                      _.ScalarIntMemCPat:$src2,
                                                      (i32 FROUND_CURRENT)))>,
                                       Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
              Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                        (_.ScalarLdFrag addr:$src2)))]>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }
}

multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                  SDNode VecNode, X86FoldableSchedWrite sched,
                                  bit IsCommutable = 0> {
  let ExeDomain = _.ExeDomain in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                        (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                                        "$rc, $src2, $src1", "$src1, $src2, $rc",
                                        (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                                 (i32 imm:$rc)), IsCommutable>,
                                        EVEX_B, EVEX_RC, Sched<[sched]>;
}
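
// AVX512RC:$rc is the static rounding-mode operand encoded in EVEX.RC
// (written {rn-sae}, {rd-sae}, {ru-sae} or {rz-sae} in assembly); EVEX_RC
// marks the instruction as supporting that encoding.
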
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
                                X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                       (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                       "$src2, $src1", "$src1, $src2",
                                       (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                                       Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                       (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                                       "$src2, $src1", "$src1, $src2",
                                       (_.VT (VecNode _.RC:$src1,
                                                      _.ScalarIntMemCPat:$src2))>,
                                       Sched<[sched.Folded, sched.ReadAfterFold]>;

  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
              Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                        (_.ScalarLdFrag addr:$src2)))]>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                        (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                                        (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                                 (i32 FROUND_NO_EXC))>, EVEX_B,
                                        Sched<[sched]>;
  }
}
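
// The rrb_Int form above encodes {sae} ("suppress all exceptions"): the
// operation is performed without reporting FP exceptions, which is why the
// pattern uses FROUND_NO_EXC rather than a rounding-mode immediate.
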
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                SDNode VecNode, X86SchedWriteSizes sched,
                                bit IsCommutable> {
  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
                              sched.PS.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode,
                                    sched.PS.Scl, IsCommutable>,
                                    XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
                              sched.PD.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode,
                                    sched.PD.Scl, IsCommutable>,
                                    XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}

multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode VecNode, SDNode SaeNode,
                              X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
                                  VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
                                  XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
                                  VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
                                  XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}

defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds,
                                 SchedWriteFAddSizes, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds,
                                 SchedWriteFMulSizes, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds,
                                 SchedWriteFAddSizes, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds,
                                 SchedWriteFDivSizes, 0>;
defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
                               SchedWriteFCmpSizes, 0>;
defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
                               SchedWriteFCmpSizes, 0>;

// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax.
multiclass avx512_commutable_binop_s<bits<8> opc, string OpcodeStr,
                                     X86VectorVTInfo _, SDNode OpNode,
                                     X86FoldableSchedWrite sched> {
  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
              Sched<[sched]> {
    let isCommutable = 1;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                        (_.ScalarLdFrag addr:$src2)))]>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

defm VMINCSSZ : avx512_commutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                          SchedWriteFCmp.Scl>, XS, EVEX_4V,
                                          VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VMINCSDZ : avx512_commutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                          SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
                                          VEX_LIG, EVEX_CD8<64, CD8VT1>;

defm VMAXCSSZ : avx512_commutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                          SchedWriteFCmp.Scl>, XS, EVEX_4V,
                                          VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VMAXCSDZ : avx512_commutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
                                          SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
                                          VEX_LIG, EVEX_CD8<64, CD8VT1>;

multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0,
                            bit IsKZCommutable = IsCommutable> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable, 0,
                           IsKZCommutable>,
                           EVEX_4V, Sched<[sched]>;
  let mayLoad = 1 in {
    defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
                             "$src2, $src1", "$src1, $src2",
                             (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                             EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
                              "${src2}"##_.BroadcastStr##", $src1",
                              "$src1, ${src2}"##_.BroadcastStr,
                              (OpNode _.RC:$src1, (_.VT (X86VBroadcast
                                                         (_.ScalarLdFrag addr:$src2))))>,
                              EVEX_4V, EVEX_B,
                              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }
}

multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNodeRnd,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
                            "$rc, $src2, $src1", "$src1, $src2, $rc",
                            (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc)))>,
                            EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}

multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNodeRnd,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 FROUND_NO_EXC)))>,
                            EVEX_4V, EVEX_B, Sched<[sched]>;
}

multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                             Predicate prd, X86SchedWriteSizes sched,
                             bit IsCommutable = 0,
                             bit IsPD128Commutable = IsCommutable> {
  let Predicates = [prd] in {
    defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
                                sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
                                EVEX_CD8<32, CD8VF>;
    defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
                                sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
                                EVEX_CD8<64, CD8VF>;
  }

  // Define only if AVX512VL feature is present.
  let Predicates = [prd, HasVLX] in {
    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
                                   sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
                                   EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
                                   sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
                                   EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
                                   sched.PD.XMM, IsPD128Commutable,
                                   IsCommutable>, EVEX_V128, PD, VEX_W,
                                   EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
                                   sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
                                   EVEX_CD8<64, CD8VF>;
  }
}

multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                   X86SchedWriteSizes sched> {
  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                    v16f32_info>,
                                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                    v8f64_info>,
                                    EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                 X86SchedWriteSizes sched> {
  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                  v16f32_info>,
                                  EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                  v8f64_info>,
                                  EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
}

defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
                              SchedWriteFAddSizes, 1>,
            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
                              SchedWriteFMulSizes, 1>,
            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
                              SchedWriteFAddSizes>,
            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
                              SchedWriteFDivSizes>,
            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SchedWriteFCmpSizes>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SchedWriteFCmpSizes>;
let isCodeGenOnly = 1 in {
  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>;
  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>;
}
defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
                              SchedWriteFLogicSizes, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 0>;
defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
                             SchedWriteFLogicSizes, 1>;
defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
                              SchedWriteFLogicSizes, 1>;
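
// VAND/VANDN/VOR/VXOR are declared with null_frag, so they get no selection
// patterns of their own here; masked FP-typed logic is matched by the
// avx512_fp_logical_lowering patterns below (onto the integer VPAND/VPOR/
// VPXOR/VPANDN forms), and the scalar f32/f64 cases further below use the
// 128-bit packed VANDP*/VORP*/VXORP*/VANDNP* instructions directly.
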
// These patterns catch floating-point selects wrapped around bitcasted
// integer logic ops.
multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
                                      X86VectorVTInfo _, Predicate prd> {
let Predicates = [prd] in {
  // Masked register-register logical operations.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, _.RC:$src2)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
             _.RC:$src2)>;
  // Masked register-memory logical operations.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (_.i64VT (OpNode _.RC:$src1,
                                         (load addr:$src2)))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (_.i64VT (OpNode _.RC:$src1, (load addr:$src2)))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
             addr:$src2)>;
  // Register-broadcast logical operations.
  def : Pat<(_.i64VT (OpNode _.RC:$src1,
                      (bitconvert (_.VT (X86VBroadcast
                                         (_.ScalarLdFrag addr:$src2)))))),
            (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert
                    (_.i64VT (OpNode _.RC:$src1,
                              (bitconvert (_.VT (X86VBroadcast
                                                 (_.ScalarLdFrag addr:$src2))))))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert
                    (_.i64VT (OpNode _.RC:$src1,
                              (bitconvert (_.VT (X86VBroadcast
                                                 (_.ScalarLdFrag addr:$src2))))))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
}
}
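// Illustrative expansion (a sketch, not generated output): with
// InstrStr = "VPANDDZ128", OpNode = and and _ = v4f32x_info, the first
// pattern above selects a masked "vpandd %xmm2, %xmm1, %xmm0 {%k1}" for a
// v4f32 vselect whose true operand is a bitcast integer AND, steering
// FP-typed masked logic onto the integer instruction forms.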
multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> {
  defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info, HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info, HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info, HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info, HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info, HasAVX512>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info, HasAVX512>;
}
defm : avx512_fp_logical_lowering_sizes<"VPAND", and>;
defm : avx512_fp_logical_lowering_sizes<"VPOR", or>;
defm : avx512_fp_logical_lowering_sizes<"VPXOR", xor>;
defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>;
let Predicates = [HasVLX, HasDQI] in {
  // Use packed logical operations for scalar ops.
  def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
             (v2f64 (VANDPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
                                  (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
             FR64X)>;
  def : Pat<(f64 (X86for FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
             (v2f64 (VORPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
                                 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
             FR64X)>;
  def : Pat<(f64 (X86fxor FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
             (v2f64 (VXORPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
                                  (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
             FR64X)>;
  def : Pat<(f64 (X86fandn FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
             (v2f64 (VANDNPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
                                   (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
             FR64X)>;

  def : Pat<(f32 (X86fand FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
             (v4f32 (VANDPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
                                  (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
             FR32X)>;
  def : Pat<(f32 (X86for FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
             (v4f32 (VORPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
                                 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
             FR32X)>;
  def : Pat<(f32 (X86fxor FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
             (v4f32 (VXORPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
                                  (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
             FR32X)>;
  def : Pat<(f32 (X86fandn FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
             (v4f32 (VANDNPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
                                   (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
             FR32X)>;
}
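// x86 has no scalar FP logic instructions, so the f32/f64 operations above
// are performed on the low element of a 128-bit packed register; whatever is
// in the upper elements is harmless because only element 0 is copied back
// out through COPY_TO_REGCLASS.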
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>,
                  EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>,
                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (OpNode _.RC:$src1, (_.VT (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src2))),
                           (i32 FROUND_CURRENT))>,
                   EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>,
                  Sched<[sched]>;
  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2,
                          (i32 FROUND_CURRENT))>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
                                SDNode OpNode, SDNode OpNodeScal,
                                X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>,
             EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>,
             EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f32x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, sched.Scl>,
             EVEX_4V, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f64x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, sched.Scl>,
             EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;

  // Define only if the AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v4f32x_info>,
                  EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v8f32x_info>,
                  EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v2f64x_info>,
                  EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v4f64x_info>,
                  EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs,
                                    SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;
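// VSCALEF semantics (roughly, per the ISA reference): each result element is
// src1 * 2^floor(src2), with the usual EVEX masking, broadcast and
// embedded-rounding forms, e.g. "vscalefps %zmm2, %zmm1, %zmm0".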
//===----------------------------------------------------------------------===//
// AVX-512 VPTESTM instructions
//===----------------------------------------------------------------------===//
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
                         string Name> {
  let ExeDomain = _.ExeDomain in {
  let isCommutable = 1 in
  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
                           _.ImmAllZerosV)>,
                   EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (OpNode (bitconvert
                            (_.i64VT (and _.RC:$src1,
                                          (bitconvert (_.LdFrag addr:$src2))))),
                           _.ImmAllZerosV)>,
                   EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // Patterns for compare with 0 that just use the same source twice.
  def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
            (_.KVT (!cast<Instruction>(Name # _.ZSuffix # "rr")
                    _.RC:$src, _.RC:$src))>;

  def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
            (_.KVT (!cast<Instruction>(Name # _.ZSuffix # "rrk")
                    _.KRC:$mask, _.RC:$src, _.RC:$src))>;
}
multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                    "${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr,
                    (OpNode (and _.RC:$src1,
                                 (X86VBroadcast
                                  (_.ScalarLdFrag addr:$src2))),
                            _.ImmAllZerosV)>,
                    EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Use the 512-bit version to implement the 128/256-bit versions when VLX is
// not available.
multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo,
                                  X86VectorVTInfo _, string Name> {
  def : Pat<(_.KVT (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
                           _.ImmAllZerosV)),
            (_.KVT (COPY_TO_REGCLASS
                    (!cast<Instruction>(Name # "Zrr")
                     (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                    _.RC:$src1, _.SubRegIdx),
                     (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                    _.RC:$src2, _.SubRegIdx)),
                    _.KRC))>;

  def : Pat<(_.KVT (and _.KRC:$mask,
                        (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
                                _.ImmAllZerosV))),
            (COPY_TO_REGCLASS
             (!cast<Instruction>(Name # "Zrrk")
              (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
              (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                             _.RC:$src1, _.SubRegIdx),
              (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                             _.RC:$src2, _.SubRegIdx)),
             _.KRC)>;

  def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
            (_.KVT (COPY_TO_REGCLASS
                    (!cast<Instruction>(Name # "Zrr")
                     (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                    _.RC:$src, _.SubRegIdx),
                     (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                    _.RC:$src, _.SubRegIdx)),
                    _.KRC))>;

  def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
            (COPY_TO_REGCLASS
             (!cast<Instruction>(Name # "Zrrk")
              (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
              (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                             _.RC:$src, _.SubRegIdx),
              (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                             _.RC:$src, _.SubRegIdx)),
             _.KRC)>;
}
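// Correctness sketch for the widening above: the 128/256-bit sources are
// inserted into an IMPLICIT_DEF 512-bit register, so the upper lanes hold
// undef; that is fine because only the low NumElts bits of the resulting
// mask are read back through COPY_TO_REGCLASS into the narrow KRC.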
multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_vptest<opc, OpcodeStr, OpNode, sched.ZMM, _.info512, NAME>,
           avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, sched.YMM, _.info256, NAME>,
              avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
  defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, sched.XMM, _.info128, NAME>,
              avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
  }
  let Predicates = [HasAVX512, NoVLX] in {
  defm Z256_Alt : avx512_vptest_lowering<OpNode, _.info512, _.info256, NAME>;
  defm Z128_Alt : avx512_vptest_lowering<OpNode, _.info512, _.info128, NAME>;
  }
}
multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                            X86SchedWriteWidths sched> {
  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, sched,
                                  avx512vl_i32_info>;
  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, sched,
                                  avx512vl_i64_info>, VEX_W;
}
multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
                            PatFrag OpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in {
  defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.ZMM,
                         v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
  defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.ZMM,
                         v64i8_info, NAME#"B">, EVEX_V512;
  }
  let Predicates = [HasVLX, HasBWI] in {
  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.YMM,
                            v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.XMM,
                            v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.YMM,
                            v32i8x_info, NAME#"B">, EVEX_V256;
  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.XMM,
                            v16i8x_info, NAME#"B">, EVEX_V128;
  }

  let Predicates = [HasAVX512, NoVLX] in {
  defm BZ256_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v32i8x_info, NAME#"B">;
  defm BZ128_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v16i8x_info, NAME#"B">;
  defm WZ256_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v16i16x_info, NAME#"W">;
  defm WZ128_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v8i16x_info, NAME#"W">;
  }
}
// These patterns are used to match vptestm/vptestnm. We don't treat pcmpeqm
// as commutable here because we have already canonicalized all-zeros vectors
// to the RHS during lowering.
def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2),
                         (setcc node:$src1, node:$src2, SETEQ)>;
def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2),
                         (setcc node:$src1, node:$src2, SETNE)>;
multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
                                   PatFrag OpNode, X86SchedWriteWidths sched> :
  avx512_vptest_wb<opc_wb, OpcodeStr, OpNode, sched>,
  avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, sched>;

defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem,
                                       SchedWriteVecLogic>, T8PD;
defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm,
                                        SchedWriteVecLogic>, T8XS;
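// Per-element semantics (sketch): vptestm sets mask bit i to
// ((src1[i] & src2[i]) != 0), matching the SETNE PatFrag, and vptestnm sets
// it to ((src1[i] & src2[i]) == 0) via SETEQ. For example,
// "vptestmd %zmm1, %zmm0, %k1" produces a 16-bit mask in k1.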
//===----------------------------------------------------------------------===//
// AVX-512 Shift instructions
//===----------------------------------------------------------------------===//
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                            string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (i8 imm:$src2)))>,
                   Sched<[sched]>;
  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                 (i8 imm:$src2)))>,
                   Sched<[sched.Folded]>;
  }
}
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
                             string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
                   "$src2, ${src1}"##_.BroadcastStr,
                   "${src1}"##_.BroadcastStr##", $src2",
                   (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)),
                                 (i8 imm:$src2)))>,
                   EVEX_B, Sched<[sched.Folded]>;
}
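// The mbi form above folds an embedded broadcast into a shift-by-immediate,
// e.g. (assembly sketch) "vpsrld $7, (%rax){1to16}, %zmm0": a single dword
// is loaded, splatted to all 16 elements, and each element is then shifted.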
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, ValueType SrcVT,
                            PatFrag bc_frag, X86VectorVTInfo _> {
  // src2 is always 128-bit
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
                   AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2))))>,
                   AVX512BIBase,
                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched, ValueType SrcVT,
                              PatFrag bc_frag, AVX512VLVectorVTInfo VTInfo,
                              Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
                            bc_frag, VTInfo.info512>, EVEX_V512,
                            EVEX_CD8<VTInfo.info512.EltSize, CD8VQ>;
  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
                               bc_frag, VTInfo.info256>, EVEX_V256,
                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
                               bc_frag, VTInfo.info128>, EVEX_V128,
                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
  }
}
multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              bit NotEVEX2VEXConvertibleQ = 0> {
  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
                              bc_v4i32, avx512vl_i32_info, HasAVX512>;
  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
                              bc_v2i64, avx512vl_i64_info, HasAVX512>, VEX_W;
  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
                              bc_v2i64, avx512vl_i16_info, HasBWI>;
}
multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                           sched.ZMM, VTInfo.info512>,
          avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
                            VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.YMM, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
                               VTInfo.info256>, EVEX_V256;
  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.XMM, VTInfo.info128>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
                               VTInfo.info128>, EVEX_V128;
  }
}
multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                            sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
  let Predicates = [HasVLX, HasBWI] in {
  defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
  defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
  }
}
multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
                               Format ImmFormR, Format ImmFormM,
                               string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched,
                               bit NotEVEX2VEXConvertibleQ = 0> {
  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
                                 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
}
defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
                                 SchedWriteVecShiftImm, 1>,
             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
                                SchedWriteVecShift>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
                                SchedWriteVecShift, 1>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
                                SchedWriteVecShift>;
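// The NotEVEX2VEXConvertibleQ = 1 on the vpsra Q variants blocks the
// EVEX-to-VEX compression pass: AVX/AVX2 have no 64-bit arithmetic
// right-shift (there is no VEX-encoded vpsraq), so those EVEX forms have no
// VEX equivalent to compress to.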
// Use the 512-bit VPSRA/VPSRAI versions to implement v2i64/v4i64 when VLX is
// not available.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                VR128X:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                VR128X:$src2)), sub_xmm)>;

  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                imm:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                imm:$src2)), sub_xmm)>;
}
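// The same widen/execute/extract idiom as in the vptest lowering: operands
// are inserted into undef zmm registers, the 512-bit VPSRAQ runs under plain
// AVX512F, and the low subregister of the result is extracted; upper-lane
// garbage is dead.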
//===-------------------------------------------------------------------===//
// Variable Bit Shifts
//===-------------------------------------------------------------------===//
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
                   AVX5128IBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1,
                                 (_.VT (bitconvert (_.LdFrag addr:$src2)))))>,
                   AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src2)))))>,
                   AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
  defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
  }
}
multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, X86SchedWriteWidths sched> {
  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
                                  avx512vl_i32_info>;
  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
                                  avx512vl_i64_info>, VEX_W;
}
// Use the 512-bit version to implement the 128/256-bit versions when VLX is
// not available.
multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
                                     SDNode OpNode, list<Predicate> p> {
  let Predicates = p in {
  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
                                  (_.info256.VT _.info256.RC:$src2))),
            (EXTRACT_SUBREG
             (!cast<Instruction>(OpcodeStr#"Zrr")
              (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
              (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
                                  (_.info128.VT _.info128.RC:$src2))),
            (EXTRACT_SUBREG
             (!cast<Instruction>(OpcodeStr#"Zrr")
              (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
              (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  }
}
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
           EVEX_V512, VEX_W;
  let Predicates = [HasVLX, HasBWI] in {
  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
              EVEX_V256, VEX_W;
  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
              EVEX_V128, VEX_W;
  }
}
defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x12, "vpsllvw", shl, SchedWriteVarVecShift>;

defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x11, "vpsravw", sra, SchedWriteVarVecShift>;

defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x10, "vpsrlvw", srl, SchedWriteVarVecShift>;

defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;

defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra, [HasAVX512, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>;
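// Only these cases need the widening fallback: AVX2 already provides
// 128/256-bit vpsllv/vpsrlv for dword/qword and vpsravd, so under NoVLX just
// the 64-bit arithmetic shift (VPSRAVQ) and the BWI word shifts lack narrow
// encodings.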
// Special handling for VPSRAV intrinsics.
multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
                                         list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1,
               _.RC:$src2)>;
    def : Pat<(_.VT (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rm)
               _.RC:$src1, addr:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
               _.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
                     _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
               _.KRC:$mask, _.RC:$src1, addr:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
               _.RC:$src1, _.RC:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
                     _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
               _.RC:$src1, addr:$src2)>;
  }
}
multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
                                            list<Predicate> p> :
           avx512_var_shift_int_lowering<InstrStr, _, p> {
  let Predicates = p in {
    def : Pat<(_.VT (X86vsrav _.RC:$src1,
                     (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmb)
               _.RC:$src1, addr:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1,
                      (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                     _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0,
               _.KRC:$mask, _.RC:$src1, addr:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1,
                      (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                     _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask,
               _.RC:$src1, addr:$src2)>;
  }
}
defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info, [HasVLX, HasBWI]>;
defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>;
defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;
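// X86vsrav appears to be kept distinct from the generic sra node because the
// intrinsic semantics differ for out-of-range amounts: vpsrav* fills the
// element with the sign bit when the shift count is >= the element width,
// whereas ISD::SRA treats an oversized shift as undefined.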
// Use the 512-bit VPROL/VPROLI versions to implement v2i64/v4i64 and
// v4i32/v8i32 when VLX is not available.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
             sub_ymm)>;

  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
             sub_ymm)>;

  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                imm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                imm:$src2)), sub_ymm)>;

  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                imm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                imm:$src2)), sub_ymm)>;
}
// Use the 512-bit VPROR/VPRORI versions to implement v2i64/v4i64 and
// v4i32/v8i32 when VLX is not available.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
             sub_ymm)>;

  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
             sub_ymm)>;

  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                imm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                imm:$src2)), sub_ymm)>;

  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                imm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                imm:$src2)), sub_ymm)>;
}
//===-------------------------------------------------------------------===//
// 1-src variable permutation VPERMW/D/Q
//===-------------------------------------------------------------------===//
multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
}
multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                           sched, VTInfo.info512>,
          avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                            sched, VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info256>, EVEX_V256;
}
multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
                           Predicate prd, SDNode OpNode,
                           X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates = [prd] in
  defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
          EVEX_V512;
  let Predicates = [HasVLX, prd] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
             EVEX_V256;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
             EVEX_V128;
  }
}
defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
                              WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
                              WriteVarShuffle256, avx512vl_i8_info>;

defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i32_info>;
defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;

defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
                                     X86VPermi, WriteShuffle256, avx512vl_i64_info>,
                                     EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
                                      X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
                                      EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
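// Two control flavors (sketch): the X86VPermv defs take their element
// selectors from a register operand (hence the reuse of the var-shift
// multiclasses, which have the same two-source shape), while the X86VPermi
// defs take an 8-bit immediate, e.g. "vpermq $0x1b, %zmm1, %zmm0".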
//===----------------------------------------------------------------------===//
// AVX-512 - VPERMIL
//===----------------------------------------------------------------------===//
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
                             X86VectorVTInfo Ctrl> {
  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1,
                                (Ctrl.VT Ctrl.RC:$src2)))>,
                  T8PD, EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode
                          _.RC:$src1,
                          (Ctrl.VT (bitconvert (Ctrl.LdFrag addr:$src2)))))>,
                  T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (_.VT (OpNode
                           _.RC:$src1,
                           (Ctrl.VT (X86VBroadcast
                                     (Ctrl.ScalarLdFrag addr:$src2)))))>,
                   T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
                                    X86SchedWriteWidths sched,
                                    AVX512VLVectorVTInfo _,
                                    AVX512VLVectorVTInfo Ctrl> {
  let Predicates = [HasAVX512] in {
  defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
                             _.info512, Ctrl.info512>, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
  defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
                                _.info128, Ctrl.info128>, EVEX_V128;
  defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
                                _.info256, Ctrl.info256>, EVEX_V256;
  }
}
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl> {
  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
                                      _, Ctrl>;
  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                    X86VPermilpi, SchedWriteFShuffle, _>,
             EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
                               avx512vl_i32_info>;
let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
                               avx512vl_i64_info>, VEX_W1X;
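// vpermilps/vpermilpd permute elements within each 128-bit lane only; the
// control comes either from an immediate (the avx512_shift_rmi_sizes reuse
// above, X86VPermilpi) or from an integer control vector (X86VPermilpv).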
//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
//===----------------------------------------------------------------------===//

defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
                                      X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
                                      EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
                                  X86PShufhw, SchedWriteShuffle>,
                                  EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
                                  X86PShuflw, SchedWriteShuffle>,
                                  EVEX, AVX512XDIi8Base;
//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFB
//===----------------------------------------------------------------------===//

multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
          EVEX_V512;

  let Predicates = [HasVLX, HasBWI] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
             EVEX_V256;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
             EVEX_V128;
  }
}

defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
                                  SchedWriteVarShuffle>, VEX_WIG;
//===----------------------------------------------------------------------===//
// Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//

def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
                            Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
let isCommutable = 1 in
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
                            Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
//===----------------------------------------------------------------------===//
// VMOVHPS/PD VMOVLPS Instructions
// All patterns were taken from the SSE implementation.
//===----------------------------------------------------------------------===//
multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNode,
                                  X86VectorVTInfo _> {
  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
                  (ins _.RC:$src1, f64mem:$src2),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.RC:$dst,
                        (_.VT (OpNode _.RC:$src1,
                               (_.VT (bitconvert
                                      (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))))]>,
                  Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
}
// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
// SSE1, and the MOVLPS pattern is even more complex.
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
                                          v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
                                          v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
                                          v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
                                          v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;

let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
                    (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
}
let SchedRW = [WriteFStore] in {
def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovhps\t{$src, $dst|$dst, $src}",
                              [(store (f64 (extractelt
                                            (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
                                                       (bc_v2f64 (v4f32 VR128X:$src))),
                                            (iPTR 0))), addr:$dst)]>,
                              EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovhpd\t{$src, $dst|$dst, $src}",
                              [(store (f64 (extractelt
                                            (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
                                            (iPTR 0))), addr:$dst)]>,
                              EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovlps\t{$src, $dst|$dst, $src}",
                              [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128X:$src)),
                                            (iPTR 0))), addr:$dst)]>,
                              EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovlpd\t{$src, $dst|$dst, $src}",
                              [(store (f64 (extractelt (v2f64 VR128X:$src),
                                            (iPTR 0))), addr:$dst)]>,
                              EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
} // SchedRW
let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(store (f64 (extractelt
                         (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
                         (iPTR 0))), addr:$dst),
            (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
}
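// This pattern recognizes "store element 1 of a v2f64" (exposed by ISel as a
// vpermilpd with immediate 1 feeding an extract of element 0) and emits the
// store form of vmovhpd directly, avoiding a separate shuffle.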
//===----------------------------------------------------------------------===//
// FMA - Fused Multiply Operations
//===----------------------------------------------------------------------===//
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
           !strconcat("$src2, ${src3}", _.BroadcastStr),
           (OpNode _.RC:$src2,
            _.RC:$src1, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>,
           AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))), 1, 1>,
           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                                    _.info512, Suff>,
                avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                      _.info512, Suff>,
                EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                    _.info256, Suff>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                    _.info128, Suff>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd> {
  defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f32_info, "PS">;
  defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f64_info, "PD">,
                                    VEX_W;
}
defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
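// Operand convention sketch for the 213 forms: dst = src2 * dst + src3 (for
// vfmadd), e.g. "vfmadd213ps %zmm3, %zmm2, %zmm1" computes
// zmm1 = zmm2 * zmm1 + zmm3. Note the patterns above pass _.RC:$src1 (the
// tied destination) as the middle multiplicand.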
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1,
          vselect, 1>, AVX512FMA3Base, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
           "$src2, ${src3}"##_.BroadcastStr,
           (_.VT (OpNode _.RC:$src2,
                         (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
                         _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))),
           1, 1, vselect, 1>,
           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                                    _.info512, Suff>,
                avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                      _.info512, Suff>,
                EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                    _.info256, Suff>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                    _.info128, Suff>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd> {
  defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f32_info, "PS">;
  defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f64_info, "PD">,
                                    VEX_W;
}
defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
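// 231 forms: dst = src2 * src3 + dst, matching the
// (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1) pattern above; e.g.
// "vfmadd231ps %zmm3, %zmm2, %zmm1" computes zmm1 = zmm2 * zmm3 + zmm1.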
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1, vselect, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  // Pattern is in 312 order so that the load is in a different place from the
  // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Pattern is in 312 order so that the load is in a different place from the
  // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
           "$src2, ${src3}"##_.BroadcastStr,
           (_.VT (OpNode (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
                         _.RC:$src1, _.RC:$src2)), 1, 0>,
           AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc))),
           1, 1, vselect, 1>,
           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                                    _.info512, Suff>,
                avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                      _.info512, Suff>,
                EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                    _.info256, Suff>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                    _.info128, Suff>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd> {
  defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f32_info, "PS">;
  defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f64_info, "PD">,
                                    VEX_W;
}
defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
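// 132 forms: dst = dst * src3 + src2, matching the
// (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2) pattern above. The load forms
// are written in 312 operand order purely so tablegen's duplicate-pattern
// detection does not collide with the equivalent commuted 213/231 patterns.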
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
  let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>;

  let mayLoad = 1 in
  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;

  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;

  let isCodeGenOnly = 1, isCommutable = 1 in {
    def r : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                        (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
                        !strconcat(OpcodeStr,
                                   "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                        !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>;
    def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
                        (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
                        !strconcat(OpcodeStr,
                                   "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                        [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;

    def rb : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
                         !strconcat(OpcodeStr,
                                    "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
                         !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
                         Sched<[SchedWriteFMA.Scl]>;
  } // isCodeGenOnly = 1
  } // Constraints = "$src1 = $dst"
}
multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                            string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
                            X86VectorVTInfo _, string SUFF> {
  let ExeDomain = _.ExeDomain in {
  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
                // Operands for intrinsic are in 123 order to preserve passthru
                // operand.
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                         _.FRC:$src3))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src3)))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
                         _.FRC:$src3, (i32 imm:$rc)))), 0>;

  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
                         _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
                         (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
                         _.FRC:$src1, (i32 imm:$rc)))), 1>;

  // One pattern is in 312 order so that the load is in a different place from
  // the 213 and 231 patterns; this helps tablegen's duplicate pattern
  // detection.
  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
                         _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
                         _.FRC:$src1, _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
                         _.FRC:$src2, (i32 imm:$rc)))), 1>;
  }
}
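// Illustrative note (not part of the generated patterns): with the tied
// $src1 = $dst convention above, vfmadd213ss computes
// $dst = $src2 * $dst + $src3, vfmadd231ss computes
// $dst = $src2 * $src3 + $dst, and vfmadd132ss computes
// $dst = $dst * $src3 + $src2.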
multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                        string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f32x_info, "SS">,
                                 EVEX_CD8<32, CD8VT1>, VEX_LIG;
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f64x_info, "SD">,
                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
  }
}

defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>;
defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;

multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
                                      string Suffix, SDNode Move,
                                      X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                           (Op _.FRC:$src2,
                               (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                               _.FRC:$src3))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                           (Op _.FRC:$src2, _.FRC:$src3,
                               (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                           (Op _.FRC:$src2,
                               (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                               (_.ScalarLdFrag addr:$src3)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                           (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                               (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                           (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                               (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                           (X86selects VK1WM:$mask,
                            (Op _.FRC:$src2,
                                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src3),
                            (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                           (X86selects VK1WM:$mask,
                            (Op _.FRC:$src2,
                                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                (_.ScalarLdFrag addr:$src3)),
                            (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                           (X86selects VK1WM:$mask,
                            (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
                            (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                           (X86selects VK1WM:$mask,
                            (Op _.FRC:$src2, _.FRC:$src3,
                                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                            (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                           (X86selects VK1WM:$mask,
                            (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                            (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                           (X86selects VK1WM:$mask,
                            (Op _.FRC:$src2,
                                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src3),
                            (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                           (X86selects VK1WM:$mask,
                            (Op _.FRC:$src2, _.FRC:$src3,
                                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                            (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                           (X86selects VK1WM:$mask,
                            (Op _.FRC:$src2,
                                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                (_.ScalarLdFrag addr:$src3)),
                            (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                           (X86selects VK1WM:$mask,
                            (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
                            (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                           (X86selects VK1WM:$mask,
                            (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                            (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
    // Patterns with rounding mode.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                           (RndOp _.FRC:$src2,
                                  (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                  _.FRC:$src3, (i32 imm:$rc)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                           (RndOp _.FRC:$src2, _.FRC:$src3,
                                  (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                  (i32 imm:$rc)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                           (X86selects VK1WM:$mask,
                            (RndOp _.FRC:$src2,
                                   (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                   _.FRC:$src3, (i32 imm:$rc)),
                            (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                           (X86selects VK1WM:$mask,
                            (RndOp _.FRC:$src2, _.FRC:$src3,
                                   (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                   (i32 imm:$rc)),
                            (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                           (X86selects VK1WM:$mask,
                            (RndOp _.FRC:$src2,
                                   (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                   _.FRC:$src3, (i32 imm:$rc)),
                            (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                           (X86selects VK1WM:$mask,
                            (RndOp _.FRC:$src2, _.FRC:$src3,
                                   (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                   (i32 imm:$rc)),
                            (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
  }
}
defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;

defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;

//===----------------------------------------------------------------------===//
// AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit
// Result (IFMA)
//===----------------------------------------------------------------------===//
let Constraints = "$src1 = $dst" in {
multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: The SDNode has the multiply operands first with the add last.
  // This enables commuted load patterns to be autogenerated by tablegen.
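  // Illustrative note: in (OpNode $src2, $src3, $src1) below, $src1 is the
  // tied accumulator, so the autogenerated commuted patterns can fold a load
  // into either multiply operand.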
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
           !strconcat("$src2, ${src3}", _.BroadcastStr),
           (OpNode _.RC:$src2,
                   (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
                   _.RC:$src1)>,
           AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
} // Constraints = "$src1 = $dst"

multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasIFMA] in {
    defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasIFMA] in {
    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         VEX_W;
defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         VEX_W;
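// Illustrative semantics (per the IFMA instruction description): in each
// 64-bit lane, vpmadd52luq computes
//   $dst += ZeroExtend64(Low52($src2[51:0] * $src3[51:0]))
// and vpmadd52huq adds the high 52 bits of the same 104-bit product.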
//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from signed integer to float/double
//===----------------------------------------------------------------------===//

multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, X86FoldableSchedWrite sched,
                         RegisterClass SrcRC, X86VectorVTInfo DstVT,
                         X86MemOperand x86memop, PatFrag ld_frag, string asm> {
  let hasSideEffects = 0 in {
    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
                (ins DstVT.FRC:$src1, SrcRC:$src),
                !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
                EVEX_4V, Sched<[sched]>;

    def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
                (ins DstVT.FRC:$src1, x86memop:$src),
                !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
                EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // hasSideEffects = 0
  let isCodeGenOnly = 1 in {
    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                    (ins DstVT.RC:$src1, SrcRC:$src2),
                    !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set DstVT.RC:$dst,
                          (OpNode (DstVT.VT DstVT.RC:$src1),
                                  SrcRC:$src2,
                                  (i32 FROUND_CURRENT)))]>,
                    EVEX_4V, Sched<[sched]>;

    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
                    (ins DstVT.RC:$src1, x86memop:$src2),
                    !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set DstVT.RC:$dst,
                          (OpNode (DstVT.VT DstVT.RC:$src1),
                                  (ld_frag addr:$src2),
                                  (i32 FROUND_CURRENT)))]>,
                    EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // isCodeGenOnly = 1
}
multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
                               X86VectorVTInfo DstVT, string asm> {
  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                   (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
                   !strconcat(asm,
                        "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
                   [(set DstVT.RC:$dst,
                         (OpNode (DstVT.VT DstVT.RC:$src1),
                                 SrcRC:$src2,
                                 (i32 imm:$rc)))]>,
                   EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}

multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
                                X86MemOperand x86memop, PatFrag ld_frag, string asm> {
  defm NAME : avx512_vcvtsi_round<opc, OpNode, sched, SrcRC, DstVT, asm>,
              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
                            ld_frag, asm>, VEX_LIG;
}
let Predicates = [HasAVX512] in {
defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SS, GR32,
                                        v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">,
                                        XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SS, GR64,
                                        v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">,
                                        XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ  : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SD, GR32,
                                        v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">,
                                        XD, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SD, GR64,
                                        v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">,
                                        XD, VEX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;

def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (sint_to_fp GR32:$src)),
          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (sint_to_fp GR64:$src)),
          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (sint_to_fp GR32:$src)),
          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (sint_to_fp GR64:$src)),
          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SS, GR32,
                                          v4f32x_info, i32mem, loadi32,
                                          "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SS, GR64,
                                          v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">,
                                          XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, X86UintToFpRnd, WriteCvtI2SD, GR32, v2f64x_info,
                                   i32mem, loadi32, "cvtusi2sd{l}">,
                                   XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SD, GR64,
                                          v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
                                          XD, VEX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTUSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTUSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;

def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (uint_to_fp GR32:$src)),
          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (uint_to_fp GR64:$src)),
          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (uint_to_fp GR32:$src)),
          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (uint_to_fp GR64:$src)),
          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
} // Predicates = [HasAVX512]

//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from float/double to integer
//===----------------------------------------------------------------------===//
multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                                  X86VectorVTInfo DstVT, SDNode OpNode,
                                  SDNode OpNodeRnd,
                                  X86FoldableSchedWrite sched, string asm,
                                  string aliasStr,
                                  bit CodeGenOnly = 1> {
  let Predicates = [HasAVX512] in {
    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
                    !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                    [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
                    EVEX, VEX_LIG, Sched<[sched]>;
    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
                     !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
                     [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src), (i32 imm:$rc)))]>,
                     EVEX, VEX_LIG, EVEX_B, EVEX_RC,
                     Sched<[sched]>;
    let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
                    !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                    [(set DstVT.RC:$dst, (OpNode
                          (SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>,
                    EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;

    def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
    def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
                    (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
  } // Predicates = [HasAVX512]
}

multiclass avx512_cvt_s_int_round_aliases<bits<8> opc, X86VectorVTInfo SrcVT,
                                          X86VectorVTInfo DstVT, SDNode OpNode,
                                          SDNode OpNodeRnd,
                                          X86FoldableSchedWrite sched, string asm,
                                          string aliasStr> :
  avx512_cvt_s_int_round<opc, SrcVT, DstVT, OpNode, OpNodeRnd, sched, asm, aliasStr, 0> {
  let Predicates = [HasAVX512] in {
    def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
                                        SrcVT.IntScalarMemOp:$src), 0, "att">;
  } // Predicates = [HasAVX512]
}
// Convert float/double to signed/unsigned int 32/64
defm VCVTSS2SIZ:    avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
                                   XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z:  avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ:   avx512_cvt_s_int_round_aliases<0x79, f32x_info, i32x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
                                   XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i64x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ:    avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
                                   XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z:  avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ:   avx512_cvt_s_int_round_aliases<0x79, f64x_info, i32x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
                                   XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i64x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;

// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang,
// which produce unnecessary vmovs{s,d} instructions.
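// Illustrative example (assumption about the exact clang-emitted sequence):
// an intrinsic such as _mm_cvtsi32_ss(a, b) lowers to an X86Movss of a scalar
// sint_to_fp result; without these patterns, selection would produce a
// CVTSI2SS into a fresh register plus a VMOVSS to merge the low element,
// instead of a single VCVTSI2SSZrr_Int.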
let Predicates = [HasAVX512] in {
  def : Pat<(v4f32 (X86Movss
                    (v4f32 VR128X:$dst),
                    (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
            (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

  def : Pat<(v4f32 (X86Movss
                    (v4f32 VR128X:$dst),
                    (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
            (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v4f32 (X86Movss
                    (v4f32 VR128X:$dst),
                    (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
            (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

  def : Pat<(v4f32 (X86Movss
                    (v4f32 VR128X:$dst),
                    (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
            (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v2f64 (X86Movsd
                    (v2f64 VR128X:$dst),
                    (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
            (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

  def : Pat<(v2f64 (X86Movsd
                    (v2f64 VR128X:$dst),
                    (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
            (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v2f64 (X86Movsd
                    (v2f64 VR128X:$dst),
                    (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
            (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

  def : Pat<(v2f64 (X86Movsd
                    (v2f64 VR128X:$dst),
                    (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
            (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v4f32 (X86Movss
                    (v4f32 VR128X:$dst),
                    (v4f32 (scalar_to_vector (f32 (uint_to_fp GR64:$src)))))),
            (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

  def : Pat<(v4f32 (X86Movss
                    (v4f32 VR128X:$dst),
                    (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi64 addr:$src))))))),
            (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v4f32 (X86Movss
                    (v4f32 VR128X:$dst),
                    (v4f32 (scalar_to_vector (f32 (uint_to_fp GR32:$src)))))),
            (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

  def : Pat<(v4f32 (X86Movss
                    (v4f32 VR128X:$dst),
                    (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi32 addr:$src))))))),
            (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v2f64 (X86Movsd
                    (v2f64 VR128X:$dst),
                    (v2f64 (scalar_to_vector (f64 (uint_to_fp GR64:$src)))))),
            (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

  def : Pat<(v2f64 (X86Movsd
                    (v2f64 VR128X:$dst),
                    (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi64 addr:$src))))))),
            (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v2f64 (X86Movsd
                    (v2f64 VR128X:$dst),
                    (v2f64 (scalar_to_vector (f64 (uint_to_fp GR32:$src)))))),
            (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

  def : Pat<(v2f64 (X86Movsd
                    (v2f64 VR128X:$dst),
                    (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi32 addr:$src))))))),
            (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasAVX512]
// Convert float/double to signed/unsigned int 32/64 with truncation
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                            X86VectorVTInfo _DstRC, SDNode OpNode,
                            SDNode OpNodeInt, SDNode OpNodeRnd,
                            X86FoldableSchedWrite sched, string aliasStr,
                            bit CodeGenOnly = 1> {
  let Predicates = [HasAVX512] in {
    let isCodeGenOnly = 1 in {
      def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
                      !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                      [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
                      EVEX, Sched<[sched]>;
      def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
                      !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                      [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
                      EVEX, Sched<[sched.Folded, sched.ReadAfterFold]>;
    }

    def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
                        !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                        [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
                        EVEX, VEX_LIG, Sched<[sched]>;
    def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
                         !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
                         [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
                                                          (i32 FROUND_NO_EXC)))]>,
                         EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
    let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
    def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
                        (ins _SrcRC.IntScalarMemOp:$src),
                        !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                        [(set _DstRC.RC:$dst,
                              (OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>,
                        EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;

    def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
    def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
                    (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  }
}

multiclass avx512_cvt_s_all_unsigned<bits<8> opc, string asm,
                                     X86VectorVTInfo _SrcRC,
                                     X86VectorVTInfo _DstRC, SDNode OpNode,
                                     SDNode OpNodeInt, SDNode OpNodeRnd,
                                     X86FoldableSchedWrite sched,
                                     string aliasStr> :
  avx512_cvt_s_all<opc, asm, _SrcRC, _DstRC, OpNode, OpNodeInt, OpNodeRnd, sched,
                   aliasStr, 0> {
  let Predicates = [HasAVX512] in {
    def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
                                        _SrcRC.IntScalarMemOp:$src), 0, "att">;
  }
}
defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
                        fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSS2I,
                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
                        fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSS2I,
                        "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
                        fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSD2I,
                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
                        fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSD2I,
                        "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;

defm VCVTTSS2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i32x_info,
                        fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSS2I,
                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i64x_info,
                        fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSS2I,
                        "{q}">, XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i32x_info,
                        fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSD2I,
                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i64x_info,
                        fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSD2I,
                        "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
//===----------------------------------------------------------------------===//
// AVX-512 Convert from float to double and back
//===----------------------------------------------------------------------===//

multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode,
                                X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                       (_Src.VT _Src.RC:$src2),
                                       (i32 FROUND_CURRENT)))>,
                         EVEX_4V, VEX_LIG, Sched<[sched]>;
  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                       (_Src.VT _Src.ScalarIntMemCPat:$src2),
                                       (i32 FROUND_CURRENT)))>,
                         EVEX_4V, VEX_LIG,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;

  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.FRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched]>;

    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Scalar conversion with SAE - suppress all exceptions
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                    X86VectorVTInfo _Src, SDNode OpNodeRnd,
                                    X86FoldableSchedWrite sched> {
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                          "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                          (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                           (_Src.VT _Src.RC:$src2),
                                           (i32 FROUND_NO_EXC)))>,
                          EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
}
// Scalar conversion with rounding control (RC)
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
                                   X86FoldableSchedWrite sched> {
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
                          "$rc, $src2, $src1", "$src1, $src2, $rc",
                          (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                           (_Src.VT _Src.RC:$src2), (i32 imm:$rc)))>,
                          EVEX_4V, VEX_LIG, Sched<[sched]>,
                          EVEX_B, EVEX_RC;
}

multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
                                      SDNode OpNodeRnd, X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
                                     OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
  }
}

multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
             EVEX_CD8<32, CD8VT1>, XS;
  }
}

defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss",
                                            X86froundRnd, WriteCvtSD2SS, f64x_info,
                                            f32x_info>;
defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
                                            X86fpextRnd, WriteCvtSS2SD, f32x_info,
                                            f64x_info>;
def : Pat<(f64 (fpextend FR32X:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
          Requires<[HasAVX512]>;
def : Pat<(f64 (fpextend (loadf32 addr:$src))),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX512, OptForSize]>;

def : Pat<(f64 (extloadf32 addr:$src)),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX512, OptForSize]>;

def : Pat<(f64 (extloadf32 addr:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
          Requires<[HasAVX512, OptForSpeed]>;

def : Pat<(f32 (fpround FR64X:$src)),
          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector
                          (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector
                          (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

//===----------------------------------------------------------------------===//
// AVX-512 Vector convert from signed/unsigned integer to float/double
// and from float/double to signed/unsigned integer
//===----------------------------------------------------------------------===//
multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86VectorVTInfo _Src, SDNode OpNode,
                          X86FoldableSchedWrite sched,
                          string Broadcast = _.BroadcastStr,
                          string Alias = "", X86MemOperand MemOp = _Src.MemOp> {

  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _Src.RC:$src), OpcodeStr, "$src", "$src",
                         (_.VT (OpNode (_Src.VT _Src.RC:$src)))>,
                         EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins MemOp:$src), OpcodeStr#Alias, "$src", "$src",
                         (_.VT (OpNode (_Src.VT
                             (bitconvert (_Src.LdFrag addr:$src)))))>,
                         EVEX, Sched<[sched.Folded]>;

  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _Src.ScalarMemOp:$src), OpcodeStr,
                         "${src}"##Broadcast, "${src}"##Broadcast,
                         (_.VT (OpNode (_Src.VT
                                  (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
                            ))>, EVEX, EVEX_B,
                         Sched<[sched.Folded]>;
}

// Conversion with SAE - suppress all exceptions
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                              X86VectorVTInfo _Src, SDNode OpNodeRnd,
                              X86FoldableSchedWrite sched> {
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _Src.RC:$src), OpcodeStr,
                         "{sae}, $src", "$src, {sae}",
                         (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src),
                                          (i32 FROUND_NO_EXC)))>,
                         EVEX, EVEX_B, Sched<[sched]>;
}

// Conversion with rounding control (RC)
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                             X86VectorVTInfo _Src, SDNode OpNodeRnd,
                             X86FoldableSchedWrite sched> {
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
                         "$rc, $src", "$src, $rc",
                         (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc)))>,
                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}
// Extend Float to Double
multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info,
                            fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
                                X86vfpextRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
                               X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
                               sched.YMM>, EVEX_V256;
  }
}

// Truncate Double to Float
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
                               X86vfpround, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }
  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0, "intel">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0, "intel">;
}

defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
                                 VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
                                 PS, EVEX_CD8<32, CD8VH>;

def : Pat<(v8f64 (extloadv8f32 addr:$src)),
          (VCVTPS2PDZrm addr:$src)>;

let Predicates = [HasVLX] in {
  def : Pat<(X86vzmovl (v2f64 (bitconvert
                               (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
            (VCVTPD2PSZ128rr VR128X:$src)>;
  def : Pat<(X86vzmovl (v2f64 (bitconvert
                               (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
            (VCVTPD2PSZ128rm addr:$src)>;
  def : Pat<(v2f64 (extloadv2f32 addr:$src)),
            (VCVTPS2PDZ128rm addr:$src)>;
  def : Pat<(v4f64 (extloadv4f32 addr:$src)),
            (VCVTPS2PDZ256rm addr:$src)>;
}
// Convert Signed/Unsigned Doubleword to Double
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNode128, X86SchedWriteWidths sched> {
  // No rounding in this op: the i32 -> f64 conversion is exact.
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
                            sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
                               OpNode128, sched.XMM, "{1to2}", "", i64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Doubleword to Float
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Doubleword
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
// Convert Double to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // We need "x"/"y" suffixes in order to distinguish between the 128- and
    // 256-bit memory forms of these instructions in the Asm Parser: they have
    // the same dest type, 'v4i32x_info'. We also specify the broadcast string
    // explicitly for the same reason.
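    // Illustrative example: in AT&T syntax, "vcvttpd2dqx (%rax), %xmm0" reads
    // 128 bits of memory while "vcvttpd2dqy (%rax), %xmm0" reads 256 bits;
    // both write an XMM destination, so the plain mnemonic is ambiguous for
    // memory operands.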
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               OpNode, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }
  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0, "intel">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0, "intel">;
}

// Convert Double to Signed/Unsigned Doubleword
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // We need "x"/"y" suffixes in order to distinguish between the 128- and
    // 256-bit memory forms of these instructions in the Asm Parser: they have
    // the same dest type, 'v4i32x_info'. We also specify the broadcast string
    // explicitly for the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
                               sched.XMM, "{1to2}", "{x}">, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }
  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0, "intel">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0, "intel">;
}
// Convert Double to Signed/Unsigned Quadword
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Double to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Quadword to Double
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
                               sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
                               sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
  }
}
// Convert Float to Signed/Unsigned Quadword
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from the v4f32x_info source.
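    // Illustrative note: the 128-bit form converts only the low two f32
    // elements, so its memory form loads just 64 bits, hence the f64mem
    // operand and the "{1to2}" broadcast suffix below.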
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from the v4f32x_info source.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Quadword to Float
multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNode128, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // We need "x"/"y" suffixes in order to distinguish between the 128- and
    // 256-bit memory forms of these instructions in the Asm Parser: they have
    // the same dest type, 'v4f32x_info'. We also specify the broadcast string
    // explicitly for the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode128,
                               sched.XMM, "{1to2}", "{x}">, EVEX_V128,
                               NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256,
                               NotEVEX2VEXConvertible;
  }
  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0, "intel">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0, "intel">;
}
defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
                                 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;

defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
                                 X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
                                 PS, EVEX_CD8<32, CD8VF>;

defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86cvttp2si,
                                   X86cvttp2siRnd, SchedWriteCvtPS2DQ>,
                                   XS, EVEX_CD8<32, CD8VF>;

defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86cvttp2si,
                                   X86cvttp2siRnd, SchedWriteCvtPD2DQ>,
                                   PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86cvttp2ui,
                                    X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PS,
                                    EVEX_CD8<32, CD8VF>;

defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui,
                                    X86cvttp2uiRnd, SchedWriteCvtPD2DQ>,
                                    PS, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
                                  X86VUintToFP, SchedWriteCvtDQ2PD>, XS,
                                  EVEX_CD8<32, CD8VH>;

defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
                                  X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD,
                                  EVEX_CD8<32, CD8VF>;

defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VF>;

defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
                                 VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
                                  PS, EVEX_CD8<32, CD8VF>;

defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                  PS, EVEX_CD8<64, CD8VF>;

defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                  PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
                                  EVEX_CD8<32, CD8VH>;

defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86cvttp2si,
                                   X86cvttp2siRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                   PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86cvttp2si,
                                   X86cvttp2siRnd, SchedWriteCvtPS2DQ>, PD,
                                   EVEX_CD8<32, CD8VH>;

defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86cvttp2ui,
                                    X86cvttp2uiRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                    PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui,
                                    X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PD,
                                    EVEX_CD8<32, CD8VH>;

defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
                                 X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
                                 EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
                                  X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
                                  EVEX_CD8<64, CD8VF>;

defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP,
                                 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS,
                                 EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP,
                                  X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD,
                                  EVEX_CD8<64, CD8VF>;
let Predicates = [HasAVX512] in {
  def : Pat<(v16i32 (fp_to_sint (v16f32 VR512:$src))),
            (VCVTTPS2DQZrr VR512:$src)>;
  def : Pat<(v16i32 (fp_to_sint (loadv16f32 addr:$src))),
            (VCVTTPS2DQZrm addr:$src)>;

  def : Pat<(v16i32 (fp_to_uint (v16f32 VR512:$src))),
            (VCVTTPS2UDQZrr VR512:$src)>;
  def : Pat<(v16i32 (fp_to_uint (loadv16f32 addr:$src))),
            (VCVTTPS2UDQZrm addr:$src)>;

  def : Pat<(v8i32 (fp_to_sint (v8f64 VR512:$src))),
            (VCVTTPD2DQZrr VR512:$src)>;
  def : Pat<(v8i32 (fp_to_sint (loadv8f64 addr:$src))),
            (VCVTTPD2DQZrm addr:$src)>;

  def : Pat<(v8i32 (fp_to_uint (v8f64 VR512:$src))),
            (VCVTTPD2UDQZrr VR512:$src)>;
  def : Pat<(v8i32 (fp_to_uint (loadv8f64 addr:$src))),
            (VCVTTPD2UDQZrm addr:$src)>;
}

let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (fp_to_sint (v4f32 VR128X:$src))),
            (VCVTTPS2DQZ128rr VR128X:$src)>;
  def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))),
            (VCVTTPS2DQZ128rm addr:$src)>;

  def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src))),
            (VCVTTPS2UDQZ128rr VR128X:$src)>;
  def : Pat<(v4i32 (fp_to_uint (loadv4f32 addr:$src))),
            (VCVTTPS2UDQZ128rm addr:$src)>;

  def : Pat<(v8i32 (fp_to_sint (v8f32 VR256X:$src))),
            (VCVTTPS2DQZ256rr VR256X:$src)>;
  def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))),
            (VCVTTPS2DQZ256rm addr:$src)>;

  def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src))),
            (VCVTTPS2UDQZ256rr VR256X:$src)>;
  def : Pat<(v8i32 (fp_to_uint (loadv8f32 addr:$src))),
            (VCVTTPS2UDQZ256rm addr:$src)>;

  def : Pat<(v4i32 (fp_to_sint (v4f64 VR256X:$src))),
            (VCVTTPD2DQZ256rr VR256X:$src)>;
  def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
            (VCVTTPD2DQZ256rm addr:$src)>;

  def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src))),
            (VCVTTPD2UDQZ256rr VR256X:$src)>;
  def : Pat<(v4i32 (fp_to_uint (loadv4f64 addr:$src))),
            (VCVTTPD2UDQZ256rm addr:$src)>;
}

let Predicates = [HasDQI] in {
  def : Pat<(v8i64 (fp_to_sint (v8f32 VR256X:$src))),
            (VCVTTPS2QQZrr VR256X:$src)>;
  def : Pat<(v8i64 (fp_to_sint (loadv8f32 addr:$src))),
            (VCVTTPS2QQZrm addr:$src)>;

  def : Pat<(v8i64 (fp_to_uint (v8f32 VR256X:$src))),
            (VCVTTPS2UQQZrr VR256X:$src)>;
  def : Pat<(v8i64 (fp_to_uint (loadv8f32 addr:$src))),
            (VCVTTPS2UQQZrm addr:$src)>;

  def : Pat<(v8i64 (fp_to_sint (v8f64 VR512:$src))),
            (VCVTTPD2QQZrr VR512:$src)>;
  def : Pat<(v8i64 (fp_to_sint (loadv8f64 addr:$src))),
            (VCVTTPD2QQZrm addr:$src)>;

  def : Pat<(v8i64 (fp_to_uint (v8f64 VR512:$src))),
            (VCVTTPD2UQQZrr VR512:$src)>;
  def : Pat<(v8i64 (fp_to_uint (loadv8f64 addr:$src))),
            (VCVTTPD2UQQZrm addr:$src)>;
}
let Predicates = [HasDQI, HasVLX] in {
  def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src))),
            (VCVTTPS2QQZ256rr VR128X:$src)>;
  def : Pat<(v4i64 (fp_to_sint (loadv4f32 addr:$src))),
            (VCVTTPS2QQZ256rm addr:$src)>;

  def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src))),
            (VCVTTPS2UQQZ256rr VR128X:$src)>;
  def : Pat<(v4i64 (fp_to_uint (loadv4f32 addr:$src))),
            (VCVTTPS2UQQZ256rm addr:$src)>;

  def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src))),
            (VCVTTPD2QQZ128rr VR128X:$src)>;
  def : Pat<(v2i64 (fp_to_sint (loadv2f64 addr:$src))),
            (VCVTTPD2QQZ128rm addr:$src)>;

  def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src))),
            (VCVTTPD2UQQZ128rr VR128X:$src)>;
  def : Pat<(v2i64 (fp_to_uint (loadv2f64 addr:$src))),
            (VCVTTPD2UQQZ128rm addr:$src)>;

  def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src))),
            (VCVTTPD2QQZ256rr VR256X:$src)>;
  def : Pat<(v4i64 (fp_to_sint (loadv4f64 addr:$src))),
            (VCVTTPD2QQZ256rm addr:$src)>;

  def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src))),
            (VCVTTPD2UQQZ256rr VR256X:$src)>;
  def : Pat<(v4i64 (fp_to_uint (loadv4f64 addr:$src))),
            (VCVTTPD2UQQZ256rm addr:$src)>;
}
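
// Without AVX512VL, the 128/256-bit unsigned conversions are only available
// on 512-bit registers: widen the source into a ZMM register with
// INSERT_SUBREG on an IMPLICIT_DEF, convert, and extract the low subvector.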
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
            (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
             (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                    VR256X:$src1, sub_ymm)))), sub_ymm)>;

  def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
            (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
             (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                    VR128X:$src1, sub_xmm)))), sub_xmm)>;

  def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))),
            (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
             (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                   VR256X:$src1, sub_ymm)))), sub_xmm)>;

  def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
            (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
             (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                    VR256X:$src1, sub_ymm)))), sub_ymm)>;

  def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
            (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
             (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                    VR128X:$src1, sub_xmm)))), sub_xmm)>;

  def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
            (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
             (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                   VR128X:$src1, sub_xmm)))), sub_ymm)>;

  def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
            (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
             (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;
}
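
// The 128-bit cvt(t)pd2(u)dq forms produce only two dword results and zero
// the upper lanes of the destination, so a vzmovl wrapped around the result
// is already implied by the instruction and can be dropped.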
let Predicates = [HasAVX512, HasVLX] in {
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                               (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
            (VCVTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                               (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
            (VCVTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                               (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
            (VCVTPD2UDQZ128rr VR128X:$src)>;
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                               (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
            (VCVTTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                               (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
            (VCVTTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                               (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
            (VCVTTPD2UDQZ128rr VR128X:$src)>;

  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (VCVTDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
            (VCVTDQ2PDZ128rm addr:$src)>;

  def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
}
let Predicates = [HasAVX512] in {
  def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
            (VCVTPD2PSZrm addr:$src)>;
  def : Pat<(v8f64 (extloadv8f32 addr:$src)),
            (VCVTPS2PDZrm addr:$src)>;
}
let Predicates = [HasDQI, HasVLX] in {
  def : Pat<(X86vzmovl (v2f64 (bitconvert
                               (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
            (VCVTQQ2PSZ128rr VR128X:$src)>;
  def : Pat<(X86vzmovl (v2f64 (bitconvert
                               (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
            (VCVTUQQ2PSZ128rr VR128X:$src)>;
}
let Predicates = [HasDQI, NoVLX] in {
  def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))),
            (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
             (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;

  def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))),
            (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
             (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                   VR128X:$src1, sub_xmm)))), sub_ymm)>;

  def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))),
            (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
             (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;

  def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))),
            (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
             (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;

  def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))),
            (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
             (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                   VR128X:$src1, sub_xmm)))), sub_ymm)>;

  def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src1))),
            (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
             (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;

  def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
            (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
             (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                   VR256X:$src1, sub_ymm)))), sub_xmm)>;

  def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
            (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
             (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;

  def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
            (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
             (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;

  def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
            (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
             (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                   VR256X:$src1, sub_ymm)))), sub_xmm)>;

  def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
            (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
             (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;

  def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
            (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
             (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;
}
//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//
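
// vcvtph2ps widens f16 elements to f32; the EVEX encodings below add
// write-masking, and the 512-bit form also gets an {sae} variant.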
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, PatFrag ld_frag,
                           X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                            (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
                            (X86cvtph2ps (_src.VT _src.RC:$src))>,
                            T8PD, Sched<[sched]>;
  defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
                            (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
                            (X86cvtph2ps (_src.VT
                                          (bitconvert
                                           (ld_frag addr:$src))))>,
                            T8PD, Sched<[sched.Folded]>;
}
multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               X86FoldableSchedWrite sched> {
  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                             (ins _src.RC:$src), "vcvtph2ps",
                             "{sae}, $src", "$src, {sae}",
                             (X86cvtph2psRnd (_src.VT _src.RC:$src),
                                             (i32 FROUND_NO_EXC))>,
                             T8PD, EVEX_B, Sched<[sched]>;
}
let Predicates = [HasAVX512] in
  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64,
                                    WriteCvtPH2PSZ>,
                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
let Predicates = [HasVLX] in {
  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
                                       loadv2i64, WriteCvtPH2PSY>, EVEX, EVEX_V256,
                                       EVEX_CD8<32, CD8VH>;
  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
                                       loadv2i64, WriteCvtPH2PS>, EVEX, EVEX_V128,
                                       EVEX_CD8<32, CD8VH>;

  // Pattern match vcvtph2ps of a scalar i64 load.
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
            (VCVTPH2PSZ128rm addr:$src)>;
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
            (VCVTPH2PSZ128rm addr:$src)>;
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
                                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
            (VCVTPH2PSZ128rm addr:$src)>;
}
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
  defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest, (outs _dest.RC:$dst),
                            (ins _src.RC:$src1, i32u8imm:$src2),
                            "vcvtps2ph", "$src2, $src1", "$src1, $src2",
                            (X86cvtps2ph (_src.VT _src.RC:$src1),
                                         (i32 imm:$src2)), 0, 0>,
                            AVX512AIi8Base, Sched<[RR]>;
  let hasSideEffects = 0, mayStore = 1 in {
    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
                        (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
                        "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                        Sched<[MR]>;
    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
                         (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
                         "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
                         EVEX_K, Sched<[MR]>, NotMemoryFoldable;
  }
}
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               SchedWrite Sched> {
  let hasSideEffects = 0 in
  defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
                                    (outs _dest.RC:$dst),
                                    (ins _src.RC:$src1, i32u8imm:$src2),
                                    "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
                                    EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
}
let Predicates = [HasAVX512] in {
  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
  let Predicates = [HasVLX] in {
    defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
                                         WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
                                         EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
    defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
                                         WriteCvtPS2PH, WriteCvtPS2PHSt>,
                                         EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
  }
  def : Pat<(store (f64 (extractelt
                         (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
  def : Pat<(store (i64 (extractelt
                         (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
  def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
  def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),
            (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
}
// Patterns for matching conversions from float to half-float and vice versa.
let Predicates = [HasVLX] in {
  // Use MXCSR.RC for rounding instead of explicitly specifying the default
  // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
  // configurations we support (the default). However, falling back to MXCSR is
  // more consistent with other instructions, which are always controlled by it.
  // It's encoded as 0b100.
  def : Pat<(fp_to_f16 FR32X:$src),
            (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (v8i16 (VCVTPS2PHZ128rr
                  (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4))), sub_16bit))>;

  def : Pat<(f16_to_fp GR16:$src),
            (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
                  (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)))), FR32X))>;

  def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
            (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
                  (v8i16 (VCVTPS2PHZ128rr
                          (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4)))), FR32X))>;
}
//  Unordered/Ordered scalar fp compare with SAE and set EFLAGS
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
                              string OpcodeStr, X86FoldableSchedWrite sched> {
  let hasSideEffects = 0 in
  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
}
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>,
                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>,
                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>,
                  AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>,
                  AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
                                 "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
                                 EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
                                 "ucomisd", WriteFCom>, PD, EVEX,
                                 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  let Pattern = []<dag> in {
    defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
                                  "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
                                  EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
                                  "comisd", WriteFCom>, PD, EVEX,
                                  VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
  let isCodeGenOnly = 1 in {
    defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
                                       sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
                                       EVEX_CD8<32, CD8VT1>;
    defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
                                       sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX,
                                       VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

    defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
                                      sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
                                      EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
                                      sse_load_f64, "comisd", WriteFCom>, PD, EVEX,
                                      VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                           EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1),
                                   _.ScalarIntMemCPat:$src2)>, EVEX_4V,
                           Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
                               f32x_info>, EVEX_CD8<32, CD8VT1>,
                               T8PD;
defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
                               f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
                               T8PD;
defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f32x_info>,
                                 EVEX_CD8<32, CD8VT1>, T8PD;
defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
                                 EVEX_CD8<64, CD8VT1>, T8PD;
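
// The "14" in rcp14/rsqrt14 names the guaranteed accuracy: the relative error
// of the approximation is at most 2^-14.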
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
                          (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
                          Sched<[sched]>;
  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                          (OpNode (_.VT
                                   (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.ScalarMemOp:$src), OpcodeStr,
                           "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                           (OpNode (_.VT
                                    (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
                           EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
                           v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM,
                           v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
                                OpNode, sched.XMM, v4f32x_info>,
                                EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
                                OpNode, sched.YMM, v8f32x_info>,
                                EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
                                OpNode, sched.XMM, v2f64x_info>,
                                EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
                                OpNode, sched.YMM, v4f64x_info>,
                                EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
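
// AVX512ER provides higher-precision approximations: rcp28/rsqrt28 guarantee
// a relative error of at most 2^-28 and additionally support {sae}.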
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain in {
  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                           (i32 FROUND_CURRENT))>,
                           Sched<[sched]>;

  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                            (i32 FROUND_NO_EXC))>, EVEX_B,
                            Sched<[sched]>;

  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
                           (i32 FROUND_CURRENT))>,
                           Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        X86FoldableSchedWrite sched> {
  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, sched>,
             EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, sched>,
             EVEX_CD8<64, CD8VT1>, VEX_W;
}
let Predicates = [HasERI] in {
  defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SchedWriteFRcp.Scl>,
                T8PD, EVEX_4V;
  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s,
                               SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
}

defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds,
                            SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd

multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain in {
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
                         (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>,
                         Sched<[sched]>;

  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                         (OpNode (_.VT
                                  (bitconvert (_.LdFrag addr:$src))),
                         (i32 FROUND_CURRENT))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.ScalarMemOp:$src), OpcodeStr,
                         "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                         (OpNode (_.VT
                                  (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                         (i32 FROUND_CURRENT))>, EVEX_B,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain in
  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src), OpcodeStr,
                          "{sae}, $src", "$src, {sae}",
                          (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>,
                          EVEX_B, Sched<[sched]>;
}
multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
                      X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
             avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
             T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
             avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
             T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86SchedWriteWidths sched> {
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, sched.XMM>,
                  EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, sched.YMM>,
                  EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, sched.XMM>,
                  EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, sched.YMM>,
                  EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
  }
}
let Predicates = [HasERI] in {
  defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SchedWriteFRsqrt>, EVEX;
  defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, SchedWriteFRcp>, EVEX;
  defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, SchedWriteFAdd>, EVEX;
}

defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SchedWriteFRnd>,
               avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd,
                                        SchedWriteFRnd>, EVEX;
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
                         (_.VT (X86fsqrtRnd _.RC:$src, (i32 imm:$rc)))>,
                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
                          (_.VT (fsqrt _.RC:$src))>, EVEX,
                          Sched<[sched]>;
  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                          (fsqrt (_.VT
                                  (bitconvert (_.LdFrag addr:$src))))>, EVEX,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.ScalarMemOp:$src), OpcodeStr,
                           "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                           (fsqrt (_.VT
                                   (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
                           EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                sched.PS.ZMM, v16f32_info>,
                                EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                sched.PD.ZMM, v8f64_info>,
                                EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                     sched.PS.XMM, v4f32x_info>,
                                     EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                     sched.PS.YMM, v8f32x_info>,
                                     EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                     sched.PD.XMM, v2f64x_info>,
                                     EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                     sched.PD.YMM, v4f64x_info>,
                                     EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  }
}
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
                                        X86SchedWriteSizes sched> {
  defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
                                      sched.PS.ZMM, v16f32_info>,
                                      EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
                                      sched.PD.ZMM, v8f64_info>,
                                      EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name> {
  let ExeDomain = _.ExeDomain in {
    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (X86fsqrtRnds (_.VT _.RC:$src1),
                                       (_.VT _.RC:$src2),
                                       (i32 FROUND_CURRENT))>,
                         Sched<[sched]>;
    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (X86fsqrtRnds (_.VT _.RC:$src1),
                                       _.ScalarIntMemCPat:$src2,
                                       (i32 FROUND_CURRENT))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                         "$rc, $src2, $src1", "$src1, $src2, $rc",
                         (X86fsqrtRnds (_.VT _.RC:$src1),
                                       (_.VT _.RC:$src2),
                                       (i32 imm:$rc))>,
                         EVEX_B, EVEX_RC, Sched<[sched]>;

    let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                Sched<[sched]>;

      def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
    }
  }

  let Predicates = [HasAVX512] in {
    def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
              (!cast<Instruction>(Name#Zr)
                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
  }

  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(_.EltVT (fsqrt (load addr:$src))),
              (!cast<Instruction>(Name#Zm)
                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
  }
}
multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
             EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
             EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
}
defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;

defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                         "$src3, $src2, $src1", "$src1, $src2, $src3",
                         (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                             (i32 imm:$src3)))>,
                         Sched<[sched]>;

    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                         "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
                         (_.VT (X86RndScalesRnd (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                                (i32 imm:$src3), (i32 FROUND_NO_EXC)))>, EVEX_B,
                         Sched<[sched]>;

    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                         OpcodeStr,
                         "$src3, $src2, $src1", "$src1, $src2, $src3",
                         (_.VT (X86RndScales _.RC:$src1,
                                             _.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;

    let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                []>, Sched<[sched]>;

      def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
    }
  }
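
  // The rndscale immediate: bits [1:0] select the rounding mode (0 = nearest,
  // 1 = down, 2 = up, 3 = truncate), bit 2 selects MXCSR.RC instead, and
  // bit 3 suppresses the precision (inexact) exception. Hence 0x9 = floor,
  // 0xA = ceil, 0xB = trunc, 0x4 = rint and 0xC = nearbyint below.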
  let Predicates = [HasAVX512] in {
    def : Pat<(ffloor _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
                        _.FRC:$src, (i32 0x9)))>;
    def : Pat<(fceil _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
                        _.FRC:$src, (i32 0xa)))>;
    def : Pat<(ftrunc _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
                        _.FRC:$src, (i32 0xb)))>;
    def : Pat<(frint _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
                        _.FRC:$src, (i32 0x4)))>;
    def : Pat<(fnearbyint _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
                        _.FRC:$src, (i32 0xc)))>;
  }

  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(ffloor (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
                        addr:$src, (i32 0x9)))>;
    def : Pat<(fceil (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
                        addr:$src, (i32 0xa)))>;
    def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
                        addr:$src, (i32 0xb)))>;
    def : Pat<(frint (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
                        addr:$src, (i32 0x4)))>;
    def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
                        addr:$src, (i32 0xc)))>;
  }
}
defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
                                           SchedWriteFRnd.Scl, f32x_info>,
                                           AVX512AIi8Base, EVEX_4V,
                                           EVEX_CD8<32, CD8VT1>;

defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
                                           SchedWriteFRnd.Scl, f64x_info>,
                                           VEX_W, AVX512AIi8Base, EVEX_4V,
                                           EVEX_CD8<64, CD8VT1>;
multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
                                dag OutMask, Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
               (extractelt _.VT:$dst, (iPTR 0))))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
               _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;

    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
               ZeroFP))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
               OutMask, _.VT:$src2, _.VT:$src1)>;
  }
}
defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
                            fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
                            fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
multiclass avx512_masked_scalar_imm<SDNode OpNode, string OpcPrefix, SDNode Move,
                                    X86VectorVTInfo _, PatLeaf ZeroFP,
                                    bits<8> ImmV, Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects VK1WM:$mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
               (extractelt _.VT:$dst, (iPTR 0))))),
              (!cast<Instruction>("V"#OpcPrefix#Zr_Intk)
               _.VT:$dst, VK1WM:$mask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;

    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects VK1WM:$mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))), ZeroFP))),
              (!cast<Instruction>("V"#OpcPrefix#Zr_Intkz)
               VK1WM:$mask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;
  }
}
defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESS", X86Movss,
                                v4f32x_info, fp32imm0, 0x01, HasAVX512>;
defm : avx512_masked_scalar_imm<fceil, "RNDSCALESS", X86Movss,
                                v4f32x_info, fp32imm0, 0x02, HasAVX512>;
defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESD", X86Movsd,
                                v2f64x_info, fp64imm0, 0x01, HasAVX512>;
defm : avx512_masked_scalar_imm<fceil, "RNDSCALESD", X86Movsd,
                                v2f64x_info, fp64imm0, 0x02, HasAVX512>;
//-------------------------------------------------
// Integer truncate and extend operations
//-------------------------------------------------
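
// In the vpmov mnemonics below, the two letters after "vpmov" name the source
// and destination element widths (q = 64-bit, d = 32-bit, w = 16-bit,
// b = 8-bit), e.g. vpmovqd truncates i64 elements to i32. An "s"/"us" infix
// selects signed/unsigned saturation instead of plain truncation.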
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
  let ExeDomain = DestInfo.ExeDomain in
  defm rr : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst),
                      (ins SrcInfo.RC:$src1), OpcodeStr, "$src1", "$src1",
                      (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1)))>,
                      EVEX, T8XS, Sched<[sched]>;

  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
               EVEX, Sched<[sched.Folded]>;

    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
               EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
  } //mayStore = 1, hasSideEffects = 0
}
multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
                                    X86VectorVTInfo DestInfo,
                                    PatFrag truncFrag, PatFrag mtruncFrag,
                                    string Name> {

  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix##mr)
                               addr:$dst, SrcInfo.RC:$src)>;

  def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
                        SrcInfo.KRCWM:$mask),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk)
                               addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
                        SDNode OpNode256, SDNode OpNode512, X86FoldableSchedWrite sched,
                        AVX512VLVectorVTInfo VTSrcInfo,
                        X86VectorVTInfo DestInfoZ128,
                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
                        X86MemOperand x86memopZ, PatFrag truncFrag,
                        PatFrag mtruncFrag, Predicate prd = HasAVX512> {

  let Predicates = [HasVLX, prd] in {
    defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, sched,
                             VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
               avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
                                        truncFrag, mtruncFrag, NAME>, EVEX_V128;

    defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, sched,
                             VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
               avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
                                        truncFrag, mtruncFrag, NAME>, EVEX_V256;
  }
  let Predicates = [prd] in
    defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, sched,
                            VTSrcInfo.info512, DestInfoZ, x86memopZ>,
            avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
                                     truncFrag, mtruncFrag, NAME>, EVEX_V512;
}
multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode, sched,
                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
}

multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, sched,
                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
}

multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, sched,
                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
}

multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, sched,
                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}

multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, sched,
                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}

multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          sched, avx512vl_i16_info, v16i8x_info, v16i8x_info,
                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}
defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, WriteShuffle256,
                               truncstorevi8, masked_truncstorevi8, X86vtrunc>;
defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, WriteShuffle256,
                                truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, WriteShuffle256,
                                 truncstore_us_vi8, masked_truncstore_us_vi8>;

defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, WriteShuffle256,
                               truncstorevi16, masked_truncstorevi16, X86vtrunc>;
defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, WriteShuffle256,
                                truncstore_s_vi16, masked_truncstore_s_vi16>;
defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, WriteShuffle256,
                                 truncstore_us_vi16, masked_truncstore_us_vi16>;

defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, WriteShuffle256,
                               truncstorevi32, masked_truncstorevi32, X86vtrunc>;
defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, WriteShuffle256,
                                truncstore_s_vi32, masked_truncstore_s_vi32>;
defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, WriteShuffle256,
                                 truncstore_us_vi32, masked_truncstore_us_vi32>;

defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, WriteShuffle256,
                               truncstorevi8, masked_truncstorevi8, X86vtrunc>;
defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, WriteShuffle256,
                                truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, WriteShuffle256,
                                 truncstore_us_vi8, masked_truncstore_us_vi8>;

defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, WriteShuffle256,
                               truncstorevi16, masked_truncstorevi16, X86vtrunc>;
defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, WriteShuffle256,
                                truncstore_s_vi16, masked_truncstore_s_vi16>;
defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, WriteShuffle256,
                                 truncstore_us_vi16, masked_truncstore_us_vi16>;

defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, WriteShuffle256,
                               truncstorevi8, masked_truncstorevi8, X86vtrunc>;
defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, WriteShuffle256,
                                truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, WriteShuffle256,
                                 truncstore_us_vi8, masked_truncstore_us_vi8>;
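
// Without AVX512VL (or AVX512BW for the byte form), truncate by widening the
// source into a 512-bit register and extracting the low subvector of the
// result.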
let Predicates = [HasAVX512, NoVLX] in {
  def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
           (v8i16 (EXTRACT_SUBREG
                   (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                                VR256X:$src, sub_ymm)))), sub_xmm))>;
  def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
           (v4i32 (EXTRACT_SUBREG
                   (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                              VR256X:$src, sub_ymm)))), sub_xmm))>;
}
let Predicates = [HasBWI, NoVLX] in {
  def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
           (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                                               VR256X:$src, sub_ymm))), sub_xmm))>;
}
multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
                                  X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
                                  X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode> {
  let ExeDomain = DestInfo.ExeDomain in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                    (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
                    EVEX, Sched<[sched]>;

    defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                    (ins x86memop:$src), OpcodeStr, "$src", "$src",
                    (DestInfo.VT (LdFrag addr:$src))>,
                    EVEX, Sched<[sched.Folded]>;
  }
}
multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, SDNode InVecNode, string ExtTy,
                              X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
                                      v16i8x_info, i64mem, LdFrag, InVecNode>,
                                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
                                      v16i8x_info, i128mem, LdFrag, OpNode>,
                                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasBWI] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
                                    v32i8x_info, i256mem, LdFrag, OpNode>,
                                    EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}
multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, SDNode InVecNode, string ExtTy,
                              X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
                                      v16i8x_info, i32mem, LdFrag, InVecNode>,
                                      EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
                                      v16i8x_info, i64mem, LdFrag, OpNode>,
                                      EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
                                    v16i8x_info, i128mem, LdFrag, OpNode>,
                                    EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}
multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, SDNode InVecNode, string ExtTy,
                              X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                                      v16i8x_info, i16mem, LdFrag, InVecNode>,
                                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                                      v16i8x_info, i32mem, LdFrag, OpNode>,
                                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                                    v16i8x_info, i64mem, LdFrag, OpNode>,
                                    EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
  }
}
multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, SDNode InVecNode, string ExtTy,
                              X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
                                      v8i16x_info, i64mem, LdFrag, InVecNode>,
                                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
                                      v8i16x_info, i128mem, LdFrag, OpNode>,
                                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
                                    v16i16x_info, i256mem, LdFrag, OpNode>,
                                    EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}
multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, SDNode InVecNode, string ExtTy,
                              X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                                      v8i16x_info, i32mem, LdFrag, InVecNode>,
                                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                                      v8i16x_info, i64mem, LdFrag, OpNode>,
                                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                                    v8i16x_info, i128mem, LdFrag, OpNode>,
                                    EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}
multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, SDNode InVecNode, string ExtTy,
                              X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                                      v4i32x_info, i64mem, LdFrag, InVecNode>,
                                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                                      v4i32x_info, i128mem, LdFrag, OpNode>,
                                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
  }
  let Predicates = [HasAVX512] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                                    v8i32x_info, i256mem, LdFrag, OpNode>,
                                    EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
  }
}
defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z", WriteShuffle256>;

defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s", WriteShuffle256>;
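
// Fold various scalar and partial-vector loads into the extending moves
// defined above, so the extension reads directly from memory.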
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> {
  let Predicates = [HasVLX, HasBWI] in {
    def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
    def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
    def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
    def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
    def : Pat<(v8i16 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
    def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
              (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
              (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

    def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
              (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
              (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

    def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

    def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
              (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))),
              (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

    def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (bc_v4i32 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX, HasBWI] in {
    def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
    def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
    def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
    def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
    def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
    def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
    def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

    def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
              (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
              (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

    def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
    def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
    def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

    def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;

    def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }
  let Predicates = [HasBWI] in {
    def : Pat<(v32i16 (ExtOp (bc_v32i8 (loadv4i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
    def : Pat<(v16i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
    def : Pat<(v8i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;

    def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp (bc_v8i32 (loadv4i64 addr:$src)))),
              (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}
defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec>;
//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations

// FIXME: Improve scheduling of gather/scatter instructions.
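
// Gathers take the write-mask both as an input and as an output ($mask_wb):
// the hardware clears each mask bit as the corresponding element is loaded,
// so the updated mask is modeled as a def tied to the mask operand.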
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, PatFrag GatherNode,
                         RegisterClass MaskRC = _.KRCWM> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain in
  def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
                    (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
                    !strconcat(OpcodeStr#_.Suffix,
                               "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
                    [(set _.RC:$dst, MaskRC:$mask_wb,
                      (GatherNode (_.VT _.RC:$src1), MaskRC:$mask,
                                  vectoraddr:$src2))]>, EVEX, EVEX_K,
                    EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
}
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
                                       vy512xmem, mgatherv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
                                       vz512mem, mgatherv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                                          vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
  defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
                                          vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
  defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                          vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
  defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                          vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
}
}
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
                                       mgatherv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem,
                                       mgatherv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                                          vy256xmem, mgatherv8i32>, EVEX_V256;
  defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                          vy128xmem, mgatherv4i64>, EVEX_V256;
  defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                          vx128xmem, mgatherv4i32>, EVEX_V128;
  defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                          vx64xmem, mgatherv2i64, VK2WM>,
                                          EVEX_V128;
}
}
defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
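// Note: an AVX-512 gather's mask operand is both consumed and written back
// (elements are cleared from the mask as they complete), which is why
// $mask_wb is modeled as a tied output above. Illustrative assembly for the
// 512-bit dword form:
//   vpgatherdd (%rax,%zmm1,4), %zmm0 {%k1}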
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, PatFrag ScatterNode,
                          RegisterClass MaskRC = _.KRCWM> {

let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in

  def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
                    (ins memop:$dst, MaskRC:$mask, _.RC:$src),
                    !strconcat(OpcodeStr#_.Suffix,
                               "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
                    [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
                                            MaskRC:$mask, vectoraddr:$dst))]>,
                    EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
                    Sched<[WriteStore]>;
}
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
                                        vy512xmem, mscatterv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
                                        vz512mem, mscatterv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                                           vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
  defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
                                           vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
  defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                                           vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
  defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                           vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
}
}
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
                                        mscatterv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem,
                                        mscatterv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                                           vy256xmem, mscatterv8i32>, EVEX_V256;
  defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                           vy128xmem, mscatterv4i64>, EVEX_V256;
  defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                                           vx128xmem, mscatterv4i32>, EVEX_V128;
  defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                           vx64xmem, mscatterv2i64, VK2WM>,
                                           EVEX_V128;
}
}
defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
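// Scatters have the same mask write-back behavior as gathers; illustrative
// assembly (not from the original source):
//   vpscatterdd %zmm0, (%rax,%zmm1,4) {%k1}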
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
                                          RegisterClass KRC, X86MemOperand memop> {
  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
  def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
                   !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
                   EVEX, EVEX_K, Sched<[WriteLoad]>;
}
defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                    VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                    VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                    VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                    VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                    VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                    VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                    VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                    VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
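// The PF0 variants prefetch with a T0 hint and the PF1 variants with a T1
// hint; the mask selects which elements are prefetched. No register results
// are produced, hence the empty pattern lists above.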
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                  !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
                  EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
}
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
  }
}
defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
                   EVEX, Sched<[WriteMove]>;
}
// Use the 512-bit version to implement 128/256-bit in case of NoVLX.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _,
                                           string Name> {

  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(Name#"Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                   _.KRC))>;
}
multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
                                            EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                                              EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                                              EVEX_V128;
  }

  let Predicates = [prd, NoVLX] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
  }
}
defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
                                              avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
                                              avx512vl_i16_info, HasBWI>, VEX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
                                              avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
                                              avx512vl_i64_info, HasDQI>, VEX_W;
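// vpmov*2m copies the sign bit of each element into the corresponding mask
// bit, which is why the patterns above match (0 > x) as a signed compare
// (X86pcmpgtm with an all-zeros LHS).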
// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
// is available, but BWI is not. We can't handle this in lowering because
// a target-independent DAG combine likes to combine sext and trunc.
let Predicates = [HasDQI, NoBWI] in {
  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}

let Predicates = [HasDQI, NoBWI, HasVLX] in {
  def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
}
//===----------------------------------------------------------------------===//
// AVX-512 - COMPRESS and EXPAND
//===----------------------------------------------------------------------===//
multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                        string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86compress _.RC:$src1))>, AVX5128IBase,
              Sched<[sched]>;

  let mayStore = 1, hasSideEffects = 0 in
  def mr : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.RC:$src),
              OpcodeStr # "\t{$src, $dst|$dst, $src}",
              []>, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;

  def mrk : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
              []>,
              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;
}
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix##mrk)
                            addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
}
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}
// FIXME: Is there a better scheduler class for VPCOMPRESS?
defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
                                          avx512vl_i32_info>, EVEX, NotMemoryFoldable;
defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
                                          avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
                                          avx512vl_f32_info>, EVEX, NotMemoryFoldable;
defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
                                          avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
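// Illustrative semantics (not from the original source):
//   vpcompressd %zmm1, %zmm0 {%k1}
// packs the %k1-selected elements of %zmm1 contiguously into the low
// elements of %zmm0; the mrk store form above writes only the selected
// elements to memory.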
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                               string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86expand _.RC:$src1))>, AVX5128IBase,
              Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86expand (_.VT (bitconvert
                                      (_.LdFrag addr:$src1)))))>,
            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz)
                                _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz)
                                _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                   (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix##rmk)
                                _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
}
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}
// FIXME: Is there a better scheduler class for VPEXPAND?
defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
                                      avx512vl_i32_info>, EVEX;
defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
                                      avx512vl_i64_info>, EVEX, VEX_W;
defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
                                      avx512vl_f32_info>, EVEX;
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
                                      avx512vl_f64_info>, EVEX, VEX_W;
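// Expand is the inverse of compress: e.g. (illustrative)
//   vpexpandd (%rax), %zmm0 {%k1} {z}
// loads consecutive elements from memory into the positions selected by
// %k1, zeroing the rest; the X86mExpandingLoad patterns in
// expand_by_vec_width_lowering select these masked forms.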
// Handle instructions reg_vec1 = op(reg_vec, imm)
//                                op(mem_vec, imm)
//                                op(broadcast(eltVt), imm)
// All instructions created with FROUND_CURRENT.
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2))>, Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.MemOp:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                              (i32 imm:$src2))>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
                      "${src1}"##_.BroadcastStr##", $src2",
                      (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
                              (i32 imm:$src2))>, EVEX_B,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// Handle instruction reg_vec1 = op(reg_vec2, imm), {sae}
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                          SDNode OpNode, X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
                      "$src1, {sae}, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2),
                              (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched]>;
}
multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
                  AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
                                           _.info512>,
                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd,
                                               sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM,
                                           _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM,
                                           _.info256>, EVEX_V256;
  }
}
// Handle instructions reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                                op(reg_vec2, mem_vec, imm)
//                                op(reg_vec2, broadcast(eltVt), imm)
// All instructions created with FROUND_CURRENT.
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT (bitconvert (_.LdFrag addr:$src2))),
                              (i32 imm:$src3))>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                      "$src1, ${src2}"##_.BroadcastStr##", $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                              (i32 imm:$src3))>, EVEX_B,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// Handle instructions reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                                op(reg_vec2, mem_vec, imm)
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
                              X86VectorVTInfo SrcInfo>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                               (SrcInfo.VT SrcInfo.RC:$src2),
                               (i8 imm:$src3)))>,
                  Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                             (SrcInfo.VT (bitconvert
                                          (SrcInfo.LdFrag addr:$src2))),
                             (i8 imm:$src3)))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// Handle instructions reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                                op(reg_vec2, mem_vec, imm)
//                                op(reg_vec2, broadcast(eltVt), imm)
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{

  let ExeDomain = _.ExeDomain in
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                  "$src1, ${src2}"##_.BroadcastStr##", $src3",
                  (OpNode (_.VT _.RC:$src1),
                          (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                          (i8 imm:$src3))>, EVEX_B,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Handle scalar instructions reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                                       op(reg_vec2, mem_scalar, imm)
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT (scalar_to_vector
                                     (_.ScalarLdFrag addr:$src2))),
                              (i32 imm:$src3))>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// Handle instruction reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86FoldableSchedWrite sched,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3),
                              (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched]>;
}
// Handle scalar instruction reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3),
                              (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched]>;
}
multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
                  AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, sched.ZMM, _.info512>,
                                         EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                                     EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                                     EVEX_V256;
  }
}
multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                    X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
                    AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
  let Predicates = [Pred] in {
    defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
                                SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
                                   SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
                                   SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
  }
}
multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                    bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
                    Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                             EVEX_V512;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                                EVEX_V128;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                                EVEX_V256;
  }
}
multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
             avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, sched.XMM, _>;
  }
}
multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                    bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
                    SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
                            opcPs, OpNode, OpNodeRnd, sched, prd>,
                            EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
                            opcPd, OpNode, OpNodeRnd, sched, prd>,
                            EVEX_CD8<64, CD8VF>, VEX_W;
}
defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
                              X86VReduce, X86VReduceRnd, SchedWriteFRnd, HasDQI>,
                              AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
                              X86VRndScale, X86VRndScaleRnd, SchedWriteFRnd, HasAVX512>,
                              AVX512AIi8Base, EVEX;
defm VGETMANT  : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
                              X86VGetMant, X86VGetMantRnd, SchedWriteFRnd, HasAVX512>,
                              AVX512AIi8Base, EVEX;
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                                                0x50, X86VRange, X86VRangeRnd,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
                                                0x50, X86VRange, X86VRangeRnd,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
      f64x_info, 0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
      0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
      0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
      0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
      0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
      0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
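// VRNDSCALE's immediate encodes the operation: imm[1:0] is the rounding
// control (00 = nearest even, 01 = toward -inf, 10 = toward +inf,
// 11 = truncate), imm[2] selects the MXCSR rounding mode instead, and
// imm[3] suppresses precision exceptions. That yields the constants used by
// the lowering patterns below: 0x9 = floor, 0xA = ceil, 0xB = trunc,
// 0xC = nearbyint (MXCSR mode, no precision exception) and 0x4 = rint
// (MXCSR mode, precision exception allowed).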
multiclass AVX512_rndscale_lowering<X86VectorVTInfo _, string Suffix> {
  // Register
  def : Pat<(_.VT (ffloor _.RC:$src)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
             _.RC:$src, (i32 0x9))>;
  def : Pat<(_.VT (fnearbyint _.RC:$src)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
             _.RC:$src, (i32 0xC))>;
  def : Pat<(_.VT (fceil _.RC:$src)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
             _.RC:$src, (i32 0xA))>;
  def : Pat<(_.VT (frint _.RC:$src)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
             _.RC:$src, (i32 0x4))>;
  def : Pat<(_.VT (ftrunc _.RC:$src)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
             _.RC:$src, (i32 0xB))>;
  // Merge-masking
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor _.RC:$src), _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0x9))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint _.RC:$src), _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xC))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil _.RC:$src), _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xA))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint _.RC:$src), _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0x4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc _.RC:$src), _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xB))>;
  // Zero-masking
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor _.RC:$src),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
             _.KRCWM:$mask, _.RC:$src, (i32 0x9))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint _.RC:$src),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
             _.KRCWM:$mask, _.RC:$src, (i32 0xC))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil _.RC:$src),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
             _.KRCWM:$mask, _.RC:$src, (i32 0xA))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint _.RC:$src),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
             _.KRCWM:$mask, _.RC:$src, (i32 0x4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc _.RC:$src),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
             _.KRCWM:$mask, _.RC:$src, (i32 0xB))>;
  // Load
  def : Pat<(_.VT (ffloor (_.LdFrag addr:$src))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
             addr:$src, (i32 0x9))>;
  def : Pat<(_.VT (fnearbyint (_.LdFrag addr:$src))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
             addr:$src, (i32 0xC))>;
  def : Pat<(_.VT (fceil (_.LdFrag addr:$src))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
             addr:$src, (i32 0xA))>;
  def : Pat<(_.VT (frint (_.LdFrag addr:$src))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
             addr:$src, (i32 0x4))>;
  def : Pat<(_.VT (ftrunc (_.LdFrag addr:$src))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
             addr:$src, (i32 0xB))>;
  // Merge-masking + load
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor (_.LdFrag addr:$src)),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x9))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint (_.LdFrag addr:$src)),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xC))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil (_.LdFrag addr:$src)),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xA))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint (_.LdFrag addr:$src)),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc (_.LdFrag addr:$src)),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xB))>;
  // Zero-masking + load
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor (_.LdFrag addr:$src)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
             _.KRCWM:$mask, addr:$src, (i32 0x9))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint (_.LdFrag addr:$src)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
             _.KRCWM:$mask, addr:$src, (i32 0xC))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil (_.LdFrag addr:$src)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
             _.KRCWM:$mask, addr:$src, (i32 0xA))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint (_.LdFrag addr:$src)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
             _.KRCWM:$mask, addr:$src, (i32 0x4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc (_.LdFrag addr:$src)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
             _.KRCWM:$mask, addr:$src, (i32 0xB))>;
  // Broadcast load
  def : Pat<(_.VT (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
             addr:$src, (i32 0x9))>;
  def : Pat<(_.VT (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
             addr:$src, (i32 0xC))>;
  def : Pat<(_.VT (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
             addr:$src, (i32 0xA))>;
  def : Pat<(_.VT (frint (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
             addr:$src, (i32 0x4))>;
  def : Pat<(_.VT (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
             addr:$src, (i32 0xB))>;
  // Merge-masking + broadcast load
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x9))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xC))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xA))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (frint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xB))>;
  // Zero-masking + broadcast load
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
             _.KRCWM:$mask, addr:$src, (i32 0x9))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
             _.KRCWM:$mask, addr:$src, (i32 0xC))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
             _.KRCWM:$mask, addr:$src, (i32 0xA))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (frint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
             _.KRCWM:$mask, addr:$src, (i32 0x4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
             _.KRCWM:$mask, addr:$src, (i32 0xB))>;
}
let Predicates = [HasAVX512] in {
  defm : AVX512_rndscale_lowering<v16f32_info, "PS">;
  defm : AVX512_rndscale_lowering<v8f64_info, "PD">;
}

let Predicates = [HasVLX] in {
  defm : AVX512_rndscale_lowering<v8f32x_info, "PS">;
  defm : AVX512_rndscale_lowering<v4f64x_info, "PD">;
  defm : AVX512_rndscale_lowering<v4f32x_info, "PS">;
  defm : AVX512_rndscale_lowering<v2f64x_info, "PD">;
}
multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
                                          X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _,
                                          X86VectorVTInfo CastInfo,
                                          string EVEX2VEXOvrd> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT
                 (bitconvert
                  (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
                                           (i8 imm:$src3)))))>,
                Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT
                 (bitconvert
                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
                                           (bitconvert (_.LdFrag addr:$src2)),
                                           (i8 imm:$src3)))))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>,
                EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                "$src1, ${src2}"##_.BroadcastStr##", $src3",
                (_.VT
                 (bitconvert
                  (CastInfo.VT
                   (X86Shuf128 _.RC:$src1,
                               (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                               (i8 imm:$src3)))))>, EVEX_B,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
                                   AVX512VLVectorVTInfo _,
                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc,
                                   string EVEX2VEXOvrd>{
  let Predicates = [HasAVX512] in
  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                          _.info512, CastInfo.info512, "">, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                             _.info256, CastInfo.info256,
                                             EVEX2VEXOvrd>, EVEX_V256;
}
defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
      avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
      avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
      avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
      avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
let Predicates = [HasAVX512] in {
// Provide a fallback in case the load node that is used in the broadcast
// patterns above is used by additional users, which prevents the pattern
// from being reused.
def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
}
multiclass avx512_valign<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
  // instantiation of this class.
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$src3)))>,
                  Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (X86VAlign _.RC:$src1,
                         (bitconvert (_.LdFrag addr:$src2)),
                         (i8 imm:$src3)))>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>,
                  EVEX2VEXOverride<"VPALIGNRrmi">;

  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                   OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr##", $src3",
                   (X86VAlign _.RC:$src1,
                              (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                              (i8 imm:$src3))>, EVEX_B,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
                                AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
                           AVX512AIi8Base, EVEX_4V, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
                              AVX512AIi8Base, EVEX_4V, EVEX_V128;
    // We can't really override the 256-bit version so change it back to unset.
    let EVEX2VEXOverride = ? in
    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
                              AVX512AIi8Base, EVEX_4V, EVEX_V256;
  }
}
defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
                                   VEX_W;

defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
                                         SchedWriteShuffle, avx512vl_i8_info,
                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
// Fragments to help convert valignq into masked valignd. Or valignq/valignd
// into vpalignr.
def ValignqImm32XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
}]>;
def ValignqImm8XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
}]>;
def ValigndImm8XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
}]>;
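// The scale factors above re-express the element-granularity shift count:
// a valignq count is in 64-bit elements, so it is doubled when retargeted
// to 32-bit valignd (x2), and multiplied by the element size in bytes when
// retargeted to byte-granular vpalignr (x8 from valignq, x4 from valignd).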
multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
                                        X86VectorVTInfo From, X86VectorVTInfo To,
                                        SDNodeXForm ImmXForm> {
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                               imm:$src3))),
                    To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, To.RC:$src2,
                                                  (ImmXForm imm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                               imm:$src3))),
                    To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
                                                   To.RC:$src1, To.RC:$src2,
                                                   (ImmXForm imm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1,
                               (bitconvert (To.LdFrag addr:$src2)),
                               imm:$src3))),
                    To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, addr:$src2,
                                                  (ImmXForm imm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1,
                               (bitconvert (To.LdFrag addr:$src2)),
                               imm:$src3))),
                    To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm imm:$src3))>;
}
multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
                                           X86VectorVTInfo From,
                                           X86VectorVTInfo To,
                                           SDNodeXForm ImmXForm> :
      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  def : Pat<(From.VT (OpNode From.RC:$src1,
                      (bitconvert (To.VT (X86VBroadcast
                                          (To.ScalarLdFrag addr:$src2)))),
                      imm:$src3)),
            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
                                                  (ImmXForm imm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1,
                               (bitconvert
                                (To.VT (X86VBroadcast
                                        (To.ScalarLdFrag addr:$src2)))),
                               imm:$src3))),
                    To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm imm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1,
                               (bitconvert
                                (To.VT (X86VBroadcast
                                        (To.ScalarLdFrag addr:$src2)))),
                               imm:$src3))),
                    To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
                                                    To.RC:$src1, addr:$src2,
                                                    (ImmXForm imm:$src3))>;
}
let Predicates = [HasAVX512] in {
  // For 512-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
                                         v16i32_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX] in {
  // For 128-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
                                         v4i32x_info, ValignqImm32XForm>;
  // For 256-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
                                         v8i32x_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX, HasBWI] in {
  // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
                                      v16i8x_info, ValignqImm8XForm>;
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
                                      v16i8x_info, ValigndImm8XForm>;
}
defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                                          SchedWritePSADBW, avx512vl_i16_info,
                                          avx512vl_i8_info>, EVEX_CD8<8, CD8VF>,
                                          NotEVEX2VEXConvertible;
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1), OpcodeStr,
                    "$src1", "$src1",
                    (_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase,
                    Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.MemOp:$src1), OpcodeStr,
                  "$src1", "$src1",
                  (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
                  EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded]>;
  }
}
multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
            avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.ScalarMemOp:$src1), OpcodeStr,
                  "${src1}"##_.BroadcastStr,
                  "${src1}"##_.BroadcastStr,
                  (_.VT (OpNode (X86VBroadcast
                                  (_.ScalarLdFrag addr:$src1))))>,
                  EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded]>;
}
multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
                           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                                EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                                EVEX_V128;
  }
}
multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
                               Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
                            EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                                 EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                                 EVEX_V128;
  }
}
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
                               avx512vl_i64_info, prd>, VEX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
                               avx512vl_i32_info, prd>;
}
multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
                              avx512vl_i16_info, prd>, VEX_WIG;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
                              avx512vl_i8_info, prd>, VEX_WIG;
}
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched> {
  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
                                    HasAVX512>,
              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
                                    HasBWI>;
}

defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
                                    SchedWriteVecALU>;
// VPABS: Use the 512-bit version to implement 128/256-bit in case of NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (abs VR256X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v2i64 (abs VR128X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
             sub_xmm)>;
}
// Use the 512-bit version to implement 128/256-bit operations.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX] in {
    def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info256.RC:$src1,
                                 _.info256.SubRegIdx)),
              _.info256.SubRegIdx)>;

    def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info128.RC:$src1,
                                 _.info128.SubRegIdx)),
              _.info128.SubRegIdx)>;
  }
}
defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                     SchedWriteVecIMul, HasCDI>;

// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SchedWriteVecALU, HasCDI>;

// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;

//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SchedWriteVecALU, HasVPOPCNTDQ>;

defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86SchedWriteWidths sched> {
  defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
                                avx512vl_f32_info, HasAVX512>, XS;
}

defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
                                  SchedWriteFShuffle>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
                                  SchedWriteFShuffle>;
//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//

multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
                   (_.VT (OpNode (_.VT _.RC:$src)))>, EVEX,
                   Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                 (_.VT (OpNode (_.VT (scalar_to_vector
                                       (_.ScalarLdFrag addr:$src)))))>,
                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
                 Sched<[sched.Folded]>;
  }
}
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
                           VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
                                VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, sched.XMM,
                                   VTInfo.info128>, EVEX_V128;
  }
}

multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86SchedWriteWidths sched> {
  defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
                                   avx512vl_f64_info>, XD, VEX_W;
}

defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;
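// movddup replicates the low double of its source. Note the 128-bit memory
// form reads only 64 bits, which is why Z128 above uses a scalar load and
// why the VLX patterns below select it for an f64 X86VBroadcast.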
let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
          (VMOVDDUPZ128rm addr:$src)>;
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
          (VMOVDDUPZ128rm addr:$src)>;

def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                   (bitconvert (v4i32 immAllZerosV))),
          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                   (bitconvert (v4i32 immAllZerosV))),
          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;

def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
                   (bitconvert (v4i32 immAllZerosV))),
          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
}
//===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//

defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
                                 SchedWriteFShuffleSizes, 0, 1>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
                                 SchedWriteFShuffleSizes>;

defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;

defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                                        SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SchedWriteShuffle, HasAVX512>;

//===----------------------------------------------------------------------===//
// AVX-512 - Extract & Insert Integer Instructions
//===----------------------------------------------------------------------===//

multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
                      addr:$dst)]>,
              EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}

multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GR32orGR64:$dst,
                        (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
                  EVEX, TAPD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
  }
}

multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GR32orGR64:$dst,
                        (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
                  EVEX, PD, Sched<[WriteVecExtract]>;

    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                   (ins _.RC:$src1, u8imm:$src2),
                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                   EVEX, TAPD, FoldGenData<NAME#rr>,
                   Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
  }
}

multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                 RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GRC:$dst,
                      (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
                  EVEX, TAPD, Sched<[WriteVecExtract]>;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(store (extractelt (_.VT _.RC:$src1),
                                    imm:$src2),addr:$dst)]>,
                EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
                Sched<[WriteVecExtractSt]>;
  }
}

defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;

multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, PatFrag LdFrag> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set _.RC:$dst,
          (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
      EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
        Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
  }
}

multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
        EVEX_4V, TAPD, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag>, TAPD;
  }
}

defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>, TAPD, VEX_WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>, PD, VEX_WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;

//===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations
//===----------------------------------------------------------------------===//

multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
                        AVX512VLVectorVTInfo VTInfo_FP>{
  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                    SchedWriteFShuffle>,
                                    EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
                                    AVX512AIi8Base, EVEX_4V;
}

defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;

//===----------------------------------------------------------------------===//
// AVX-512 - Byte shift Left/Right
//===----------------------------------------------------------------------===//

// FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well?
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  def rr : AVX512<opc, MRMr,
             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512<opc, MRMm,
           (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _.RC:$dst,(_.VT (OpNode
                                 (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                 (i8 imm:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in
    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                 sched.ZMM, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.YMM, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.XMM, v16i8x_info>, EVEX_V128;
  }
}

defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;

multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  def rr : AVX512BI<opc, MRMSrcReg,
                    (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
                    !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set _dst.RC:$dst,(_dst.VT
                                        (OpNode (_src.VT _src.RC:$src1),
                                                (_src.VT _src.RC:$src2))))]>,
                    Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
                    (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
                    !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set _dst.RC:$dst,(_dst.VT
                                        (OpNode (_src.VT _src.RC:$src1),
                                                (_src.VT (bitconvert
                                                          (_src.LdFrag addr:$src2))))))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, X86SchedWriteWidths sched,
                                    Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
                                  v8i64_info, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
                                     v4i64x_info, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
                                     v2i64x_info, v16i8x_info>, EVEX_V128;
  }
}

defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                                        SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;

// Transforms to swizzle an immediate to enable better matching when
// memory operand isn't in the right place.
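//
// For example: each imm8 bit is indexed by (src0<<2)|(src1<<1)|src2, so
// 0xCA encodes "src0 ? src1 : src2". Swapping operands 0 and 2 with
// VPTERNLOG321_imm8 below exchanges bit pairs 1/4 and 3/6, giving 0xD8,
// which encodes "src2 ? src1 : src0".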
def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/4 and 3/6.
  uint8_t NewImm = Imm & 0xa5;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 2/4 and 3/5.
  uint8_t NewImm = Imm & 0xc3;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x20) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/2 and 5/6.
  uint8_t NewImm = Imm & 0x99;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by moving operand 0 to the end.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;

multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
                          string Name>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 imm:$src4)), 1, 1>,
                      AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (bitconvert (_.LdFrag addr:$src3))),
                              (i8 imm:$src4)), 1, 0>,
                      AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                      "$src2, ${src3}"##_.BroadcastStr##", $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                              (i8 imm:$src4)), 1, 0>, EVEX_B,
                      AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // Constraints = "$src1 = $dst"

  // Additional patterns for matching passthru operand in other positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;

  // Additional patterns for matching loads in other positions.
  def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
                                addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
                                addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching zero masking with loads in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching masked loads with different
  // operand orders.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;

  // Additional patterns for matching broadcasts in other positions.
  def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                          _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
                                addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                          _.RC:$src2, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
                                addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching zero masking with broadcasts in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching masked broadcasts with different
  // operand orders.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    (i8 imm:$src4)), _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
}

multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
                            _.info512, NAME>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
                               _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
                               _.info256, NAME>, EVEX_V256;
  }
}

defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
                                        avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
                                        avx512vl_i64_info>, VEX_W;

// Patterns to implement vnot using vpternlog instead of creating all ones
// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
// so that the result is only dependent on src0. But we use the same source
// for all operands to prevent a false dependency.
// TODO: We should maybe have a more generalized algorithm for folding to
// vpternlog.
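//
// For example: with the imm8 bit indexed by (src0<<2)|(src1<<1)|src2, the
// value 15 (0x0f) sets exactly the four table entries whose src0 bit is 0,
// so the result is ~src0 regardless of src1 and src2.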
let Predicates = [HasAVX512] in {
  def : Pat<(v8i64 (xor VR512:$src, (bc_v8i64 (v16i32 immAllOnesV)))),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}

let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
}

let Predicates = [HasVLX] in {
  def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - FixupImm
//===----------------------------------------------------------------------===//

multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo TblVT>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                        OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (OpNode (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                (TblVT.VT _.RC:$src3),
                                (i32 imm:$src4),
                                (i32 FROUND_CURRENT))>, Sched<[sched]>;
    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
                        OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (OpNode (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
                                (i32 imm:$src4),
                                (i32 FROUND_CURRENT))>,
                        Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                        OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                        "$src2, ${src3}"##_.BroadcastStr##", $src4",
                        (OpNode (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                (TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))),
                                (i32 imm:$src4),
                                (i32 FROUND_CURRENT))>,
                        EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // Constraints = "$src1 = $dst"
}

multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                        OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                        "$src2, $src3, {sae}, $src4",
                        (OpNode (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                (TblVT.VT _.RC:$src3),
                                (i32 imm:$src4),
                                (i32 FROUND_NO_EXC))>,
                        EVEX_B, Sched<[sched]>;
  }
}

multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo _src3VT> {
  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_src3VT.VT _src3VT.RC:$src3),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT))>, Sched<[sched]>;
    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_src3VT.VT _src3VT.RC:$src3),
                              (i32 imm:$src4),
                              (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_src3VT.VT (scalar_to_vector
                                          (_src3VT.ScalarLdFrag addr:$src3))),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT))>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
                                      AVX512VLVectorVTInfo _Vec,
                                      AVX512VLVectorVTInfo _Tbl> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.ZMM,
                                    _Vec.info512, _Tbl.info512>,
             avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, sched.ZMM,
                                        _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
             EVEX_4V, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.XMM,
                                       _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
                EVEX_4V, EVEX_V128;
    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.YMM,
                                       _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
                EVEX_4V, EVEX_V256;
  }
}

defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
                    AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
                    AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
                                              avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
                                              avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;

// Patterns used to select SSE scalar fp arithmetic instructions from
// either:
//
// (1) a scalar fp operation followed by a blend
//
// The effect is that the backend no longer emits unnecessary vector
// insert instructions immediately after SSE scalar fp instructions
// like addss or mulss.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     A[0] += B[0];
//     return A;
//   }
//
// Previously we generated:
//   addss %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0
//
// (2) a vector packed single/double fp operation followed by a vector insert
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     __m128 C = A + B;
//     return (__m128) {c[0], a[1], a[2], a[3]};
//   }
//
// Previously we generated:
//   addps %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0
//
// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.
multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode,
                                          X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    // extracted scalar math op with insert via movss
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          _.FRC:$src)))),
              (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;

    // extracted masked scalar math op with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src2),
                            _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk)
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;

    // extracted masked scalar math op with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src2), (_.EltVT ZeroFP)))),
              (!cast<I>("V"#OpcPrefix#Zrr_Intkz)
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
  }
}

defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;

defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;

multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
                                             SDNode Move, X86VectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src)>;
  }
}

defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;

multiclass AVX512_scalar_unary_math_imm_patterns<SDNode OpNode, string OpcPrefix,
                                                 SDNode Move, X86VectorVTInfo _,
                                                 bits<8> ImmV> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src,
                                                        (i32 ImmV))>;
  }
}

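// Note: the ImmV values below follow the VRNDSCALE immediate encoding, where
// imm[1:0] selects the rounding mode: 0x01 rounds toward -inf (ffloor) and
// 0x02 rounds toward +inf (fceil).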
defm : AVX512_scalar_unary_math_imm_patterns<ffloor, "RNDSCALESS", X86Movss,
                                             v4f32x_info, 0x01>;
defm : AVX512_scalar_unary_math_imm_patterns<fceil, "RNDSCALESS", X86Movss,
                                             v4f32x_info, 0x02>;
defm : AVX512_scalar_unary_math_imm_patterns<ffloor, "RNDSCALESD", X86Movsd,
                                             v2f64x_info, 0x01>;
defm : AVX512_scalar_unary_math_imm_patterns<fceil, "RNDSCALESD", X86Movsd,
                                             v2f64x_info, 0x02>;

//===----------------------------------------------------------------------===//
// AES instructions
//===----------------------------------------------------------------------===//

multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
  let Predicates = [HasVLX, HasVAES] in {
    defm Z128 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix),
                                  loadv2i64, 0, VR128X, i128mem>,
                EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
    defm Z256 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix##"_256"),
                                  loadv4i64, 0, VR256X, i256mem>,
                EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512, HasVAES] in
    defm Z : AESI_binop_rm_int<Op, OpStr,
                               !cast<Intrinsic>(IntPrefix##"_512"),
                               loadv8i64, 0, VR512, i512mem>,
             EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
}

defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
defm VAESDEC : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;

//===----------------------------------------------------------------------===//
// PCLMUL instructions - Carry less multiplication
//===----------------------------------------------------------------------===//

let Predicates = [HasAVX512, HasVPCLMULQDQ] in
defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
                   EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;

let Predicates = [HasVLX, HasVPCLMULQDQ] in {
defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
                      EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;

defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
                                int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
                                EVEX_CD8<64, CD8VF>, VEX_WIG;
}

// Aliases
defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;

//===----------------------------------------------------------------------===//
// VBMI2
//===----------------------------------------------------------------------===//

multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain = VTI.ExeDomain in {
    defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
                AVX512FMA3Base, Sched<[sched]>;
    defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                        (VTI.VT (bitconvert (VTI.LdFrag addr:$src3)))))>,
                AVX512FMA3Base,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain = VTI.ExeDomain in
    defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
                "${src3}"##VTI.BroadcastStr##", $src2",
                "$src2, ${src3}"##VTI.BroadcastStr,
                (OpNode VTI.RC:$src1, VTI.RC:$src2,
                 (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>,
                AVX512FMA3Base, EVEX_B,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
    defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
             EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                EVEX_V128;
  }
}

multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
    defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
             EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                EVEX_V128;
  }
}

multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, sched,
                                     avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, sched,
                                      avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, sched,
                                      avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", sched,
                                     avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
           VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
                                  OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
  defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
                                  sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
}

// Concat & Shift
defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;

// Compress
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
                                         avx512vl_i8_info, HasVBMI2>, EVEX,
                                         NotMemoryFoldable;
defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
                                          avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
                                          NotMemoryFoldable;
// Expand
defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
                                      avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
                                      avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;

//===----------------------------------------------------------------------===//
// VNNI
//===----------------------------------------------------------------------===//

let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                                "$src3, $src2", "$src2, $src3",
                                (VTI.VT (OpNode VTI.RC:$src1,
                                                VTI.RC:$src2, VTI.RC:$src3))>,
                                EVEX_4V, T8PD, Sched<[sched]>;
  defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                                "$src3, $src2", "$src2, $src3",
                                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                        (VTI.VT (bitconvert
                                                 (VTI.LdFrag addr:$src3)))))>,
                                EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
                                Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
                                 OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
                                 "$src2, ${src3}"##VTI.BroadcastStr,
                                 (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                  (VTI.VT (X86VBroadcast
                                           (VTI.ScalarLdFrag addr:$src3))))>,
                                 EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
                                 T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
                       X86SchedWriteWidths sched> {
  let Predicates = [HasVNNI] in
    defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info>, EVEX_V512;
  let Predicates = [HasVNNI, HasVLX] in {
    defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info>, EVEX_V256;
    defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPDP?
defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul>;
defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul>;
defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul>;
defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul>;

//===----------------------------------------------------------------------===//
// Bit Algorithms
//===----------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
                                   avx512vl_i8_info, HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
                                   avx512vl_i16_info, HasBITALG>, VEX_W;

defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;

multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.RC:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
                                Sched<[sched]>;
  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT (bitconvert (VTI.LdFrag addr:$src2))))>,
                                EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
                                Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasBITALG] in
    defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
  let Predicates = [HasBITALG, HasVLX] in {
    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;

//===----------------------------------------------------------------------===//
// GFNI
//===----------------------------------------------------------------------===//

multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                   X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
    defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
             EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
                EVEX_V256;
    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
                EVEX_V128;
  }
}

defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
                                          SchedWriteVecALU>,
                                          EVEX_CD8<8, CD8VF>, T8PD;

multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                                      X86VectorVTInfo BcstVTI>
           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
  let ExeDomain = VTI.ExeDomain in
  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
                OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
                "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
                (OpNode (VTI.VT VTI.RC:$src1),
                 (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
                 (i8 imm:$src3))>, EVEX_B,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
    defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
                                        v64i8_info, v8i64_info>, EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
                                           v32i8x_info, v4i64x_info>, EVEX_V256;
    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
                                           v16i8x_info, v2i64x_info>, EVEX_V128;
  }
}

defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
                                                   X86GF2P8affineinvqb, SchedWriteVecIMul>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
                                                X86GF2P8affineqb, SchedWriteVecIMul>,
                      EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;

//===----------------------------------------------------------------------===//
// AVX5124FMAPS
//===----------------------------------------------------------------------===//

let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
    Constraints = "$src1 = $dst" in {
defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
                    (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V128, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                    Sched<[SchedWriteFMA.Scl.Folded]>;

defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V128, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                     Sched<[SchedWriteFMA.Scl.Folded]>;
}

//===----------------------------------------------------------------------===//
// AVX5124VNNIW
//===----------------------------------------------------------------------===//

let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
    Constraints = "$src1 = $dst" in {
defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "vp4dpwssd", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;