//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// Group template arguments that can be derived from the vector type (EltNum x
// EltVT). These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types, in this case numelts is 1.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  RegisterClass RC = rc;
  ValueType EltVT = eltvt;
  int NumElts = numelts;

  // Corresponding mask register class.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Corresponding write-mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // The mask VT.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // VTName is a string name for the vector VT. For vector types it will be
  // v # NumElts # EltVT, so for a vector of 8 elements of i32 it will be v8i32.
  // It is a little bit more complex for scalar types, where NumElts = 1.
  // In this case we build v4f32 or v2f64.
  string VTName = "v" # !if (!eq (NumElts, 1),
                        !if (!eq (EltVT.Size, 32), 4,
                        !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;

  // The vector VT.
  ValueType VT = !cast<ValueType>(VTName);
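
  // For example, 16 x i32 yields VTName = "v16i32", while the scalar
  // f32x_info below (NumElts = 1, EltVT = f32) maps to "v4f32" so that
  // scalar ops can reuse the 128-bit vector masking machinery.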

  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits, e.g. 32 for v16i32.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types.
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of RC in bits, e.g. 512 for VR512.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
  // FP scalar memory operand for intrinsics - ssmem/sdmem.
  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));

  // Load patterns.
  // Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
  //       due to load promotion during legalization.
  PatFrag LdFrag = !cast<PatFrag>("load" #
                                  !if (!eq (TypeVariantName, "i"),
                                       !if (!eq (Size, 128), "v2i64",
                                       !if (!eq (Size, 256), "v4i64",
                                       !if (!eq (Size, 512), "v8i64",
                                            VTName))), VTName));

  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" #
                                         !if (!eq (TypeVariantName, "i"),
                                              !if (!eq (Size, 128), "v2i64",
                                              !if (!eq (Size, 256), "v4i64",
                                              !if (!eq (Size, 512), "v8i64",
                                                   VTName))), VTName));

  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);

  ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
                                         !cast<ComplexPattern>("sse_load_f32"),
                                    !if (!eq (EltTypeName, "f64"),
                                         !cast<ComplexPattern>("sse_load_f64"),
                                    ?));

  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format. This is only
  // defined for NumElts <= 8.
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);
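
  // With EVEX, an 8-bit displacement is implicitly scaled by the tuple's
  // memory footprint (disp8*N); e.g. a CD8VT1 f32 access scales disp8 by 4,
  // so displacements that would need disp32 under VEX often fit in one byte.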

  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                     SSEPackedInt));

  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);

  // A vector type of the same width with element type i64. This is used to
  // create patterns for logic ops.
  ValueType i64VT = !cast<ValueType>("v" # !srl(Size, 6) # "i64");

  // A vector type of the same width with element type i32. This is used to
  // create the canonical constant zero node ImmAllZerosV.
  ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
  dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));

  string ZSuffix = !if (!eq (Size, 128), "Z128",
                   !if (!eq (Size, 256), "Z256", "Z"));
}
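
// As a worked example (a sketch, not generated output): v16f32_info below
// derives KRC = VK16, KVT = v16i1, MemOp = f512mem, BroadcastStr = "{1to16}",
// ExeDomain = SSEPackedSingle and ZSuffix = "Z".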

def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;
130 // "x" in v32i8x_info means RC = VR256X
131 def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
132 def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
133 def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
134 def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
135 def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
136 def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;
138 def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
139 def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
140 def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
141 def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
142 def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
143 def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;

// We map scalar types to the smallest (128-bit) vector type with the
// appropriate element type. This allows us to use the same masking logic.
def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;

class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}

def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
                                             v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
                                             v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
                                             v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
                                             v2i64x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
                                             v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
                                             v2f64x_info>;

class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
                       ValueType _vt> {
  RegisterClass KRC = _krc;
  RegisterClass KRCWM = _krcwm;
  ValueType KVT = _vt;
}

def v1i1_info  : X86KVectorVTInfo<VK1,  VK1WM,  v1i1>;
def v2i1_info  : X86KVectorVTInfo<VK2,  VK2WM,  v2i1>;
def v4i1_info  : X86KVectorVTInfo<VK4,  VK4WM,  v4i1>;
def v8i1_info  : X86KVectorVTInfo<VK8,  VK8WM,  v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;

// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> {
  let isCommutable = IsCommutable in
  def NAME: AVX512<O, F, Outs, Ins,
                   OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                 "$dst, "#IntelSrcAsm#"}",
                   Pattern>;

  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
  def NAME#k: AVX512<O, F, Outs, MaskingIns,
                     OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                   "$dst {${mask}}, "#IntelSrcAsm#"}",
                     MaskingPattern>,
              EVEX_K {
    // In case of the 3src subclass this is overridden with a let.
    string Constraints = MaskingConstraint;
  }

  // Zero masking does not add any restrictions to the operand-commuting
  // transformation, so it is OK to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
  def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                      OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                    "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                      ZeroMaskingPattern>,
               EVEX_KZ;
}

// Common base class of AVX512_maskable and AVX512_maskable_3src.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDNode Select = vselect,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable, IsKZCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                                 dag Outs, dag Ins, string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskRHS,
                                 bit IsCommutable = 0, bit IsKCommutable = 0,
                                 SDNode Select = vselect> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                         "$src0 = $dst", IsCommutable, IsKCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDNode Select = vselect> :
   AVX512_maskable_common<O, F, _, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                          Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                          IsKZCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins, string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS,
                                  bit IsCommutable = 0> :
   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                   RHS, IsCommutable, 0, IsCommutable, X86selects>;
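
// Scalar ops only merge element 0 under the low mask bit, so this passes the
// X86selects node (select on bit 0 of the mask) instead of a full vselect.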

// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDNode Select = vselect,
                                bit MaskOnly = 0> :
   AVX512_maskable_common<O, F, _, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          !if(MaskOnly, (null_frag), RHS),
                          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                          Select, "", IsCommutable, IsKCommutable>;

// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
   AVX512_maskable_common<O, F, OutVT, Outs,
                          !con((ins InVT.RC:$src1), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                          (vselect InVT.KRCWM:$mask, RHS,
                                   (bitconvert InVT.RC:$src1)),
                          vselect, "", IsCommutable>;

multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns, string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       dag RHS,
                                       bit IsCommutable = 0,
                                       bit IsKCommutable = 0,
                                       bit MaskOnly = 0> :
   AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                        IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                        X86selects, MaskOnly>;

multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "$src0 = $dst">;

multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "">;

// Instruction with mask that puts result in mask register,
// like "compare" and "vptest".
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      list<dag> Pattern,
                                      list<dag> MaskingPattern,
                                      bit IsCommutable = 0> {
  let isCommutable = IsCommutable in
  def NAME: AVX512<O, F, Outs, Ins,
                   OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                 "$dst, "#IntelSrcAsm#"}",
                   Pattern>;

  def NAME#k: AVX512<O, F, Outs, MaskingIns,
                     OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                   "$dst {${mask}}, "#IntelSrcAsm#"}",
                     MaskingPattern>, EVEX_K;
}

multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      dag RHS, dag MaskingRHS,
                                      bit IsCommutable = 0> :
   AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                              AttSrcAsm, IntelSrcAsm,
                              [(set _.KRC:$dst, RHS)],
                              [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;

multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag Ins, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, bit IsCommutable = 0> :
   AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                              !con((ins _.KRCWM:$mask), Ins),
                              OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                              (and _.KRCWM:$mask, RHS), IsCommutable>;
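
// A masked compare writes its result into a mask register, so merge-masking
// degenerates to ANDing the input mask with the compare result.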

multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
                                   dag Outs, dag Ins, string OpcodeStr,
                                   string AttSrcAsm, string IntelSrcAsm> :
   AVX512_maskable_custom_cmp<O, F, Outs,
                              Ins, !con((ins _.KRCWM:$mask), Ins), OpcodeStr,
                              AttSrcAsm, IntelSrcAsm, [], []>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable_logic<bits<8> O, Format F, X86VectorVTInfo _,
                                 dag Outs, dag Ins, string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskedRHS,
                                 bit IsCommutable = 0, SDNode Select = vselect> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                                (Select _.KRCWM:$mask, MaskedRHS, _.RC:$src0))],
                          [(set _.RC:$dst,
                                (Select _.KRCWM:$mask, MaskedRHS,
                                        _.ImmAllZerosV))],
                          "$src0 = $dst", IsCommutable>;

// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                        [(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                              [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}

// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all ones and all zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                                [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                           (v16i32 immAllOnesV),
                                                           (v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                                           (bc_v8i64 (v16i32 immAllOnesV)),
                                                           (bc_v8i64 (v16i32 immAllZerosV))))]>;
}

let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                        [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
                        [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}

// Alias instructions that map fld0 to xorps for SSE or vxorps for AVX.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                        [(set FR32X:$dst, fp32imm0)]>;
def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                        [(set FR64X:$dst, fpimm0)]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//===----------------------------------------------------------------------===//

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;

    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT (bitconvert (From.LdFrag addr:$src2))),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT (bitconvert (From.LdFrag addr:$src2))),
                                           (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, ReadAfterLd]>;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;

multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                     X86VectorVTInfo To, PatFrag vinsert_insert,
                                     SDNodeXForm INSERT_get_vinsert_imm, list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vinsert_insert:$ins
                   (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                     To.RC:$src1, From.RC:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;

    def : Pat<(vinsert_insert:$ins
                   (To.VT To.RC:$src1),
                   (From.VT (bitconvert (From.LdFrag addr:$src2))),
                   (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                     To.RC:$src1, addr:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}

multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   null_frag, vinsert128_insert, sched>,
                                   VEX_W1X, EVEX_V256;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 VEX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 null_frag, vinsert256_insert, sched>,
                                 EVEX_V512;
  }
}

// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
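
// e.g. "vinsertf32x4 $1, %xmm1, %zmm2, %zmm0 {%k1}" inserts a 128-bit lane
// into bits [255:128] of the destination, merging under %k1.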

// Codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen patterns with the alternative types: insert VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen patterns with the alternative types: insert VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: insert VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT
                                             (bitconvert
                                              (From.LdFrag addr:$src2))),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT
                                             (bitconvert
                                              (From.LdFrag addr:$src2))),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}

defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

// vinsertps - insert f32 to XMM
let ExeDomain = SSEPackedSingle in {
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          imm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//===----------------------------------------------------------------------===//

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                            addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, To.KRCWM:$mask,
                         From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst {${mask}}|"
                        "$dst {${mask}}, $src1, $idx}", []>,
                    EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;

// Codegen patterns for the alternative types.
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                      X86VectorVTInfo To, PatFrag vextract_extract,
                                      SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                        From.RC:$src1,
                        (EXTRACT_get_vextract_imm To.RC:$ext)))>;
    def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                             (iPTR imm))), addr:$dst),
              (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
               (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}

multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   vextract128_extract, SchedRR, SchedMR>,
                                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                                   X86VectorVTInfo< 8, EltVT64, VR512>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   vextract256_extract, SchedRR, SchedMR>,
                                       VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}

// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
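
// e.g. "vextractf32x4 $3, %zmm0, %xmm1 {%k1} {z}" extracts the top 128-bit
// lane of %zmm0, zeroing result elements where %k1 is clear.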

// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC256.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: extract VEC256 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [NoVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}
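
// Note the index rescaling: extract_subvector indices count elements, so e.g.
// element index 2 of v8i64 is 128-bit lane 1 of the low 256 bits; after
// taking sub_ymm the 128-bit extract uses immediate 1.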

// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
                              v8f32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
                              v4f64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

// vextractps - extract 32 bits from XMM
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
                      (ins VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
                      EVEX, VEX_WIG, Sched<[WriteVecExtract]>;

def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
                      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                              addr:$dst)]>,
                      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;

//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//===---------------------------------------------------------------------===//

// Broadcast with a scalar argument.
multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
                                   string Name,
                                   X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#r)
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rk)
             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rkz)
             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}

// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     string Name,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast> {
  let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in {
    defm r : AVX512_maskable_split<opc, MRMSrcReg, MaskInfo,
                                   (outs MaskInfo.RC:$dst),
                                   (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
                                   (MaskInfo.VT
                                    (bitconvert
                                     (DestInfo.VT
                                      (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))),
                                   (MaskInfo.VT
                                    (bitconvert
                                     (DestInfo.VT
                                      (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))))>,
                                   T8PD, EVEX, Sched<[SchedRR]>;

    defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo,
                                   (outs MaskInfo.RC:$dst),
                                   (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                                   (MaskInfo.VT
                                    (bitconvert
                                     (DestInfo.VT (UnmaskedOp
                                       (SrcInfo.ScalarLdFrag addr:$src))))),
                                   (MaskInfo.VT
                                    (bitconvert
                                     (DestInfo.VT (X86VBroadcast
                                       (SrcInfo.ScalarLdFrag addr:$src)))))>,
                                   T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>,
                                   Sched<[SchedRM]>;
  }

  def : Pat<(MaskInfo.VT
             (bitconvert
              (DestInfo.VT (UnmaskedOp
                            (SrcInfo.VT (scalar_to_vector
                                         (SrcInfo.ScalarLdFrag addr:$src))))))),
            (!cast<Instruction>(Name#MaskInfo.ZSuffix#m) addr:$src)>;
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                                          (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#mk)
             MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>;
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                                          (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#MaskInfo.ZSuffix#mkz)
             MaskInfo.KRCWM:$mask, addr:$src)>;
}

// Helper class to force mask and broadcast result to same type.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo> :
  avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo>;

multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128>,
             avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                     _.info128>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                        _.info128>,
                EVEX_V256;
  }
}

multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128>,
             avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                     _.info128>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                        _.info128>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info128, _.info128>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
                                        _.info128>,
                EVEX_V128;
  }
}

defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                           avx512vl_f32_info>;
defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                           avx512vl_f64_info>, VEX_W1X;

multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  let ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins SrcRC:$src),
                           "vpbroadcast"##_.Suffix, "$src", "$src",
                           (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX,
                           Sched<[SchedRR]>;
}

multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                                      X86VectorVTInfo _, SDPatternOperator OpNode,
                                      RegisterClass SrcRC, SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable_custom<opc, MRMSrcReg,
                                  (outs _.RC:$dst), (ins GR32:$src),
                                  !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                                  !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                                  "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
                                  "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;

  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#r)
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}
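
// The register forms of VPBROADCASTB/W take a 32-bit GPR source, so the
// GR8/GR16 value is first placed into a GR32 via INSERT_SUBREG; the
// instruction reads only the low byte/word and ignores the upper bits.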

multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                                         AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                                         RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
                                        OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
                                           _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
                                           _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}

multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}

defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;

// Provide aliases for broadcast from the same register class that
// automatically does the extract.
multiclass avx512_int_broadcast_rm_lowering<string Name,
                                            X86VectorVTInfo DestInfo,
                                            X86VectorVTInfo SrcInfo,
                                            X86VectorVTInfo ExtInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#"r")
             (ExtInfo.VT (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm)))>;
}

multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                      AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                 WriteShuffle256Ld, _.info512, _.info128>,
             avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info256, _.info128>,
             EVEX_V512;
    // Defined separately to avoid redefinition.
    defm Z_Alt : avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info512, _.info128>;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128>,
                avx512_int_broadcast_rm_lowering<NAME, _.info256, _.info256, _.info128>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
                                    WriteShuffleXLd, _.info128, _.info128>,
                EVEX_V128;
  }
}

defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                               avx512vl_i8_info, HasBWI>;
defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                               avx512vl_i16_info, HasBWI>;
defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                               avx512vl_i32_info, HasAVX512>;
defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                               avx512vl_i64_info, HasAVX512>, VEX_W1X;

multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                      X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                            (_Dst.VT (X86SubVBroadcast
                              (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
                            Sched<[SchedWriteShuffle.YMM.Folded]>,
                            AVX5128IBase, EVEX;
}

// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns so that we only use the DQ instructions when masking
// is requested.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                                         X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                            (null_frag),
                            (_Dst.VT (X86SubVBroadcast
                              (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
                            Sched<[SchedWriteShuffle.YMM.Folded]>,
                            AVX5128IBase, EVEX;
}

let Predicates = [HasAVX512] in {
  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
  def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZm addr:$src)>;
}

let Predicates = [HasVLX] in {
  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZ128m addr:$src)>;
  def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZ256m addr:$src)>;
}
1410 let Predicates = [HasVLX, HasBWI] in {
1411 // loadi16 is tricky to fold, because !isTypeDesirableForOp justifiably rejects i16 loads.
1412 // This means we'll encounter truncated i32 loads instead; match those here.
1413 def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1414 (VPBROADCASTWZ128m addr:$src)>;
1415 def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1416 (VPBROADCASTWZ256m addr:$src)>;
1417 def : Pat<(v8i16 (X86VBroadcast
1418 (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1419 (VPBROADCASTWZ128m addr:$src)>;
1420 def : Pat<(v16i16 (X86VBroadcast
1421 (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1422 (VPBROADCASTWZ256m addr:$src)>;
1423 }
1425 //===----------------------------------------------------------------------===//
1426 // AVX-512 BROADCAST SUBVECTORS
1427 //
1429 defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1430 v16i32_info, v4i32x_info>,
1431 EVEX_V512, EVEX_CD8<32, CD8VT4>;
1432 defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1433 v16f32_info, v4f32x_info>,
1434 EVEX_V512, EVEX_CD8<32, CD8VT4>;
1435 defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1436 v8i64_info, v4i64x_info>, VEX_W,
1437 EVEX_V512, EVEX_CD8<64, CD8VT4>;
1438 defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1439 v8f64_info, v4f64x_info>, VEX_W,
1440 EVEX_V512, EVEX_CD8<64, CD8VT4>;
1442 let Predicates = [HasAVX512] in {
1443 def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
1444 (VBROADCASTF64X4rm addr:$src)>;
1445 def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
1446 (VBROADCASTI64X4rm addr:$src)>;
1447 def : Pat<(v32i16 (X86SubVBroadcast (bc_v16i16 (loadv4i64 addr:$src)))),
1448 (VBROADCASTI64X4rm addr:$src)>;
1449 def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
1450 (VBROADCASTI64X4rm addr:$src)>;
1452 // Provide a fallback in case the load node used in the patterns above has
1453 // additional users, which prevents those patterns from being selected.
1454 def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
1455 (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1456 (v4f64 VR256X:$src), 1)>;
1457 def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
1458 (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1459 (v8f32 VR256X:$src), 1)>;
1460 def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
1461 (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1462 (v4i64 VR256X:$src), 1)>;
1463 def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
1464 (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1465 (v8i32 VR256X:$src), 1)>;
1466 def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
1467 (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1468 (v16i16 VR256X:$src), 1)>;
1469 def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
1470 (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1471 (v32i8 VR256X:$src), 1)>;
1473 def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1474 (VBROADCASTF32X4rm addr:$src)>;
1475 def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1476 (VBROADCASTI32X4rm addr:$src)>;
1477 def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
1478 (VBROADCASTI32X4rm addr:$src)>;
1479 def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
1480 (VBROADCASTI32X4rm addr:$src)>;
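// Note: without AVX512DQ only the 32-bit-element subvector broadcasts exist,
// which is why the 64-bit-element loads above are matched to VBROADCAST*32X4;
// the instructions load the same 16 bytes either way.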
1482 // Patterns for selects of bitcasted operations.
1483 def : Pat<(vselect VK16WM:$mask,
1484 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1485 (bc_v16f32 (v16i32 immAllZerosV))),
1486 (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1487 def : Pat<(vselect VK16WM:$mask,
1488 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1489 VR512:$src0),
1490 (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1491 def : Pat<(vselect VK16WM:$mask,
1492 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1493 (v16i32 immAllZerosV)),
1494 (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1495 def : Pat<(vselect VK16WM:$mask,
1496 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1497 VR512:$src0),
1498 (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1500 def : Pat<(vselect VK8WM:$mask,
1501 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
1502 (bc_v8f64 (v16i32 immAllZerosV))),
1503 (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1504 def : Pat<(vselect VK8WM:$mask,
1505 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
1506 VR512:$src0),
1507 (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1508 def : Pat<(vselect VK8WM:$mask,
1509 (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
1510 (bc_v8i64 (v16i32 immAllZerosV))),
1511 (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1512 def : Pat<(vselect VK8WM:$mask,
1513 (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
1514 VR512:$src0),
1515 (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1516 }
1518 let Predicates = [HasVLX] in {
1519 defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1520 v8i32x_info, v4i32x_info>,
1521 EVEX_V256, EVEX_CD8<32, CD8VT4>;
1522 defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1523 v8f32x_info, v4f32x_info>,
1524 EVEX_V256, EVEX_CD8<32, CD8VT4>;
1526 def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1527 (VBROADCASTF32X4Z256rm addr:$src)>;
1528 def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1529 (VBROADCASTI32X4Z256rm addr:$src)>;
1530 def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
1531 (VBROADCASTI32X4Z256rm addr:$src)>;
1532 def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
1533 (VBROADCASTI32X4Z256rm addr:$src)>;
1535 // Patterns for selects of bitcasted operations.
1536 def : Pat<(vselect VK8WM:$mask,
1537 (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1538 (bc_v8f32 (v8i32 immAllZerosV))),
1539 (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1540 def : Pat<(vselect VK8WM:$mask,
1541 (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1542 VR256X:$src0),
1543 (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1544 def : Pat<(vselect VK8WM:$mask,
1545 (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1546 (v8i32 immAllZerosV)),
1547 (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1548 def : Pat<(vselect VK8WM:$mask,
1549 (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1550 VR256X:$src0),
1551 (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1554 // Provide a fallback in case the load node used in the patterns above has
1555 // additional users, which prevents those patterns from being selected.
1556 def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
1557 (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1558 (v2f64 VR128X:$src), 1)>;
1559 def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
1560 (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1561 (v4f32 VR128X:$src), 1)>;
1562 def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
1563 (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1564 (v2i64 VR128X:$src), 1)>;
1565 def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
1566 (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1567 (v4i32 VR128X:$src), 1)>;
1568 def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
1569 (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1570 (v8i16 VR128X:$src), 1)>;
1571 def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
1572 (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1573 (v16i8 VR128X:$src), 1)>;
1574 }
1576 let Predicates = [HasVLX, HasDQI] in {
1577 defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1578 v4i64x_info, v2i64x_info>, VEX_W1X,
1579 EVEX_V256, EVEX_CD8<64, CD8VT2>;
1580 defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1581 v4f64x_info, v2f64x_info>, VEX_W1X,
1582 EVEX_V256, EVEX_CD8<64, CD8VT2>;
1584 // Patterns for selects of bitcasted operations.
1585 def : Pat<(vselect VK4WM:$mask,
1586 (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1587 (bc_v4f64 (v8i32 immAllZerosV))),
1588 (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1589 def : Pat<(vselect VK4WM:$mask,
1590 (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1591 VR256X:$src0),
1592 (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1593 def : Pat<(vselect VK4WM:$mask,
1594 (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
1595 (bc_v4i64 (v8i32 immAllZerosV))),
1596 (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1597 def : Pat<(vselect VK4WM:$mask,
1598 (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
1599 VR256X:$src0),
1600 (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1601 }
1603 let Predicates = [HasDQI] in {
1604 defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1605 v8i64_info, v2i64x_info>, VEX_W,
1606 EVEX_V512, EVEX_CD8<64, CD8VT2>;
1607 defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1608 v16i32_info, v8i32x_info>,
1609 EVEX_V512, EVEX_CD8<32, CD8VT8>;
1610 defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1611 v8f64_info, v2f64x_info>, VEX_W,
1612 EVEX_V512, EVEX_CD8<64, CD8VT2>;
1613 defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1614 v16f32_info, v8f32x_info>,
1615 EVEX_V512, EVEX_CD8<32, CD8VT8>;
1617 // Patterns for selects of bitcasted operations.
1618 def : Pat<(vselect VK16WM:$mask,
1619 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
1620 (bc_v16f32 (v16i32 immAllZerosV))),
1621 (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
1622 def : Pat<(vselect VK16WM:$mask,
1623 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
1624 VR512:$src0),
1625 (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1626 def : Pat<(vselect VK16WM:$mask,
1627 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
1628 (v16i32 immAllZerosV)),
1629 (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
1630 def : Pat<(vselect VK16WM:$mask,
1631 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
1632 VR512:$src0),
1633 (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1635 def : Pat<(vselect VK8WM:$mask,
1636 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1637 (bc_v8f64 (v16i32 immAllZerosV))),
1638 (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
1639 def : Pat<(vselect VK8WM:$mask,
1640 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1641 VR512:$src0),
1642 (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1643 def : Pat<(vselect VK8WM:$mask,
1644 (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
1645 (bc_v8i64 (v16i32 immAllZerosV))),
1646 (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
1647 def : Pat<(vselect VK8WM:$mask,
1648 (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
1649 VR512:$src0),
1650 (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1651 }
1653 multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1654 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
1655 let Predicates = [HasDQI] in
1656 defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
1657 WriteShuffle256Ld, _Dst.info512,
1658 _Src.info512, _Src.info128, null_frag>,
1659 EVEX_V512;
1660 let Predicates = [HasDQI, HasVLX] in
1661 defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
1662 WriteShuffle256Ld, _Dst.info256,
1663 _Src.info256, _Src.info128, null_frag>,
1664 EVEX_V256;
1665 }
1667 multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1668 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
1669 avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1671 let Predicates = [HasDQI, HasVLX] in
1672 defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
1673 WriteShuffleXLd, _Dst.info128,
1674 _Src.info128, _Src.info128, null_frag>,
1675 EVEX_V128;
1676 }
1678 defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1679 avx512vl_i32_info, avx512vl_i64_info>;
1680 defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1681 avx512vl_f32_info, avx512vl_f64_info>;
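// vbroadcast[i,f]32x2 replicates a 64-bit (2 x 32-bit) element, which is why
// the _Src infos passed in above are the i64/f64 variants.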
1683 let Predicates = [HasVLX] in {
1684 def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))),
1685 (VBROADCASTSSZ256r (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>;
1686 def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))),
1687 (VBROADCASTSDZ256r (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>;
1688 }
1690 def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
1691 (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))>;
1692 def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
1693 (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>;
1695 def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
1696 (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))>;
1697 def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
1698 (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>;
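// These patterns let a broadcast of a value that already lives in a vector
// register use VBROADCASTSS/SD on the low xmm subregister directly, instead
// of routing the element through a GPR or a shuffle sequence.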
1700 //===----------------------------------------------------------------------===//
1701 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
1702 //
1703 multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1704 X86VectorVTInfo _, RegisterClass KRC> {
1705 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1706 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1707 [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1708 EVEX, Sched<[WriteShuffle]>;
1709 }
1711 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1712 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1713 let Predicates = [HasCDI] in
1714 defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1715 let Predicates = [HasCDI, HasVLX] in {
1716 defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1717 defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1718 }
1719 }
1721 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1722 avx512vl_i32_info, VK16>;
1723 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1724 avx512vl_i64_info, VK8>, VEX_W;
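// e.g. vpbroadcastmw2d zmm0, k1 zero-extends the 16 bits of k1 to 32 bits
// and replicates the result into each dword element of zmm0.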
1726 //===----------------------------------------------------------------------===//
1727 // -- VPERMI2 - 3 source operands form --
1728 multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1729 X86FoldableSchedWrite sched,
1730 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1731 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1732 hasSideEffects = 0 in {
1733 defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1734 (ins _.RC:$src2, _.RC:$src3),
1735 OpcodeStr, "$src3, $src2", "$src2, $src3",
1736 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1737 EVEX_4V, AVX5128IBase, Sched<[sched]>;
1739 let mayLoad = 1 in
1740 defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1741 (ins _.RC:$src2, _.MemOp:$src3),
1742 OpcodeStr, "$src3, $src2", "$src2, $src3",
1743 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1744 (_.VT (bitconvert (_.LdFrag addr:$src3))))), 1>,
1745 EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>;
1746 }
1747 }
1749 multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1750 X86FoldableSchedWrite sched,
1751 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1752 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1753 hasSideEffects = 0, mayLoad = 1 in
1754 defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1755 (ins _.RC:$src2, _.ScalarMemOp:$src3),
1756 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1757 !strconcat("$src2, ${src3}", _.BroadcastStr ),
1758 (_.VT (X86VPermt2 _.RC:$src2,
1759 IdxVT.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
1760 AVX5128IBase, EVEX_4V, EVEX_B,
1761 Sched<[sched.Folded, ReadAfterLd]>;
1762 }
1764 multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1765 X86FoldableSchedWrite sched,
1766 AVX512VLVectorVTInfo VTInfo,
1767 AVX512VLVectorVTInfo ShuffleMask> {
1768 defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1769 ShuffleMask.info512>,
1770 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1771 ShuffleMask.info512>, EVEX_V512;
1772 let Predicates = [HasVLX] in {
1773 defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1774 ShuffleMask.info128>,
1775 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1776 ShuffleMask.info128>, EVEX_V128;
1777 defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1778 ShuffleMask.info256>,
1779 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1780 ShuffleMask.info256>, EVEX_V256;
1781 }
1782 }
1784 multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1785 X86FoldableSchedWrite sched,
1786 AVX512VLVectorVTInfo VTInfo,
1787 AVX512VLVectorVTInfo Idx,
1788 Predicate Prd> {
1789 let Predicates = [Prd] in
1790 defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1791 Idx.info512>, EVEX_V512;
1792 let Predicates = [Prd, HasVLX] in {
1793 defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1794 Idx.info128>, EVEX_V128;
1795 defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1796 Idx.info256>, EVEX_V256;
1797 }
1798 }
1800 defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1801 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1802 defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1803 avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1804 defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1805 avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1806 VEX_W, EVEX_CD8<16, CD8VF>;
1807 defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1808 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1809 EVEX_CD8<8, CD8VF>;
1810 defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1811 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1812 defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1813 avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1815 // Extra patterns to handle the bitcasts that arise because the passthru and
1816 // index operands have different types in the FP versions.
1817 multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1818 X86VectorVTInfo IdxVT,
1819 X86VectorVTInfo CastVT> {
1820 def : Pat<(_.VT (vselect _.KRCWM:$mask,
1821 (X86VPermt2 (_.VT _.RC:$src2),
1822 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
1823 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1824 (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1825 _.RC:$src2, _.RC:$src3)>;
1826 def : Pat<(_.VT (vselect _.KRCWM:$mask,
1827 (X86VPermt2 _.RC:$src2,
1828 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
1829 (_.LdFrag addr:$src3)),
1830 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1831 (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1832 _.RC:$src2, addr:$src3)>;
1833 def : Pat<(_.VT (vselect _.KRCWM:$mask,
1834 (X86VPermt2 _.RC:$src2,
1835 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
1836 (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
1837 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1838 (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1839 _.RC:$src2, addr:$src3)>;
1840 }
1842 // TODO: Should we add more casts? The vXi64 case is common due to ABI.
1843 defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
1844 defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
1845 defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
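// -- VPERMT2 - 3 source operands form --
// VPERMI2 above ties the index operand to $dst; VPERMT2 below ties the first
// table operand instead, with the index vector arriving in $src2.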
1848 multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1849 X86FoldableSchedWrite sched,
1850 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1851 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1852 defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1853 (ins IdxVT.RC:$src2, _.RC:$src3),
1854 OpcodeStr, "$src3, $src2", "$src2, $src3",
1855 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1856 EVEX_4V, AVX5128IBase, Sched<[sched]>;
1858 defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1859 (ins IdxVT.RC:$src2, _.MemOp:$src3),
1860 OpcodeStr, "$src3, $src2", "$src2, $src3",
1861 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1862 (bitconvert (_.LdFrag addr:$src3)))), 1>,
1863 EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>;
1864 }
1865 }
1866 multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1867 X86FoldableSchedWrite sched,
1868 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1869 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1870 defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1871 (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1872 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1873 !strconcat("$src2, ${src3}", _.BroadcastStr ),
1874 (_.VT (X86VPermt2 _.RC:$src1,
1875 IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
1876 AVX5128IBase, EVEX_4V, EVEX_B,
1877 Sched<[sched.Folded, ReadAfterLd]>;
1878 }
1880 multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1881 X86FoldableSchedWrite sched,
1882 AVX512VLVectorVTInfo VTInfo,
1883 AVX512VLVectorVTInfo ShuffleMask> {
1884 defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1885 ShuffleMask.info512>,
1886 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1887 ShuffleMask.info512>, EVEX_V512;
1888 let Predicates = [HasVLX] in {
1889 defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1890 ShuffleMask.info128>,
1891 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1892 ShuffleMask.info128>, EVEX_V128;
1893 defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1894 ShuffleMask.info256>,
1895 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1896 ShuffleMask.info256>, EVEX_V256;
1897 }
1898 }
1900 multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1901 X86FoldableSchedWrite sched,
1902 AVX512VLVectorVTInfo VTInfo,
1903 AVX512VLVectorVTInfo Idx, Predicate Prd> {
1904 let Predicates = [Prd] in
1905 defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1906 Idx.info512>, EVEX_V512;
1907 let Predicates = [Prd, HasVLX] in {
1908 defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1909 Idx.info128>, EVEX_V128;
1910 defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1911 Idx.info256>, EVEX_V256;
1912 }
1913 }
1915 defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1916 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1917 defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1918 avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1919 defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1920 avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1921 VEX_W, EVEX_CD8<16, CD8VF>;
1922 defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1923 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1924 EVEX_CD8<8, CD8VF>;
1925 defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1926 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1927 defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1928 avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1930 //===----------------------------------------------------------------------===//
1931 // AVX-512 - BLEND using mask
1932 //
1934 multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1935 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1936 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1937 def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1938 (ins _.RC:$src1, _.RC:$src2),
1939 !strconcat(OpcodeStr,
1940 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1941 EVEX_4V, Sched<[sched]>;
1942 def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1943 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1944 !strconcat(OpcodeStr,
1945 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1946 []>, EVEX_4V, EVEX_K, Sched<[sched]>;
1947 def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1948 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1949 !strconcat(OpcodeStr,
1950 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1951 []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
1952 let mayLoad = 1 in {
1953 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1954 (ins _.RC:$src1, _.MemOp:$src2),
1955 !strconcat(OpcodeStr,
1956 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1957 []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
1958 Sched<[sched.Folded, ReadAfterLd]>;
1959 def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1960 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1961 !strconcat(OpcodeStr,
1962 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1963 []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1964 Sched<[sched.Folded, ReadAfterLd]>;
1965 def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1966 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1967 !strconcat(OpcodeStr,
1968 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1969 []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1970 Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
1971 }
1972 }
1974 multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
1975 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1976 let mayLoad = 1, hasSideEffects = 0 in {
1977 def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1978 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1979 !strconcat(OpcodeStr,
1980 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1981 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1982 EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1983 Sched<[sched.Folded, ReadAfterLd]>;
1985 def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1986 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1987 !strconcat(OpcodeStr,
1988 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
1989 "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1990 EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1991 Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
1993 def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1994 (ins _.RC:$src1, _.ScalarMemOp:$src2),
1995 !strconcat(OpcodeStr,
1996 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
1997 "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1998 EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1999 Sched<[sched.Folded, ReadAfterLd]>;
2000 }
2001 }
2003 multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2004 AVX512VLVectorVTInfo VTInfo> {
2005 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2006 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2007 EVEX_V512;
2009 let Predicates = [HasVLX] in {
2010 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2011 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2012 EVEX_V256;
2013 defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2014 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2015 EVEX_V128;
2016 }
2017 }
2019 multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2020 AVX512VLVectorVTInfo VTInfo> {
2021 let Predicates = [HasBWI] in
2022 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2023 EVEX_V512;
2025 let Predicates = [HasBWI, HasVLX] in {
2026 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2027 EVEX_V256;
2028 defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2029 EVEX_V128;
2030 }
2031 }
2033 defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
2034 avx512vl_f32_info>;
2035 defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
2036 avx512vl_f64_info>, VEX_W;
2037 defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
2038 avx512vl_i32_info>;
2039 defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
2040 avx512vl_i64_info>, VEX_W;
2041 defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
2042 avx512vl_i8_info>;
2043 defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
2044 avx512vl_i16_info>, VEX_W;
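// The blend itself is just masked element selection; e.g.
// vpblendmd zmm0 {k1}, zmm1, zmm2 writes zmm2's element where the k1 bit is
// set and zmm1's element where it is clear.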
2046 //===----------------------------------------------------------------------===//
2047 // Compare Instructions
2048 //===----------------------------------------------------------------------===//
2050 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
2052 multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
2053 X86FoldableSchedWrite sched> {
2054 defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2055 (outs _.KRC:$dst),
2056 (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
2057 "vcmp${cc}"#_.Suffix,
2058 "$src2, $src1", "$src1, $src2",
2059 (OpNode (_.VT _.RC:$src1),
2060 (_.VT _.RC:$src2),
2061 imm:$cc)>, EVEX_4V, Sched<[sched]>;
2063 defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2064 (outs _.KRC:$dst),
2065 (ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc),
2066 "vcmp${cc}"#_.Suffix,
2067 "$src2, $src1", "$src1, $src2",
2068 (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
2069 imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
2070 Sched<[sched.Folded, ReadAfterLd]>;
2072 defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2073 (outs _.KRC:$dst),
2074 (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
2075 "vcmp${cc}"#_.Suffix,
2076 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
2077 (OpNodeRnd (_.VT _.RC:$src1),
2078 (_.VT _.RC:$src2),
2079 imm:$cc,
2080 (i32 FROUND_NO_EXC))>,
2081 EVEX_4V, EVEX_B, Sched<[sched]>;
2082 // Accept explicit immediate argument form instead of comparison code.
2083 let isAsmParserOnly = 1, hasSideEffects = 0 in {
2084 defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2085 (outs _.KRC:$dst),
2086 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2087 "vcmp"#_.Suffix,
2088 "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V,
2089 Sched<[sched]>, NotMemoryFoldable;
2091 defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2092 (outs _.KRC:$dst),
2093 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2094 "vcmp"#_.Suffix,
2095 "$cc, $src2, $src1", "$src1, $src2, $cc">,
2096 EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
2097 Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
2099 defm rrb_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2100 (outs _.KRC:$dst),
2101 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2102 "vcmp"#_.Suffix,
2103 "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc">,
2104 EVEX_4V, EVEX_B, Sched<[sched]>, NotMemoryFoldable;
2105 }// let isAsmParserOnly = 1, hasSideEffects = 0
2107 let isCodeGenOnly = 1 in {
2108 let isCommutable = 1 in
2109 def rr : AVX512Ii8<0xC2, MRMSrcReg,
2110 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
2111 !strconcat("vcmp${cc}", _.Suffix,
2112 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2113 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2114 _.FRC:$src2,
2115 imm:$cc))]>,
2116 EVEX_4V, Sched<[sched]>;
2117 def rm : AVX512Ii8<0xC2, MRMSrcMem,
2118 (outs _.KRC:$dst),
2119 (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
2120 !strconcat("vcmp${cc}", _.Suffix,
2121 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2122 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2123 (_.ScalarLdFrag addr:$src2),
2124 imm:$cc))]>,
2125 EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
2126 Sched<[sched.Folded, ReadAfterLd]>;
2127 }
2128 }
2130 let Predicates = [HasAVX512] in {
2131 let ExeDomain = SSEPackedSingle in
2132 defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd,
2133 SchedWriteFCmp.Scl>, AVX512XSIi8Base;
2134 let ExeDomain = SSEPackedDouble in
2135 defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd,
2136 SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
2137 }
2139 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
2140 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2141 bit IsCommutable> {
2142 let isCommutable = IsCommutable in
2143 def rr : AVX512BI<opc, MRMSrcReg,
2144 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2145 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2146 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))]>,
2147 EVEX_4V, Sched<[sched]>;
2148 def rm : AVX512BI<opc, MRMSrcMem,
2149 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2150 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2151 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2152 (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
2153 EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
2154 let isCommutable = IsCommutable in
2155 def rrk : AVX512BI<opc, MRMSrcReg,
2156 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2157 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2158 "$dst {${mask}}, $src1, $src2}"),
2159 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2160 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))]>,
2161 EVEX_4V, EVEX_K, Sched<[sched]>;
2162 def rmk : AVX512BI<opc, MRMSrcMem,
2163 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2164 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2165 "$dst {${mask}}, $src1, $src2}"),
2166 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2167 (OpNode (_.VT _.RC:$src1),
2168 (_.VT (bitconvert
2169 (_.LdFrag addr:$src2))))))]>,
2170 EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
2171 }
2173 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
2174 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2175 bit IsCommutable = 0> :
2176 avx512_icmp_packed<opc, OpcodeStr, OpNode, sched, _, IsCommutable> {
2177 def rmb : AVX512BI<opc, MRMSrcMem,
2178 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2179 !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2180 "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2181 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2182 (X86VBroadcast (_.ScalarLdFrag addr:$src2))))]>,
2183 EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
2184 def rmbk : AVX512BI<opc, MRMSrcMem,
2185 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2186 _.ScalarMemOp:$src2),
2187 !strconcat(OpcodeStr,
2188 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2189 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2190 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2191 (OpNode (_.VT _.RC:$src1),
2192 (X86VBroadcast
2193 (_.ScalarLdFrag addr:$src2)))))]>,
2194 EVEX_4V, EVEX_K, EVEX_B,
2195 Sched<[sched.Folded, ReadAfterLd]>;
2196 }
2198 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
2199 X86SchedWriteWidths sched,
2200 AVX512VLVectorVTInfo VTInfo, Predicate prd,
2201 bit IsCommutable = 0> {
2202 let Predicates = [prd] in
2203 defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.ZMM,
2204 VTInfo.info512, IsCommutable>, EVEX_V512;
2206 let Predicates = [prd, HasVLX] in {
2207 defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.YMM,
2208 VTInfo.info256, IsCommutable>, EVEX_V256;
2209 defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.XMM,
2210 VTInfo.info128, IsCommutable>, EVEX_V128;
2211 }
2212 }
2214 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2215 PatFrag OpNode, X86SchedWriteWidths sched,
2216 AVX512VLVectorVTInfo VTInfo,
2217 Predicate prd, bit IsCommutable = 0> {
2218 let Predicates = [prd] in
2219 defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.ZMM,
2220 VTInfo.info512, IsCommutable>, EVEX_V512;
2222 let Predicates = [prd, HasVLX] in {
2223 defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.YMM,
2224 VTInfo.info256, IsCommutable>, EVEX_V256;
2225 defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.XMM,
2226 VTInfo.info128, IsCommutable>, EVEX_V128;
2227 }
2228 }
2230 // This fragment treats the setcc node as commutable to help match loads in
2231 // both operands for PCMPEQ.
2232 def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
2233 def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2),
2234 (X86setcc_commute node:$src1, node:$src2, SETEQ)>;
2235 def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
2236 (setcc node:$src1, node:$src2, SETGT)>;
2238 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2239 // increase the pattern complexity the way an immediate would.
2240 let AddedComplexity = 2 in {
2241 // FIXME: Is there a better scheduler class for VPCMP?
2242 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c,
2243 SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2244 EVEX_CD8<8, CD8VF>, VEX_WIG;
2246 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c,
2247 SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2248 EVEX_CD8<16, CD8VF>, VEX_WIG;
2250 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c,
2251 SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2252 EVEX_CD8<32, CD8VF>;
2254 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c,
2255 SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2256 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2258 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
2259 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2260 EVEX_CD8<8, CD8VF>, VEX_WIG;
2262 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
2263 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2264 EVEX_CD8<16, CD8VF>, VEX_WIG;
2266 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
2267 SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2268 EVEX_CD8<32, CD8VF>;
2270 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
2271 SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2272 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2273 }
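// These reuse the legacy PCMPEQ/PCMPGT encodings but write their result to a
// mask register, e.g. vpcmpeqd k0, zmm0, zmm1 sets one k0 bit per dword lane.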
2275 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2276 PatFrag CommFrag, X86FoldableSchedWrite sched,
2277 X86VectorVTInfo _, string Name> {
2278 let isCommutable = 1 in
2279 def rri : AVX512AIi8<opc, MRMSrcReg,
2280 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
2281 !strconcat("vpcmp${cc}", Suffix,
2282 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2283 [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2284 (_.VT _.RC:$src2),
2285 cond)))]>,
2286 EVEX_4V, Sched<[sched]>;
2287 def rmi : AVX512AIi8<opc, MRMSrcMem,
2288 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
2289 !strconcat("vpcmp${cc}", Suffix,
2290 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2291 [(set _.KRC:$dst, (_.KVT
2292 (Frag:$cc
2293 (_.VT _.RC:$src1),
2294 (_.VT (bitconvert (_.LdFrag addr:$src2))),
2295 cond)))]>,
2296 EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
2297 let isCommutable = 1 in
2298 def rrik : AVX512AIi8<opc, MRMSrcReg,
2299 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2300 AVX512ICC:$cc),
2301 !strconcat("vpcmp${cc}", Suffix,
2302 "\t{$src2, $src1, $dst {${mask}}|",
2303 "$dst {${mask}}, $src1, $src2}"),
2304 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2305 (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2306 (_.VT _.RC:$src2),
2307 cond))))]>,
2308 EVEX_4V, EVEX_K, Sched<[sched]>;
2309 def rmik : AVX512AIi8<opc, MRMSrcMem,
2310 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2311 AVX512ICC:$cc),
2312 !strconcat("vpcmp${cc}", Suffix,
2313 "\t{$src2, $src1, $dst {${mask}}|",
2314 "$dst {${mask}}, $src1, $src2}"),
2315 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2316 (_.KVT
2317 (Frag:$cc
2318 (_.VT _.RC:$src1),
2319 (_.VT (bitconvert
2320 (_.LdFrag addr:$src2))),
2321 cond))))]>,
2322 EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
2324 // Accept explicit immediate argument form instead of comparison code.
2325 let isAsmParserOnly = 1, hasSideEffects = 0 in {
2326 def rri_alt : AVX512AIi8<opc, MRMSrcReg,
2327 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2328 !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
2329 "$dst, $src1, $src2, $cc}"), []>,
2330 EVEX_4V, Sched<[sched]>, NotMemoryFoldable;
2332 def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
2333 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2334 !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
2335 "$dst, $src1, $src2, $cc}"), []>,
2336 EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
2337 def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
2338 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2339 u8imm:$cc),
2340 !strconcat("vpcmp", Suffix,
2341 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2342 "$dst {${mask}}, $src1, $src2, $cc}"), []>,
2343 EVEX_4V, EVEX_K, Sched<[sched]>, NotMemoryFoldable;
2345 def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
2346 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2347 u8imm:$cc),
2348 !strconcat("vpcmp", Suffix,
2349 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2350 "$dst {${mask}}, $src1, $src2, $cc}"), []>,
2351 EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>,
2352 NotMemoryFoldable;
2353 }
2355 def : Pat<(_.KVT (CommFrag:$cc (bitconvert (_.LdFrag addr:$src2)),
2356 (_.VT _.RC:$src1), cond)),
2357 (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2358 _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
2360 def : Pat<(and _.KRCWM:$mask,
2361 (_.KVT (CommFrag:$cc (bitconvert (_.LdFrag addr:$src2)),
2362 (_.VT _.RC:$src1), cond))),
2363 (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2364 _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2365 (CommFrag.OperandTransform $cc))>;
2366 }
2368 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2369 PatFrag CommFrag, X86FoldableSchedWrite sched,
2370 X86VectorVTInfo _, string Name> :
2371 avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched, _, Name> {
2372 def rmib : AVX512AIi8<opc, MRMSrcMem,
2373 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2374 AVX512ICC:$cc),
2375 !strconcat("vpcmp${cc}", Suffix,
2376 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
2377 "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2378 [(set _.KRC:$dst, (_.KVT (Frag:$cc
2379 (_.VT _.RC:$src1),
2380 (X86VBroadcast
2381 (_.ScalarLdFrag addr:$src2)),
2382 cond)))]>,
2383 EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
2384 def rmibk : AVX512AIi8<opc, MRMSrcMem,
2385 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2386 _.ScalarMemOp:$src2, AVX512ICC:$cc),
2387 !strconcat("vpcmp${cc}", Suffix,
2388 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2389 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2390 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2391 (_.KVT (Frag:$cc
2392 (_.VT _.RC:$src1),
2393 (X86VBroadcast
2394 (_.ScalarLdFrag addr:$src2)),
2395 cond))))]>,
2396 EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
2398 // Accept explicit immediate argument form instead of comparison code.
2399 let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
2400 def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
2401 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2402 u8imm:$cc),
2403 !strconcat("vpcmp", Suffix,
2404 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2405 "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
2406 EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
2407 NotMemoryFoldable;
2408 def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
2409 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2410 _.ScalarMemOp:$src2, u8imm:$cc),
2411 !strconcat("vpcmp", Suffix,
2412 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2413 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
2414 EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
2415 NotMemoryFoldable;
2416 }
2418 def : Pat<(_.KVT (CommFrag:$cc (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2419 (_.VT _.RC:$src1), cond)),
2420 (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2421 _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
2423 def : Pat<(and _.KRCWM:$mask,
2424 (_.KVT (CommFrag:$cc (X86VBroadcast
2425 (_.ScalarLdFrag addr:$src2)),
2426 (_.VT _.RC:$src1), cond))),
2427 (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2428 _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2429 (CommFrag.OperandTransform $cc))>;
2430 }
2432 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2433 PatFrag CommFrag, X86SchedWriteWidths sched,
2434 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2435 let Predicates = [prd] in
2436 defm Z : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.ZMM,
2437 VTInfo.info512, NAME>, EVEX_V512;
2439 let Predicates = [prd, HasVLX] in {
2440 defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.YMM,
2441 VTInfo.info256, NAME>, EVEX_V256;
2442 defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.XMM,
2443 VTInfo.info128, NAME>, EVEX_V128;
2447 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2448 PatFrag CommFrag, X86SchedWriteWidths sched,
2449 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2450 let Predicates = [prd] in
2451 defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.ZMM,
2452 VTInfo.info512, NAME>, EVEX_V512;
2454 let Predicates = [prd, HasVLX] in {
2455 defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.YMM,
2456 VTInfo.info256, NAME>, EVEX_V256;
2457 defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.XMM,
2458 VTInfo.info128, NAME>, EVEX_V128;
2459 }
2460 }
2462 def X86pcmpm_imm : SDNodeXForm<setcc, [{
2463 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2464 uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2465 return getI8Imm(SSECC, SDLoc(N));
2466 }]>;
2468 // Swapped operand version of the above.
2469 def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
2470 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2471 uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2472 SSECC = X86::getSwappedVPCMPImm(SSECC);
2473 return getI8Imm(SSECC, SDLoc(N));
2474 }]>;
2476 def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2477 (setcc node:$src1, node:$src2, node:$cc), [{
2478 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2479 return !ISD::isUnsignedIntSetCC(CC);
2480 }], X86pcmpm_imm>;
2482 // Same as above, but commutes immediate. Use for load folding.
2483 def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2484 (setcc node:$src1, node:$src2, node:$cc), [{
2485 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2486 return !ISD::isUnsignedIntSetCC(CC);
2487 }], X86pcmpm_imm_commute>;
2489 def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2490 (setcc node:$src1, node:$src2, node:$cc), [{
2491 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2492 return ISD::isUnsignedIntSetCC(CC);
2493 }], X86pcmpm_imm>;
2495 // Same as above, but commutes immediate. Use for load folding.
2496 def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2497 (setcc node:$src1, node:$src2, node:$cc), [{
2498 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2499 return ISD::isUnsignedIntSetCC(CC);
2500 }], X86pcmpm_imm_commute>;
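// For reference, the 3-bit VPCMP/VPCMPU predicate encodes:
// 0=EQ, 1=LT, 2=LE, 3=FALSE, 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE.
// Swapping the operands exchanges LT<->NLE and LE<->NLT, which is what
// X86::getSwappedVPCMPImm computes.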
2502 // FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
2503 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_commute,
2504 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2505 EVEX_CD8<8, CD8VF>;
2506 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_commute,
2507 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2508 EVEX_CD8<8, CD8VF>;
2510 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_commute,
2511 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2512 VEX_W, EVEX_CD8<16, CD8VF>;
2513 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_commute,
2514 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2515 VEX_W, EVEX_CD8<16, CD8VF>;
2517 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_commute,
2518 SchedWriteVecALU, avx512vl_i32_info,
2519 HasAVX512>, EVEX_CD8<32, CD8VF>;
2520 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_commute,
2521 SchedWriteVecALU, avx512vl_i32_info,
2522 HasAVX512>, EVEX_CD8<32, CD8VF>;
2524 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_commute,
2525 SchedWriteVecALU, avx512vl_i64_info,
2526 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2527 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_commute,
2528 SchedWriteVecALU, avx512vl_i64_info,
2529 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
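// With the ${cc} mnemonics this gives e.g. vpcmpled k1, zmm0, zmm1 as sugar
// for vpcmpd k1, zmm0, zmm1, 2.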
2531 multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2532 string Name> {
2533 defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2534 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc),
2535 "vcmp${cc}"#_.Suffix,
2536 "$src2, $src1", "$src1, $src2",
2537 (X86cmpm (_.VT _.RC:$src1),
2538 (_.VT _.RC:$src2),
2539 imm:$cc), 1>,
2540 Sched<[sched]>;
2542 defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2543 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
2544 "vcmp${cc}"#_.Suffix,
2545 "$src2, $src1", "$src1, $src2",
2546 (X86cmpm (_.VT _.RC:$src1),
2547 (_.VT (bitconvert (_.LdFrag addr:$src2))),
2548 imm:$cc)>,
2549 Sched<[sched.Folded, ReadAfterLd]>;
2551 defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2552 (outs _.KRC:$dst),
2553 (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
2554 "vcmp${cc}"#_.Suffix,
2555 "${src2}"##_.BroadcastStr##", $src1",
2556 "$src1, ${src2}"##_.BroadcastStr,
2557 (X86cmpm (_.VT _.RC:$src1),
2558 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
2559 imm:$cc)>,
2560 EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
2561 // Accept explicit immediate argument form instead of comparison code.
2562 let isAsmParserOnly = 1, hasSideEffects = 0 in {
2563 defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2564 (outs _.KRC:$dst),
2565 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2566 "vcmp"#_.Suffix,
2567 "$cc, $src2, $src1", "$src1, $src2, $cc">,
2568 Sched<[sched]>, NotMemoryFoldable;
2570 let mayLoad = 1 in {
2571 defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2572 (outs _.KRC:$dst),
2573 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2574 "vcmp"#_.Suffix,
2575 "$cc, $src2, $src1", "$src1, $src2, $cc">,
2576 Sched<[sched.Folded, ReadAfterLd]>,
2577 NotMemoryFoldable;
2579 defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2580 (outs _.KRC:$dst),
2581 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2582 "vcmp"#_.Suffix,
2583 "$cc, ${src2}"##_.BroadcastStr##", $src1",
2584 "$src1, ${src2}"##_.BroadcastStr##", $cc">,
2585 EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
2586 NotMemoryFoldable;
2587 }
2588 }
2590 // Patterns for selecting with loads in other operand.
2591 def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2592 CommutableCMPCC:$cc),
2593 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2594 imm:$cc)>;
2596 def : Pat<(and _.KRCWM:$mask, (X86cmpm (_.LdFrag addr:$src2),
2597 (_.VT _.RC:$src1),
2598 CommutableCMPCC:$cc)),
2599 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2600 _.RC:$src1, addr:$src2,
2601 imm:$cc)>;
2603 def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2604 (_.VT _.RC:$src1), CommutableCMPCC:$cc),
2605 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2606 imm:$cc)>;
2608 def : Pat<(and _.KRCWM:$mask, (X86cmpm (X86VBroadcast
2609 (_.ScalarLdFrag addr:$src2)),
2610 (_.VT _.RC:$src1),
2611 CommutableCMPCC:$cc)),
2612 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2613 _.RC:$src1, addr:$src2,
2614 imm:$cc)>;
2615 }
2617 multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2618 // Comparison code form (VCMPEQ / VCMPLT / VCMPLE / ...).
2619 defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2620 (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
2621 "vcmp${cc}"#_.Suffix,
2622 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
2623 (X86cmpmRnd (_.VT _.RC:$src1),
2624 (_.VT _.RC:$src2),
2625 imm:$cc,
2626 (i32 FROUND_NO_EXC))>,
2627 EVEX_B, Sched<[sched]>;
2629 let isAsmParserOnly = 1, hasSideEffects = 0 in {
2630 defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2631 (outs _.KRC:$dst),
2632 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2633 "vcmp"#_.Suffix,
2634 "$cc, {sae}, $src2, $src1",
2635 "$src1, $src2, {sae}, $cc">,
2636 EVEX_B, Sched<[sched]>, NotMemoryFoldable;
2637 }
2638 }
2640 multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
2641 let Predicates = [HasAVX512] in {
2642 defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2643 avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2644 }
2646 let Predicates = [HasAVX512,HasVLX] in {
2647 defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2648 defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
2649 }
2650 }
2652 defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2653 AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
2654 defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2655 AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2657 // Patterns to select fp compares with load as first operand.
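// CommutableCMPCC only matches comparison codes that are symmetric in their
// operands (such as EQ, NE, ORD and UNORD); FP LT/LE cannot be swapped
// without changing the predicate.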
2658 let Predicates = [HasAVX512] in {
2659 def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
2660 CommutableCMPCC:$cc)),
2661 (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>;
2663 def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
2664 CommutableCMPCC:$cc)),
2665 (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>;
2666 }
2668 // ----------------------------------------------------------------
2670 // Handle the fpclass instruction: mask = op(reg_scalar, imm)
2671 //                                        op(mem_scalar, imm)
2672 multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
2673 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2674 Predicate prd> {
2675 let Predicates = [prd], ExeDomain = _.ExeDomain in {
2676 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2677 (ins _.RC:$src1, i32u8imm:$src2),
2678 OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2679 [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
2680 (i32 imm:$src2)))]>,
2681 Sched<[sched]>;
2682 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2683 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2684 OpcodeStr##_.Suffix#
2685 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2686 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2687 (OpNode (_.VT _.RC:$src1),
2688 (i32 imm:$src2))))]>,
2689 EVEX_K, Sched<[sched]>;
2690 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2691 (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2692 OpcodeStr##_.Suffix##
2693 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2694 [(set _.KRC:$dst,
2695 (OpNode _.ScalarIntMemCPat:$src1,
2696 (i32 imm:$src2)))]>,
2697 Sched<[sched.Folded, ReadAfterLd]>;
2698 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2699 (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2700 OpcodeStr##_.Suffix##
2701 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2702 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2703 (OpNode _.ScalarIntMemCPat:$src1,
2704 (i32 imm:$src2))))]>,
2705 EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
2706 }
2707 }
2709 // Handle the fpclass instruction: mask = fpclass(reg_vec, imm)
2710 //                                        fpclass(mem_vec, imm)
2711 //                                        fpclass(broadcast(eltVt), imm)
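// The 8-bit immediate selects which classes to test, one bit per class
// (QNaN, +0, -0, +Inf, -Inf, denormal, finite negative, SNaN); each result
// mask bit is the OR of the selected tests for that element.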
2712 multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
2713 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2714 string mem, string broadcast> {
2715 let ExeDomain = _.ExeDomain in {
2716 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2717 (ins _.RC:$src1, i32u8imm:$src2),
2718 OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2719 [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
2720 (i32 imm:$src2)))]>,
2721 Sched<[sched]>;
2722 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2723 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2724 OpcodeStr##_.Suffix#
2725 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2726 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2727 (OpNode (_.VT _.RC:$src1),
2728 (i32 imm:$src2))))]>,
2729 EVEX_K, Sched<[sched]>;
2730 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2731 (ins _.MemOp:$src1, i32u8imm:$src2),
2732 OpcodeStr##_.Suffix##mem#
2733 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2734 [(set _.KRC:$dst,(OpNode
2735 (_.VT (bitconvert (_.LdFrag addr:$src1))),
2736 (i32 imm:$src2)))]>,
2737 Sched<[sched.Folded, ReadAfterLd]>;
2738 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2739 (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2740 OpcodeStr##_.Suffix##mem#
2741 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2742 [(set _.KRC:$dst, (and _.KRCWM:$mask, (OpNode
2743 (_.VT (bitconvert (_.LdFrag addr:$src1))),
2744 (i32 imm:$src2))))]>,
2745 EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
2746 def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2747 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2748 OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
2749 _.BroadcastStr##", $dst|$dst, ${src1}"
2750 ##_.BroadcastStr##", $src2}",
2751 [(set _.KRC:$dst,(OpNode
2752 (_.VT (X86VBroadcast
2753 (_.ScalarLdFrag addr:$src1))),
2754 (i32 imm:$src2)))]>,
2755 EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
2756 def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2757 (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2758 OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
2759 _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
2760 _.BroadcastStr##", $src2}",
2761 [(set _.KRC:$dst,(and _.KRCWM:$mask, (OpNode
2762 (_.VT (X86VBroadcast
2763 (_.ScalarLdFrag addr:$src1))),
2764 (i32 imm:$src2))))]>,
2765 EVEX_B, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
2766 }
2767 }
2769 multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2770 bits<8> opc, SDNode OpNode,
2771 X86SchedWriteWidths sched, Predicate prd,
2772 string broadcast> {
2773 let Predicates = [prd] in {
2774 defm Z : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.ZMM,
2775 _.info512, "{z}", broadcast>, EVEX_V512;
2777 let Predicates = [prd, HasVLX] in {
2778 defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.XMM,
2779 _.info128, "{x}", broadcast>, EVEX_V128;
2780 defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.YMM,
2781 _.info256, "{y}", broadcast>, EVEX_V256;
2782 }
2783 }
2785 multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2786 bits<8> opcScalar, SDNode VecOpNode,
2787 SDNode ScalarOpNode, X86SchedWriteWidths sched,
2788 Predicate prd> {
2789 defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
2790 VecOpNode, sched, prd, "{l}">,
2791 EVEX_CD8<32, CD8VF>;
2792 defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
2793 VecOpNode, sched, prd, "{q}">,
2794 EVEX_CD8<64, CD8VF>, VEX_W;
2795 defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
2796 sched.Scl, f32x_info, prd>,
2797 EVEX_CD8<32, CD8VT1>;
2798 defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
2799 sched.Scl, f64x_info, prd>,
2800 EVEX_CD8<64, CD8VT1>, VEX_W;
2803 defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass,
2804 X86Vfpclasss, SchedWriteFCmp, HasDQI>,
2805 AVX512AIi8Base, EVEX;
2807 //===----------------------------------------------------------------------===//
2808 // AVX-512 - Mask register copy, including
2809 // - copy between mask registers
2810 // - load/store mask registers
2811 // - copy from GPR to mask register and vice versa
2812 //===----------------------------------------------------------------------===//
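// Rough usage sketch of the forms defined below (AT&T syntax):
// kmovw %k1, %k2 ; mask-to-mask copy ("kk")
// kmovw (%rax), %k1 ; load ("km"), and kmovw %k1, (%rax) ; store ("mk")
// kmovw %eax, %k1 ; GPR-to-mask ("kr"), and kmovw %k1, %eax ; mask-to-GPR ("rk")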
2813 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2814 string OpcodeStr, RegisterClass KRC,
2815 ValueType vvt, X86MemOperand x86memop> {
2816 let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
2817 def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2818 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2819 Sched<[WriteMove]>;
2820 def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2821 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2822 [(set KRC:$dst, (vvt (load addr:$src)))]>,
2823 Sched<[WriteLoad]>;
2824 def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2825 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2826 [(store KRC:$src, addr:$dst)]>,
2827 Sched<[WriteStore]>;
2830 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2831 string OpcodeStr,
2832 RegisterClass KRC, RegisterClass GRC> {
2833 let hasSideEffects = 0 in {
2834 def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2835 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2836 Sched<[WriteMove]>;
2837 def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2838 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2839 Sched<[WriteMove]>;
2843 let Predicates = [HasDQI] in
2844 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2845 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2846 VEX, PD;
2848 let Predicates = [HasAVX512] in
2849 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2850 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2851 VEX, PS;
2853 let Predicates = [HasBWI] in {
2854 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32mem>,
2855 VEX, PD, VEX_W;
2856 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2857 VEX, XD;
2858 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2859 VEX, PS, VEX_W;
2860 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2861 VEX, XD, VEX_W;
2864 // GR from/to mask register
2865 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2866 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2867 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2868 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2870 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2871 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2872 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2873 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
2875 def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2876 (KMOVWrk VK16:$src)>;
2877 def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2878 (COPY_TO_REGCLASS VK16:$src, GR32)>;
2880 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2881 (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2882 def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2883 (COPY_TO_REGCLASS VK8:$src, GR32)>;
2885 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2886 (COPY_TO_REGCLASS GR32:$src, VK32)>;
2887 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2888 (COPY_TO_REGCLASS VK32:$src, GR32)>;
2889 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2890 (COPY_TO_REGCLASS GR64:$src, VK64)>;
2891 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2892 (COPY_TO_REGCLASS VK64:$src, GR64)>;
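// For the i8/i16 cases above there is no KMOV GPR form of that width, so the
// value is first widened to GR32 with INSERT_SUBREG and then copied into the
// mask register class; 32/64-bit masks copy directly, and only the low mask
// bits are ever read back.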
2895 let Predicates = [HasDQI] in {
2896 def : Pat<(store VK1:$src, addr:$dst),
2897 (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
2899 def : Pat<(v1i1 (load addr:$src)),
2900 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2901 def : Pat<(v2i1 (load addr:$src)),
2902 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2903 def : Pat<(v4i1 (load addr:$src)),
2904 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2907 let Predicates = [HasAVX512] in {
2908 def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2909 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2912 let Predicates = [HasAVX512] in {
2913 multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2914 def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2915 (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2917 def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2918 (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2921 defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
2922 defm : operation_gpr_mask_copy_lowering<VK2, v2i1>;
2923 defm : operation_gpr_mask_copy_lowering<VK4, v4i1>;
2924 defm : operation_gpr_mask_copy_lowering<VK8, v8i1>;
2925 defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
2926 defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
2927 defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;
2929 def : Pat<(insert_subvector (v16i1 immAllZerosV),
2930 (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2931 (COPY_TO_REGCLASS
2932 (KMOVWkr (AND32ri8
2933 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2934 (i32 1))),
2935 VK16)>;
2937 // Mask unary operation
2938 // - KNOT
2939 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2940 RegisterClass KRC, SDPatternOperator OpNode,
2941 X86FoldableSchedWrite sched, Predicate prd> {
2942 let Predicates = [prd] in
2943 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2944 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2945 [(set KRC:$dst, (OpNode KRC:$src))]>,
2946 Sched<[sched]>;
2949 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2950 SDPatternOperator OpNode,
2951 X86FoldableSchedWrite sched> {
2952 defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2953 sched, HasDQI>, VEX, PD;
2954 defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2955 sched, HasAVX512>, VEX, PS;
2956 defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2957 sched, HasBWI>, VEX, PD, VEX_W;
2958 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2959 sched, HasBWI>, VEX, PS, VEX_W;
2962 // TODO - do we need a X86SchedWriteWidths::KMASK type?
2963 defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
2965 // KNL does not support KMOVB, so an 8-bit mask has to be promoted to 16 bits.
2966 let Predicates = [HasAVX512, NoDQI] in
2967 def : Pat<(vnot VK8:$src),
2968 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
2970 def : Pat<(vnot VK4:$src),
2971 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2972 def : Pat<(vnot VK2:$src),
2973 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
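// KNOTW flips all 16 bits here, but the result is read back through the
// narrower VK8/VK4/VK2 class, so the extra inverted bits are never observed.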
2975 // Mask binary operation
2976 // - KAND, KANDN, KOR, KXNOR, KXOR
2977 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2978 RegisterClass KRC, SDPatternOperator OpNode,
2979 X86FoldableSchedWrite sched, Predicate prd,
2980 bit IsCommutable = 0> {
2981 let Predicates = [prd], isCommutable = IsCommutable in
2982 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2983 !strconcat(OpcodeStr,
2984 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2985 [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
2986 Sched<[sched]>;
2989 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2990 SDPatternOperator OpNode,
2991 X86FoldableSchedWrite sched, bit IsCommutable,
2992 Predicate prdW = HasAVX512> {
2993 defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2994 sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
2995 defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2996 sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
2997 defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2998 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
2999 defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3000 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
3003 def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
3004 def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
3005 // These nodes use 'vnot' instead of 'not' to support vectors.
3006 def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
3007 def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
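// For example, (vandn m1, m2) expands to (and (vnot m1), m2), matching the
// KANDN semantics dst = ~src1 & src2 used for the KANDN defm below.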
3009 // TODO - do we need a X86SchedWriteWidths::KMASK type?
3010 defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>;
3011 defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>;
3012 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>;
3013 defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>;
3014 defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>;
3015 defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
3017 multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
3018 Instruction Inst> {
3019 // With AVX512F an 8-bit mask is promoted to a 16-bit mask; with the DQI
3020 // extension the type is legal and the KxxxB instructions are used directly.
3021 let Predicates = [NoDQI] in
3022 def : Pat<(VOpNode VK8:$src1, VK8:$src2),
3023 (COPY_TO_REGCLASS
3024 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
3025 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
3027 // All types smaller than 8 bits require conversion anyway
3028 def : Pat<(OpNode VK1:$src1, VK1:$src2),
3029 (COPY_TO_REGCLASS (Inst
3030 (COPY_TO_REGCLASS VK1:$src1, VK16),
3031 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
3032 def : Pat<(VOpNode VK2:$src1, VK2:$src2),
3033 (COPY_TO_REGCLASS (Inst
3034 (COPY_TO_REGCLASS VK2:$src1, VK16),
3035 (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
3036 def : Pat<(VOpNode VK4:$src1, VK4:$src2),
3037 (COPY_TO_REGCLASS (Inst
3038 (COPY_TO_REGCLASS VK4:$src1, VK16),
3039 (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
3042 defm : avx512_binop_pat<and, and, KANDWrr>;
3043 defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
3044 defm : avx512_binop_pat<or, or, KORWrr>;
3045 defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
3046 defm : avx512_binop_pat<xor, xor, KXORWrr>;
3049 multiclass avx512_mask_unpck<string Suffix, RegisterClass KRC, ValueType VT,
3050 RegisterClass KRCSrc, X86FoldableSchedWrite sched,
3051 Predicate prd> {
3052 let Predicates = [prd] in {
3053 let hasSideEffects = 0 in
3054 def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
3055 (ins KRC:$src1, KRC:$src2),
3056 "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
3057 VEX_4V, VEX_L, Sched<[sched]>;
3059 def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
3060 (!cast<Instruction>(NAME##rr)
3061 (COPY_TO_REGCLASS KRCSrc:$src2, KRC),
3062 (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>;
3066 defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, WriteShuffle, HasAVX512>, PD;
3067 defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, WriteShuffle, HasBWI>, PS;
3068 defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, WriteShuffle, HasBWI>, PS, VEX_W;
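// Note the operand swap in avx512_mask_unpck: concat_vectors puts its first
// operand in the low elements, while KUNPCK places its second source in the
// low half (e.g. kunpckbw: dst[7:0] = src2[7:0], dst[15:8] = src1[7:0]).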
3071 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3072 SDNode OpNode, X86FoldableSchedWrite sched,
3073 Predicate prd> {
3074 let Predicates = [prd], Defs = [EFLAGS] in
3075 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
3076 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
3077 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
3078 Sched<[sched]>;
3081 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
3082 X86FoldableSchedWrite sched,
3083 Predicate prdW = HasAVX512> {
3084 defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
3085 VEX, PD;
3086 defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
3087 VEX, PS;
3088 defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
3089 VEX, PS, VEX_W;
3090 defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
3091 VEX, PD, VEX_W;
3094 // TODO - do we need a X86SchedWriteWidths::KMASK type?
3095 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
3096 defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
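// Flag semantics (per the Intel SDM): kortest sets ZF when src1|src2 is all
// zeroes and CF when it is all ones; ktest sets ZF when the AND of the two
// operands is zero and CF when the and-not is zero.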
3099 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3100 SDNode OpNode, X86FoldableSchedWrite sched> {
3101 let Predicates = [HasAVX512] in
3102 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
3103 !strconcat(OpcodeStr,
3104 "\t{$imm, $src, $dst|$dst, $src, $imm}"),
3105 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>,
3106 Sched<[sched]>;
3109 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
3110 SDNode OpNode, X86FoldableSchedWrite sched> {
3111 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3112 sched>, VEX, TAPD, VEX_W;
3113 let Predicates = [HasDQI] in
3114 defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3115 sched>, VEX, TAPD;
3116 let Predicates = [HasBWI] in {
3117 defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3118 sched>, VEX, TAPD, VEX_W;
3119 defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3120 sched>, VEX, TAPD;
3124 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
3125 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
3127 // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
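// Strategy: widen both narrow operands into the wide register class with
// INSERT_SUBREG (the undefined upper lanes only produce garbage mask bits),
// do the 512-bit compare, then COPY_TO_REGCLASS the result back to the
// narrow mask class so just the low bits are consumed.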
3128 multiclass avx512_icmp_packed_no_vlx_lowering<PatFrag Frag, string InstStr,
3129 X86VectorVTInfo Narrow,
3130 X86VectorVTInfo Wide> {
3131 def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
3132 (Narrow.VT Narrow.RC:$src2))),
3133 (COPY_TO_REGCLASS
3134 (!cast<Instruction>(InstStr#"Zrr")
3135 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3136 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
3137 Narrow.KRC)>;
3139 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3140 (Frag (Narrow.VT Narrow.RC:$src1),
3141 (Narrow.VT Narrow.RC:$src2)))),
3142 (COPY_TO_REGCLASS
3143 (!cast<Instruction>(InstStr#"Zrrk")
3144 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3145 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3146 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
3147 Narrow.KRC)>;
3150 // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
3151 multiclass avx512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag,
3152 string InstStr,
3153 X86VectorVTInfo Narrow,
3154 X86VectorVTInfo Wide> {
3155 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3156 (Narrow.VT Narrow.RC:$src2), cond)),
3157 (COPY_TO_REGCLASS
3158 (!cast<Instruction>(InstStr##Zrri)
3159 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3160 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3161 (Frag.OperandTransform $cc)), Narrow.KRC)>;
3163 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3164 (Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3165 (Narrow.VT Narrow.RC:$src2),
3166 cond)))),
3167 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
3168 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3169 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3170 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3171 (Frag.OperandTransform $cc)), Narrow.KRC)>;
3174 // Same as above, but for fp types which don't use PatFrags.
3175 multiclass avx512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
3176 X86VectorVTInfo Narrow,
3177 X86VectorVTInfo Wide> {
3178 def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
3179 (Narrow.VT Narrow.RC:$src2), imm:$cc)),
3180 (COPY_TO_REGCLASS
3181 (!cast<Instruction>(InstStr##Zrri)
3182 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3183 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3184 imm:$cc), Narrow.KRC)>;
3186 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3187 (OpNode (Narrow.VT Narrow.RC:$src1),
3188 (Narrow.VT Narrow.RC:$src2), imm:$cc))),
3189 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
3190 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3191 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3192 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3193 imm:$cc), Narrow.KRC)>;
3196 let Predicates = [HasAVX512, NoVLX] in {
3197 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
3198 // increase the pattern complexity the way an immediate would.
3199 let AddedComplexity = 2 in {
3200 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v8i32x_info, v16i32_info>;
3201 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v8i32x_info, v16i32_info>;
3203 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v4i32x_info, v16i32_info>;
3204 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v4i32x_info, v16i32_info>;
3206 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v4i64x_info, v8i64_info>;
3207 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v4i64x_info, v8i64_info>;
3209 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v2i64x_info, v8i64_info>;
3210 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v2i64x_info, v8i64_info>;
3213 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPD", v8i32x_info, v16i32_info>;
3214 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUD", v8i32x_info, v16i32_info>;
3216 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPD", v4i32x_info, v16i32_info>;
3217 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUD", v4i32x_info, v16i32_info>;
3219 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPQ", v4i64x_info, v8i64_info>;
3220 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUQ", v4i64x_info, v8i64_info>;
3222 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPQ", v2i64x_info, v8i64_info>;
3223 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUQ", v2i64x_info, v8i64_info>;
3225 defm : avx512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v8f32x_info, v16f32_info>;
3226 defm : avx512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v4f32x_info, v16f32_info>;
3227 defm : avx512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v4f64x_info, v8f64_info>;
3228 defm : avx512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v2f64x_info, v8f64_info>;
3231 let Predicates = [HasBWI, NoVLX] in {
3232 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
3233 // increase the pattern complexity the way an immediate would.
3234 let AddedComplexity = 2 in {
3235 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v32i8x_info, v64i8_info>;
3236 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v32i8x_info, v64i8_info>;
3238 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v16i8x_info, v64i8_info>;
3239 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v16i8x_info, v64i8_info>;
3241 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v16i16x_info, v32i16_info>;
3242 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v16i16x_info, v32i16_info>;
3244 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v8i16x_info, v32i16_info>;
3245 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v8i16x_info, v32i16_info>;
3248 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPB", v32i8x_info, v64i8_info>;
3249 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUB", v32i8x_info, v64i8_info>;
3251 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPB", v16i8x_info, v64i8_info>;
3252 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUB", v16i8x_info, v64i8_info>;
3254 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPW", v16i16x_info, v32i16_info>;
3255 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUW", v16i16x_info, v32i16_info>;
3257 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPW", v8i16x_info, v32i16_info>;
3258 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUW", v8i16x_info, v32i16_info>;
3261 // Mask setting all 0s or 1s
3262 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
3263 let Predicates = [HasAVX512] in
3264 let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3265 SchedRW = [WriteZero] in
3266 def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3267 [(set KRC:$dst, (VT Val))]>;
3270 multiclass avx512_mask_setop_w<PatFrag Val> {
3271 defm W : avx512_mask_setop<VK16, v16i1, Val>;
3272 defm D : avx512_mask_setop<VK32, v32i1, Val>;
3273 defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3276 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3277 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
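// KSET0/KSET1 are rematerializable pseudos; they are expanded to real mask
// instructions after register allocation (an all-zeroes mask can be formed
// with kxor of a register with itself, an all-ones mask with kxnor).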
3279 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
3280 let Predicates = [HasAVX512] in {
3281 def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3282 def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3283 def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3284 def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3285 def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
3286 def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
3287 def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
3288 def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
3291 // Patterns for kmask insert_subvector/extract_subvector to/from index=0
3292 multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3293 RegisterClass RC, ValueType VT> {
3294 def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3295 (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3297 def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3298 (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
3300 defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>;
3301 defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>;
3302 defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>;
3303 defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
3304 defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
3305 defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;
3307 defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>;
3308 defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>;
3309 defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>;
3310 defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>;
3311 defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>;
3313 defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>;
3314 defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>;
3315 defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>;
3316 defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>;
3318 defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>;
3319 defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>;
3320 defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>;
3322 defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3323 defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3325 defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
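// An index-0 mask subvector insert/extract needs no instruction at all, so
// these all lower to plain register-class copies.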
3327 //===----------------------------------------------------------------------===//
3328 // AVX-512 - Aligned and unaligned load and store
3329 //===----------------------------------------------------------------------===//
3331 multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3332 X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3333 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3334 bit NoRMPattern = 0,
3335 SDPatternOperator SelectOprr = vselect> {
3336 let hasSideEffects = 0 in {
3337 let isMoveReg = 1 in
3338 def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3339 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3340 _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
3341 EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
3342 def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3343 (ins _.KRCWM:$mask, _.RC:$src),
3344 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3345 "${dst} {${mask}} {z}, $src}"),
3346 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3347 (_.VT _.RC:$src),
3348 _.ImmAllZerosV)))], _.ExeDomain>,
3349 EVEX, EVEX_KZ, Sched<[Sched.RR]>;
3351 let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3352 def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3353 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3354 !if(NoRMPattern, [],
3355 [(set _.RC:$dst,
3356 (_.VT (bitconvert (ld_frag addr:$src))))]),
3357 _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
3358 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
3360 let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3361 def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3362 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3363 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3364 "${dst} {${mask}}, $src1}"),
3365 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3366 (_.VT _.RC:$src1),
3367 (_.VT _.RC:$src0))))], _.ExeDomain>,
3368 EVEX, EVEX_K, Sched<[Sched.RR]>;
3369 def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3370 (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3371 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3372 "${dst} {${mask}}, $src1}"),
3373 [(set _.RC:$dst, (_.VT
3374 (vselect _.KRCWM:$mask,
3375 (_.VT (bitconvert (ld_frag addr:$src1))),
3376 (_.VT _.RC:$src0))))], _.ExeDomain>,
3377 EVEX, EVEX_K, Sched<[Sched.RM]>;
3379 def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3380 (ins _.KRCWM:$mask, _.MemOp:$src),
3381 OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3382 "${dst} {${mask}} {z}, $src}",
3383 [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
3384 (_.VT (bitconvert (ld_frag addr:$src))), _.ImmAllZerosV)))],
3385 _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
3387 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3388 (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3390 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3391 (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3393 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3394 (!cast<Instruction>(Name#_.ZSuffix##rmk) _.RC:$src0,
3395 _.KRCWM:$mask, addr:$ptr)>;
3398 multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3399 AVX512VLVectorVTInfo _, Predicate prd,
3400 X86SchedWriteMoveLSWidths Sched,
3401 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
3402 let Predicates = [prd] in
3403 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3404 _.info512.AlignedLdFrag, masked_load_aligned512,
3405 Sched.ZMM, "", NoRMPattern>, EVEX_V512;
3407 let Predicates = [prd, HasVLX] in {
3408 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3409 _.info256.AlignedLdFrag, masked_load_aligned256,
3410 Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
3411 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3412 _.info128.AlignedLdFrag, masked_load_aligned128,
3413 Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
3417 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3418 AVX512VLVectorVTInfo _, Predicate prd,
3419 X86SchedWriteMoveLSWidths Sched,
3420 string EVEX2VEXOvrd, bit NoRMPattern = 0,
3421 SDPatternOperator SelectOprr = vselect> {
3422 let Predicates = [prd] in
3423 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3424 masked_load_unaligned, Sched.ZMM, "",
3425 NoRMPattern, SelectOprr>, EVEX_V512;
3427 let Predicates = [prd, HasVLX] in {
3428 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3429 masked_load_unaligned, Sched.YMM, EVEX2VEXOvrd#"Y",
3430 NoRMPattern, SelectOprr>, EVEX_V256;
3431 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3432 masked_load_unaligned, Sched.XMM, EVEX2VEXOvrd,
3433 NoRMPattern, SelectOprr>, EVEX_V128;
3437 multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3438 X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3439 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3440 bit NoMRPattern = 0> {
3441 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3442 let isMoveReg = 1 in
3443 def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3444 OpcodeStr # "\t{$src, $dst|$dst, $src}",
3445 [], _.ExeDomain>, EVEX,
3446 FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
3447 EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
3448 def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3449 (ins _.KRCWM:$mask, _.RC:$src),
3450 OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3451 "${dst} {${mask}}, $src}",
3452 [], _.ExeDomain>, EVEX, EVEX_K,
3453 FoldGenData<BaseName#_.ZSuffix#rrk>,
3454 Sched<[Sched.RR]>;
3455 def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3456 (ins _.KRCWM:$mask, _.RC:$src),
3457 OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3458 "${dst} {${mask}} {z}, $src}",
3459 [], _.ExeDomain>, EVEX, EVEX_KZ,
3460 FoldGenData<BaseName#_.ZSuffix#rrkz>,
3461 Sched<[Sched.RR]>;
3464 let hasSideEffects = 0, mayStore = 1 in
3465 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3466 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3467 !if(NoMRPattern, [],
3468 [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3469 _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
3470 EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
3471 def mrk : AVX512PI<opc, MRMDestMem, (outs),
3472 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3473 OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3474 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
3475 NotMemoryFoldable;
3477 def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3478 (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3479 _.KRCWM:$mask, _.RC:$src)>;
3481 def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3482 (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3483 _.RC:$dst, _.RC:$src), 0>;
3484 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3485 (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3486 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3487 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3488 (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3489 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3492 multiclass avx512_store_vl<bits<8> opc, string OpcodeStr,
3493 AVX512VLVectorVTInfo _, Predicate prd,
3494 X86SchedWriteMoveLSWidths Sched,
3495 string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3496 let Predicates = [prd] in
3497 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3498 masked_store_unaligned, Sched.ZMM, "",
3499 NoMRPattern>, EVEX_V512;
3500 let Predicates = [prd, HasVLX] in {
3501 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3502 masked_store_unaligned, Sched.YMM,
3503 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3504 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3505 masked_store_unaligned, Sched.XMM, EVEX2VEXOvrd,
3506 NoMRPattern>, EVEX_V128;
3510 multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3511 AVX512VLVectorVTInfo _, Predicate prd,
3512 X86SchedWriteMoveLSWidths Sched,
3513 string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3514 let Predicates = [prd] in
3515 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3516 masked_store_aligned512, Sched.ZMM, "",
3517 NoMRPattern>, EVEX_V512;
3519 let Predicates = [prd, HasVLX] in {
3520 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3521 masked_store_aligned256, Sched.YMM,
3522 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3523 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3524 masked_store_aligned128, Sched.XMM, EVEX2VEXOvrd,
3525 NoMRPattern>, EVEX_V128;
3529 defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3530 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3531 avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3532 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3533 PS, EVEX_CD8<32, CD8VF>;
3535 defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3536 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3537 avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3538 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3539 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3541 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3542 SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
3543 avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3544 SchedWriteFMoveLS, "VMOVUPS">,
3545 PS, EVEX_CD8<32, CD8VF>;
3547 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3548 SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
3549 avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3550 SchedWriteFMoveLS, "VMOVUPD">,
3551 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3553 defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3554 HasAVX512, SchedWriteVecMoveLS,
3555 "VMOVDQA", 1>,
3556 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3557 HasAVX512, SchedWriteVecMoveLS,
3558 "VMOVDQA", 1>,
3559 PD, EVEX_CD8<32, CD8VF>;
3561 defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3562 HasAVX512, SchedWriteVecMoveLS,
3563 "VMOVDQA">,
3564 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3565 HasAVX512, SchedWriteVecMoveLS,
3566 "VMOVDQA">,
3567 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3569 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3570 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3571 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3572 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3573 XD, EVEX_CD8<8, CD8VF>;
3575 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3576 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3577 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3578 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3579 XD, VEX_W, EVEX_CD8<16, CD8VF>;
3581 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3582 SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
3583 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3584 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3585 XS, EVEX_CD8<32, CD8VF>;
3587 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3588 SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
3589 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3590 SchedWriteVecMoveLS, "VMOVDQU">,
3591 XS, VEX_W, EVEX_CD8<64, CD8VF>;
3593 // Special instructions to help with spilling when we don't have VLX. We need
3594 // to load or store from a ZMM register instead. These are converted in
3595 // expandPostRAPseudos.
3596 let isReMaterializable = 1, canFoldAsLoad = 1,
3597 isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3598 def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3599 "", []>, Sched<[WriteFLoadX]>;
3600 def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3601 "", []>, Sched<[WriteFLoadY]>;
3602 def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3603 "", []>, Sched<[WriteFLoadX]>;
3604 def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3605 "", []>, Sched<[WriteFLoadY]>;
3608 let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3609 def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3610 "", []>, Sched<[WriteFStoreX]>;
3611 def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3612 "", []>, Sched<[WriteFStoreY]>;
3613 def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3614 "", []>, Sched<[WriteFStoreX]>;
3615 def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3616 "", []>, Sched<[WriteFStoreY]>;
3619 def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
3620 (v8i64 VR512:$src))),
3621 (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3622 VK8), VR512:$src)>;
3624 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3625 (v16i32 VR512:$src))),
3626 (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3628 // These patterns exist to prevent the above patterns from introducing a second
3629 // mask inversion when one already exists.
3630 def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
3631 (bc_v8i64 (v16i32 immAllZerosV)),
3632 (v8i64 VR512:$src))),
3633 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3634 def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
3635 (v16i32 immAllZerosV),
3636 (v16i32 VR512:$src))),
3637 (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3639 multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3640 X86VectorVTInfo Wide> {
3641 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3642 Narrow.RC:$src1, Narrow.RC:$src0)),
3643 (Narrow.VT (EXTRACT_SUBREG
3644 (Wide.VT
3645 (!cast<Instruction>(InstrStr#"rrk")
3646 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3647 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3648 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3649 Narrow.SubRegIdx))>;
3651 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3652 Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3653 (Narrow.VT (EXTRACT_SUBREG
3654 (Wide.VT
3655 (!cast<Instruction>(InstrStr#"rrkz")
3656 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3657 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3658 Narrow.SubRegIdx))>;
3661 // Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
3662 // available. Use a 512-bit operation and extract.
3663 let Predicates = [HasAVX512, NoVLX] in {
3664 defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3665 defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3666 defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3667 defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3669 defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3670 defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3671 defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3672 defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
3675 let Predicates = [HasBWI, NoVLX] in {
3676 defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3677 defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3679 defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3680 defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
3683 let Predicates = [HasAVX512] in {
3685 def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3686 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3687 def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3688 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3689 def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3690 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3691 def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3692 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3693 def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3694 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3695 def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3696 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3699 let Predicates = [HasVLX] in {
3701 def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3702 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3703 def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3704 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3705 def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3706 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3707 def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3708 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3709 def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3710 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3711 def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3712 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3715 def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3716 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3717 def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3718 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3719 def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3720 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3721 def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3722 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3723 def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3724 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3725 def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3726 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3729 multiclass masked_move_for_extract<string InstrStr, X86VectorVTInfo From,
3730 X86VectorVTInfo To, X86VectorVTInfo Cast> {
3731 def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
3732 (bitconvert
3733 (To.VT (extract_subvector
3734 (From.VT From.RC:$src), (iPTR 0)))),
3735 Cast.RC:$src0)),
3736 (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
3737 Cast.RC:$src0, Cast.KRCWM:$mask,
3738 (To.VT (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx))))>;
3740 def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
3741 (bitconvert
3742 (To.VT (extract_subvector
3743 (From.VT From.RC:$src), (iPTR 0)))),
3744 Cast.ImmAllZerosV)),
3745 (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
3746 Cast.KRCWM:$mask,
3747 (To.VT (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx))))>;
3751 let Predicates = [HasVLX] in {
3752 // A masked extract from the first 128-bits of a 256-bit vector can be
3753 // implemented with masked move.
3754 defm : masked_move_for_extract<"VMOVDQA64Z128", v4i64x_info, v2i64x_info, v2i64x_info>;
3755 defm : masked_move_for_extract<"VMOVDQA64Z128", v8i32x_info, v4i32x_info, v2i64x_info>;
3756 defm : masked_move_for_extract<"VMOVDQA64Z128", v16i16x_info, v8i16x_info, v2i64x_info>;
3757 defm : masked_move_for_extract<"VMOVDQA64Z128", v32i8x_info, v16i8x_info, v2i64x_info>;
3758 defm : masked_move_for_extract<"VMOVDQA32Z128", v4i64x_info, v2i64x_info, v4i32x_info>;
3759 defm : masked_move_for_extract<"VMOVDQA32Z128", v8i32x_info, v4i32x_info, v4i32x_info>;
3760 defm : masked_move_for_extract<"VMOVDQA32Z128", v16i16x_info, v8i16x_info, v4i32x_info>;
3761 defm : masked_move_for_extract<"VMOVDQA32Z128", v32i8x_info, v16i8x_info, v4i32x_info>;
3762 defm : masked_move_for_extract<"VMOVAPDZ128", v4f64x_info, v2f64x_info, v2f64x_info>;
3763 defm : masked_move_for_extract<"VMOVAPDZ128", v8f32x_info, v4f32x_info, v2f64x_info>;
3764 defm : masked_move_for_extract<"VMOVAPSZ128", v4f64x_info, v2f64x_info, v4f32x_info>;
3765 defm : masked_move_for_extract<"VMOVAPSZ128", v8f32x_info, v4f32x_info, v4f32x_info>;
3767 // A masked extract from the first 128-bits of a 512-bit vector can be
3768 // implemented with masked move.
3769 defm : masked_move_for_extract<"VMOVDQA64Z128", v8i64_info, v2i64x_info, v2i64x_info>;
3770 defm : masked_move_for_extract<"VMOVDQA64Z128", v16i32_info, v4i32x_info, v2i64x_info>;
3771 defm : masked_move_for_extract<"VMOVDQA64Z128", v32i16_info, v8i16x_info, v2i64x_info>;
3772 defm : masked_move_for_extract<"VMOVDQA64Z128", v64i8_info, v16i8x_info, v2i64x_info>;
3773 defm : masked_move_for_extract<"VMOVDQA32Z128", v8i64_info, v2i64x_info, v4i32x_info>;
3774 defm : masked_move_for_extract<"VMOVDQA32Z128", v16i32_info, v4i32x_info, v4i32x_info>;
3775 defm : masked_move_for_extract<"VMOVDQA32Z128", v32i16_info, v8i16x_info, v4i32x_info>;
3776 defm : masked_move_for_extract<"VMOVDQA32Z128", v64i8_info, v16i8x_info, v4i32x_info>;
3777 defm : masked_move_for_extract<"VMOVAPDZ128", v8f64_info, v2f64x_info, v2f64x_info>;
3778 defm : masked_move_for_extract<"VMOVAPDZ128", v16f32_info, v4f32x_info, v2f64x_info>;
3779 defm : masked_move_for_extract<"VMOVAPSZ128", v8f64_info, v2f64x_info, v4f32x_info>;
3780 defm : masked_move_for_extract<"VMOVAPSZ128", v16f32_info, v4f32x_info, v4f32x_info>;
3782 // A masked extract from the first 256-bits of a 512-bit vector can be
3783 // implemented with masked move.
3784 defm : masked_move_for_extract<"VMOVDQA64Z256", v8i64_info, v4i64x_info, v4i64x_info>;
3785 defm : masked_move_for_extract<"VMOVDQA64Z256", v16i32_info, v8i32x_info, v4i64x_info>;
3786 defm : masked_move_for_extract<"VMOVDQA64Z256", v32i16_info, v16i16x_info, v4i64x_info>;
3787 defm : masked_move_for_extract<"VMOVDQA64Z256", v64i8_info, v32i8x_info, v4i64x_info>;
3788 defm : masked_move_for_extract<"VMOVDQA32Z256", v8i64_info, v4i64x_info, v8i32x_info>;
3789 defm : masked_move_for_extract<"VMOVDQA32Z256", v16i32_info, v8i32x_info, v8i32x_info>;
3790 defm : masked_move_for_extract<"VMOVDQA32Z256", v32i16_info, v16i16x_info, v8i32x_info>;
3791 defm : masked_move_for_extract<"VMOVDQA32Z256", v64i8_info, v32i8x_info, v8i32x_info>;
3792 defm : masked_move_for_extract<"VMOVAPDZ256", v8f64_info, v4f64x_info, v4f64x_info>;
3793 defm : masked_move_for_extract<"VMOVAPDZ256", v16f32_info, v8f32x_info, v4f64x_info>;
3794 defm : masked_move_for_extract<"VMOVAPSZ256", v8f64_info, v4f64x_info, v8f32x_info>;
3795 defm : masked_move_for_extract<"VMOVAPSZ256", v16f32_info, v8f32x_info, v8f32x_info>;
3798 // Move Int Doubleword to Packed Double Int
3800 let ExeDomain = SSEPackedInt in {
3801 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3802 "vmovd\t{$src, $dst|$dst, $src}",
3804 (v4i32 (scalar_to_vector GR32:$src)))]>,
3805 EVEX, Sched<[WriteVecMoveFromGpr]>;
3806 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3807 "vmovd\t{$src, $dst|$dst, $src}",
3809 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3810 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
3811 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3812 "vmovq\t{$src, $dst|$dst, $src}",
3814 (v2i64 (scalar_to_vector GR64:$src)))]>,
3815 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3816 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3817 def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3818 (ins i64mem:$src),
3819 "vmovq\t{$src, $dst|$dst, $src}", []>,
3820 EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
3821 let isCodeGenOnly = 1 in {
3822 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3823 "vmovq\t{$src, $dst|$dst, $src}",
3824 [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3825 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3826 def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src),
3827 "vmovq\t{$src, $dst|$dst, $src}",
3828 [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>,
3829 EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
3830 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3831 "vmovq\t{$src, $dst|$dst, $src}",
3832 [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3833 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3834 def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
3835 "vmovq\t{$src, $dst|$dst, $src}",
3836 [(store (i64 (bitconvert FR64X:$src)), addr:$dst)]>,
3837 EVEX, VEX_W, Sched<[WriteVecStore]>,
3838 EVEX_CD8<64, CD8VT1>;
3840 } // ExeDomain = SSEPackedInt
3842 // Move Int Doubleword to Single Scalar
3844 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3845 def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3846 "vmovd\t{$src, $dst|$dst, $src}",
3847 [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3848 EVEX, Sched<[WriteVecMoveFromGpr]>;
3850 def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
3851 "vmovd\t{$src, $dst|$dst, $src}",
3852 [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))]>,
3853 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
3854 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3856 // Move doubleword from xmm register to r/m32
3858 let ExeDomain = SSEPackedInt in {
3859 def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3860 "vmovd\t{$src, $dst|$dst, $src}",
3861 [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3862 (iPTR 0)))]>,
3863 EVEX, Sched<[WriteVecMoveToGpr]>;
3864 def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
3865 (ins i32mem:$dst, VR128X:$src),
3866 "vmovd\t{$src, $dst|$dst, $src}",
3867 [(store (i32 (extractelt (v4i32 VR128X:$src),
3868 (iPTR 0))), addr:$dst)]>,
3869 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
3870 } // ExeDomain = SSEPackedInt
3872 // Move quadword from xmm1 register to r/m64
3874 let ExeDomain = SSEPackedInt in {
3875 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3876 "vmovq\t{$src, $dst|$dst, $src}",
3877 [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3878 (iPTR 0)))]>,
3879 PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
3880 Requires<[HasAVX512]>;
3882 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
3883 def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3884 "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
3885 EVEX, VEX_W, Sched<[WriteVecStore]>,
3886 Requires<[HasAVX512, In64BitMode]>;
3888 def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3889 (ins i64mem:$dst, VR128X:$src),
3890 "vmovq\t{$src, $dst|$dst, $src}",
3891 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3892 addr:$dst)]>,
3893 EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
3894 Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
3896 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
3897 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3898 (ins VR128X:$src),
3899 "vmovq\t{$src, $dst|$dst, $src}", []>,
3900 EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
3901 } // ExeDomain = SSEPackedInt
3903 def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
3904 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
3906 // Move Scalar Single to Double Int
3908 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3909 def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3910 (ins FR32X:$src),
3911 "vmovd\t{$src, $dst|$dst, $src}",
3912 [(set GR32:$dst, (bitconvert FR32X:$src))]>,
3913 EVEX, Sched<[WriteVecMoveToGpr]>;
3914 def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
3915 (ins i32mem:$dst, FR32X:$src),
3916 "vmovd\t{$src, $dst|$dst, $src}",
3917 [(store (i32 (bitconvert FR32X:$src)), addr:$dst)]>,
3918 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
3919 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3921 // Move Quadword Int to Packed Quadword Int
3923 let ExeDomain = SSEPackedInt in {
3924 def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3925 (ins i64mem:$src),
3926 "vmovq\t{$src, $dst|$dst, $src}",
3927 [(set VR128X:$dst,
3928 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3929 EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
3930 } // ExeDomain = SSEPackedInt
3932 // Allow "vmovd" but print "vmovq".
3933 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3934 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
3935 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3936 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3938 //===----------------------------------------------------------------------===//
3939 // AVX-512 MOVSS, MOVSD
3940 //===----------------------------------------------------------------------===//
3942 multiclass avx512_move_scalar<string asm, SDNode OpNode,
3943 X86VectorVTInfo _> {
3944 let Predicates = [HasAVX512, OptForSize] in
3945 def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3946 (ins _.RC:$src1, _.RC:$src2),
3947 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3948 [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3949 _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
3950 def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3951 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3952 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3953 "$dst {${mask}} {z}, $src1, $src2}"),
3954 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3955 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3956 _.ImmAllZerosV)))],
3957 _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
3958 let Constraints = "$src0 = $dst" in
3959 def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3960 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3961 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3962 "$dst {${mask}}, $src1, $src2}"),
3963 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3964 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3965 (_.VT _.RC:$src0))))],
3966 _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
3967 let canFoldAsLoad = 1, isReMaterializable = 1 in
3968 def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3969 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3970 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3971 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3972 let mayLoad = 1, hasSideEffects = 0 in {
3973 let Constraints = "$src0 = $dst" in
3974 def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3975 (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3976 !strconcat(asm, "\t{$src, $dst {${mask}}|",
3977 "$dst {${mask}}, $src}"),
3978 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
3979 def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3980 (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3981 !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3982 "$dst {${mask}} {z}, $src}"),
3983 [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
3985 def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
3986 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3987 [(store _.FRC:$src, addr:$dst)], _.ExeDomain>,
3988 EVEX, Sched<[WriteFStore]>;
3989 let mayStore = 1, hasSideEffects = 0 in
3990 def mrk: AVX512PI<0x11, MRMDestMem, (outs),
3991 (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
3992 !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
3993 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
3994 NotMemoryFoldable;
3997 defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
3998 VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
4000 defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
4001 VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
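// The masked forms select only element 0 (X86selects); e.g.
// "vmovss %xmm2, %xmm1, %xmm0 {%k1}" takes element 0 from %xmm2 when k1[0]
// is set, otherwise it is merged from $src0 (or zeroed in the {z} form).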
4004 multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
4005 PatLeaf ZeroFP, X86VectorVTInfo _> {
4007 def : Pat<(_.VT (OpNode _.RC:$src0,
4008 (_.VT (scalar_to_vector
4009 (_.EltVT (X86selects VK1WM:$mask,
4010 (_.EltVT _.FRC:$src1),
4011 (_.EltVT _.FRC:$src2))))))),
4012 (!cast<Instruction>(InstrStr#rrk)
4013 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
4014 VK1WM:$mask,
4015 (_.VT _.RC:$src0),
4016 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
4018 def : Pat<(_.VT (OpNode _.RC:$src0,
4019 (_.VT (scalar_to_vector
4020 (_.EltVT (X86selects VK1WM:$mask,
4021 (_.EltVT _.FRC:$src1),
4022 (_.EltVT ZeroFP))))))),
4023 (!cast<Instruction>(InstrStr#rrkz)
4024 VK1WM:$mask,
4025 (_.VT _.RC:$src0),
4026 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
4029 multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4030 dag Mask, RegisterClass MaskRC> {
4032 def : Pat<(masked_store
4033 (_.info512.VT (insert_subvector undef,
4034 (_.info128.VT _.info128.RC:$src),
4035 (iPTR 0))), addr:$dst, Mask),
4036 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4037 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4038 (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
4042 multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
4043 AVX512VLVectorVTInfo _,
4044 dag Mask, RegisterClass MaskRC,
4045 SubRegIndex subreg> {
4047 def : Pat<(masked_store
4048 (_.info512.VT (insert_subvector undef,
4049 (_.info128.VT _.info128.RC:$src),
4050 (iPTR 0))), addr:$dst, Mask),
4051 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4052 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4053 (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
4057 // This matches the more recent codegen from clang that avoids emitting a
4058 // 512-bit masked store directly. Codegen will widen a 128-bit masked store
4059 // to 512 bits on AVX512F-only targets.
4060 multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4061 AVX512VLVectorVTInfo _,
4062 dag Mask512, dag Mask128,
4063 RegisterClass MaskRC,
4064 SubRegIndex subreg> {
4066 // AVX512F pattern.
4067 def : Pat<(masked_store
4068 (_.info512.VT (insert_subvector undef,
4069 (_.info128.VT _.info128.RC:$src),
4070 (iPTR 0))), addr:$dst, Mask512),
4071 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4072 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4073 (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
4075 // AVX512VL pattern.
4076 def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4077 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4078 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4079 (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
4082 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4083 dag Mask, RegisterClass MaskRC> {
4085 def : Pat<(_.info128.VT (extract_subvector
4086 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4087 (_.info512.VT (bitconvert
4088 (v16i32 immAllZerosV))))),
4089 (iPTR 0))),
4090 (!cast<Instruction>(InstrStr#rmkz)
4091 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4092 addr:$srcAddr)>;
4094 def : Pat<(_.info128.VT (extract_subvector
4095 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4096 (_.info512.VT (insert_subvector undef,
4097 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4098 (iPTR 0))))),
4099 (iPTR 0))),
4100 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4101 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4102 addr:$srcAddr)>;
4104 }
4106 multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4107 AVX512VLVectorVTInfo _,
4108 dag Mask, RegisterClass MaskRC,
4109 SubRegIndex subreg> {
4111 def : Pat<(_.info128.VT (extract_subvector
4112 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4113 (_.info512.VT (bitconvert
4114 (v16i32 immAllZerosV))))),
4115 (iPTR 0))),
4116 (!cast<Instruction>(InstrStr#rmkz)
4117 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4118 addr:$srcAddr)>;
4120 def : Pat<(_.info128.VT (extract_subvector
4121 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4122 (_.info512.VT (insert_subvector undef,
4123 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4124 (iPTR 0))))),
4125 (iPTR 0))),
4126 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4127 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4128 addr:$srcAddr)>;
4130 }
4132 // This matches the more recent codegen from clang that avoids emitting a
4133 // 512-bit masked load directly. Codegen will widen a 128-bit masked load
4134 // to 512 bits on AVX512F-only targets.
4135 multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4136 AVX512VLVectorVTInfo _,
4137 dag Mask512, dag Mask128,
4138 RegisterClass MaskRC,
4139 SubRegIndex subreg> {
4140 // AVX512F patterns.
4141 def : Pat<(_.info128.VT (extract_subvector
4142 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4143 (_.info512.VT (bitconvert
4144 (v16i32 immAllZerosV))))),
4145 (iPTR 0))),
4146 (!cast<Instruction>(InstrStr#rmkz)
4147 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4148 addr:$srcAddr)>;
4150 def : Pat<(_.info128.VT (extract_subvector
4151 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4152 (_.info512.VT (insert_subvector undef,
4153 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4154 (iPTR 0))))),
4155 (iPTR 0))),
4156 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4157 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4158 addr:$srcAddr)>;
4160 // AVX512VL patterns.
4161 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4162 (_.info128.VT (bitconvert (v4i32 immAllZerosV))))),
4163 (!cast<Instruction>(InstrStr#rmkz)
4164 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4165 addr:$srcAddr)>;
4167 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4168 (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4169 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4170 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4171 addr:$srcAddr)>;
4172 }
4174 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4175 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
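// As a concrete sketch (not additional TableGen): with InstrStr = "VMOVSSZ" and
// _ = v4f32x_info, the first pattern in the multiclass above selects roughly
//   (v4f32 (X86Movss $src0, (scalar_to_vector (X86selects $mask, $src1, $src2))))
// into VMOVSSZrrk, after copying the FR32X operands into VR128X.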
4177 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4178 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4179 defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4180 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4181 defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4182 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4184 defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4185 (v16i1 (insert_subvector
4186 (v16i1 immAllZerosV),
4187 (v4i1 (extract_subvector
4188 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4189 (iPTR 0))),
4190 (iPTR 0))),
4191 (v4i1 (extract_subvector
4192 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4193 (iPTR 0))), GR8, sub_8bit>;
4194 defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4195 (v8i1
4196 (extract_subvector
4197 (v16i1
4198 (insert_subvector
4199 (v16i1 immAllZerosV),
4200 (v2i1 (extract_subvector
4201 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4202 (iPTR 0))),
4203 (iPTR 0))),
4204 (iPTR 0))),
4205 (v2i1 (extract_subvector
4206 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4207 (iPTR 0))), GR8, sub_8bit>;
4209 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4210 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4211 defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4212 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4213 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4214 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
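// In the subreg variants above, the narrow GR8/GR16 mask is first widened into
// a GR32 with INSERT_SUBREG on an IMPLICIT_DEF and then copied into VK1WM;
// only the low mask bit is consumed by the k-masked scalar move.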
4216 defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4217 (v16i1 (insert_subvector
4218 (v16i1 immAllZerosV),
4219 (v4i1 (extract_subvector
4220 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4221 (iPTR 0))),
4222 (iPTR 0))),
4223 (v4i1 (extract_subvector
4224 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4225 (iPTR 0))), GR8, sub_8bit>;
4226 defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4227 (v8i1
4228 (extract_subvector
4229 (v16i1
4230 (insert_subvector
4231 (v16i1 immAllZerosV),
4232 (v2i1 (extract_subvector
4233 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4234 (iPTR 0))),
4235 (iPTR 0))),
4236 (iPTR 0))),
4237 (v2i1 (extract_subvector
4238 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4239 (iPTR 0))), GR8, sub_8bit>;
4241 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4242 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4243 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4244 VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4245 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4247 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4248 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4249 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4251 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4252 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4253 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4254 VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4255 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4257 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fpimm0)),
4258 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4259 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
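// The four patterns above implement a masked scalar select directly on
// FR32X/FR64X values by bouncing through VR128X: the scalars are copied into
// 128-bit registers, a masked VMOVSS/VMOVSD performs the select on element 0,
// and the result is copied back to the scalar register class.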
4261 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
4262 def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4263 (ins VR128X:$src1, VR128X:$src2),
4264 "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4265 []>, XS, EVEX_4V, VEX_LIG,
4266 FoldGenData<"VMOVSSZrr">,
4267 Sched<[SchedWriteFShuffle.XMM]>;
4269 let Constraints = "$src0 = $dst" in
4270 def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4271 (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4272 VR128X:$src1, VR128X:$src2),
4273 "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4274 "$dst {${mask}}, $src1, $src2}",
4275 []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
4276 FoldGenData<"VMOVSSZrrk">,
4277 Sched<[SchedWriteFShuffle.XMM]>;
4279 def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4280 (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4281 "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4282 "$dst {${mask}} {z}, $src1, $src2}",
4283 []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
4284 FoldGenData<"VMOVSSZrrkz">,
4285 Sched<[SchedWriteFShuffle.XMM]>;
4287 def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4288 (ins VR128X:$src1, VR128X:$src2),
4289 "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4290 []>, XD, EVEX_4V, VEX_LIG, VEX_W,
4291 FoldGenData<"VMOVSDZrr">,
4292 Sched<[SchedWriteFShuffle.XMM]>;
4294 let Constraints = "$src0 = $dst" in
4295 def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4296 (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4297 VR128X:$src1, VR128X:$src2),
4298 "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4299 "$dst {${mask}}, $src1, $src2}",
4300 []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
4301 VEX_W, FoldGenData<"VMOVSDZrrk">,
4302 Sched<[SchedWriteFShuffle.XMM]>;
4304 def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4305 (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4306 VR128X:$src2),
4307 "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4308 "$dst {${mask}} {z}, $src1, $src2}",
4309 []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
4310 VEX_W, FoldGenData<"VMOVSDZrrkz">,
4311 Sched<[SchedWriteFShuffle.XMM]>;
4312 }
4314 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4315 (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4316 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4317 "$dst {${mask}}, $src1, $src2}",
4318 (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4319 VR128X:$src1, VR128X:$src2), 0>;
4320 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4321 "$dst {${mask}} {z}, $src1, $src2}",
4322 (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4323 VR128X:$src1, VR128X:$src2), 0>;
4324 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4325 (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4326 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4327 "$dst {${mask}}, $src1, $src2}",
4328 (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4329 VR128X:$src1, VR128X:$src2), 0>;
4330 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4331 "$dst {${mask}} {z}, $src1, $src2}",
4332 (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4333 VR128X:$src1, VR128X:$src2), 0>;
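// The ".s" aliases above force the alternative MRMDestReg (opcode 0x11)
// encodings of the register-to-register moves, which is presumably intended
// for exercising the reversed encodings in assembly and disassembly tests.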
4335 let Predicates = [HasAVX512, OptForSize] in {
4336 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4337 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4338 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4339 (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4341 // Move low f32 and clear high bits.
4342 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4343 (SUBREG_TO_REG (i32 0),
4344 (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4345 (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4346 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4347 (SUBREG_TO_REG (i32 0),
4348 (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4349 (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4351 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4352 (SUBREG_TO_REG (i32 0),
4353 (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
4354 (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))), sub_xmm)>;
4355 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4356 (SUBREG_TO_REG (i32 0),
4357 (v2i64 (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
4358 (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))), sub_xmm)>;
4360 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4361 (SUBREG_TO_REG (i32 0),
4362 (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4363 (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4364 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4365 (SUBREG_TO_REG (i32 0),
4366 (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4367 (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4369 def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4370 (SUBREG_TO_REG (i32 0),
4371 (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
4372 (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))), sub_xmm)>;
4374 def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4375 (SUBREG_TO_REG (i32 0),
4376 (v2i64 (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
4377 (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))), sub_xmm)>;
4378 }
4381 // Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4382 // VMOVSS/SD. Unfortunately, this loses the ability to use XMM16-31.
4383 let Predicates = [HasAVX512, OptForSpeed] in {
4384 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4385 (SUBREG_TO_REG (i32 0),
4386 (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4387 (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4388 (i8 1))), sub_xmm)>;
4389 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4390 (SUBREG_TO_REG (i32 0),
4391 (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4392 (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4393 (i8 3))), sub_xmm)>;
4395 def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4396 (SUBREG_TO_REG (i32 0),
4397 (v2f64 (VBLENDPDrri (v2f64 (V_SET0)),
4398 (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)),
4399 (i8 1))), sub_xmm)>;
4400 def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4401 (SUBREG_TO_REG (i32 0),
4402 (v2i64 (VPBLENDWrri (v2i64 (V_SET0)),
4403 (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)),
4404 (i8 0xf))), sub_xmm)>;
4405 }
4407 let Predicates = [HasAVX512] in {
4409 // MOVSSrm zeros the high parts of the register; represent this
4410 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
4411 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
4412 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
4413 def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
4414 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
4415 def : Pat<(v4f32 (X86vzload addr:$src)),
4416 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
4418 // MOVSDrm zeros the high parts of the register; represent this
4419 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
4420 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
4421 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4422 def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
4423 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4424 def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
4425 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4426 def : Pat<(v2f64 (X86vzload addr:$src)),
4427 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4429 // Represent the same patterns above but in the form they appear for
4430 // 256-bit types.
4431 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
4432 (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
4433 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4434 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
4435 (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
4436 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4437 def : Pat<(v8f32 (X86vzload addr:$src)),
4438 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4439 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
4440 (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
4441 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4442 def : Pat<(v4f64 (X86vzload addr:$src)),
4443 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4445 // Represent the same patterns above but in the form they appear for
4446 // 512-bit types.
4447 def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
4448 (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
4449 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4450 def : Pat<(v16f32 (X86vzmovl (insert_subvector undef,
4451 (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
4452 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4453 def : Pat<(v16f32 (X86vzload addr:$src)),
4454 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4455 def : Pat<(v8f64 (X86vzmovl (insert_subvector undef,
4456 (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
4457 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4458 def : Pat<(v8f64 (X86vzload addr:$src)),
4459 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4461 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
4462 (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
4463 (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4465 // Extract and store.
4466 def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
4467 addr:$dst),
4468 (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
4469 }
4471 let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4472 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4473 (ins VR128X:$src),
4474 "vmovq\t{$src, $dst|$dst, $src}",
4475 [(set VR128X:$dst, (v2i64 (X86vzmovl
4476 (v2i64 VR128X:$src))))]>,
4477 EVEX, VEX_W;
4478 }
4480 let Predicates = [HasAVX512] in {
4481 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4482 (VMOVDI2PDIZrr GR32:$src)>;
4484 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4485 (VMOV64toPQIZrr GR64:$src)>;
4487 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
4488 (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
4489 (SUBREG_TO_REG (i64 0), (v2i64 (VMOV64toPQIZrr GR64:$src)), sub_xmm)>;
4491 def : Pat<(v8i64 (X86vzmovl (insert_subvector undef,
4492 (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
4493 (SUBREG_TO_REG (i64 0), (v2i64 (VMOV64toPQIZrr GR64:$src)), sub_xmm)>;
4495 // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
4496 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
4497 (VMOVDI2PDIZrm addr:$src)>;
4498 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
4499 (VMOVDI2PDIZrm addr:$src)>;
4500 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
4501 (VMOVDI2PDIZrm addr:$src)>;
4502 def : Pat<(v4i32 (X86vzload addr:$src)),
4503 (VMOVDI2PDIZrm addr:$src)>;
4504 def : Pat<(v8i32 (X86vzload addr:$src)),
4505 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4506 def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
4507 (VMOVQI2PQIZrm addr:$src)>;
4508 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4509 (VMOVZPQILo2PQIZrr VR128X:$src)>;
4510 def : Pat<(v2i64 (X86vzload addr:$src)),
4511 (VMOVQI2PQIZrm addr:$src)>;
4512 def : Pat<(v4i64 (X86vzload addr:$src)),
4513 (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4515 // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
4516 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
4517 (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
4518 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrr GR32:$src)), sub_xmm)>;
4519 def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
4520 (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
4521 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrr GR32:$src)), sub_xmm)>;
4523 // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4524 def : Pat<(v16i32 (X86vzload addr:$src)),
4525 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4526 def : Pat<(v8i64 (X86vzload addr:$src)),
4527 (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4528 }
4530 //===----------------------------------------------------------------------===//
4531 // AVX-512 - Non-temporals
4532 //===----------------------------------------------------------------------===//
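// Non-temporal accesses bypass the cache hierarchy through write-combining
// buffers. Note that the VMOVNTDQA load forms below architecturally require an
// aligned memory operand, hence the alignednontemporalload pattern fragments.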
4534 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4535 (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4536 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4537 EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4539 let Predicates = [HasVLX] in {
4540 def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4541 (ins i256mem:$src),
4542 "vmovntdqa\t{$src, $dst|$dst, $src}",
4543 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4544 EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4546 def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4547 (ins i128mem:$src),
4548 "vmovntdqa\t{$src, $dst|$dst, $src}",
4549 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4550 EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
4551 }
4553 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4554 X86SchedWriteMoveLS Sched,
4555 PatFrag st_frag = alignednontemporalstore> {
4556 let SchedRW = [Sched.MR], AddedComplexity = 400 in
4557 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4558 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4559 [(st_frag (_.VT _.RC:$src), addr:$dst)],
4560 _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
4561 }
4563 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4564 AVX512VLVectorVTInfo VTInfo,
4565 X86SchedWriteMoveLSWidths Sched> {
4566 let Predicates = [HasAVX512] in
4567 defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4569 let Predicates = [HasAVX512, HasVLX] in {
4570 defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4571 defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
4572 }
4573 }
4575 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4576 SchedWriteVecMoveLSNT>, PD;
4577 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4578 SchedWriteFMoveLSNT>, PD, VEX_W;
4579 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4580 SchedWriteFMoveLSNT>, PS;
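// Only the i64/f64/f32 VT infos are instantiated above; the AddedComplexity
// patterns below extend coverage to the remaining vector types (v16i32,
// v32i16, v64i8 and their narrower variants) by reusing VMOVNTDQ/VMOVNTDQA.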
4582 let Predicates = [HasAVX512], AddedComplexity = 400 in {
4583 def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4584 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4585 def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4586 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4587 def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4588 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4590 def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4591 (VMOVNTDQAZrm addr:$src)>;
4592 def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4593 (VMOVNTDQAZrm addr:$src)>;
4594 def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4595 (VMOVNTDQAZrm addr:$src)>;
4596 }
4598 let Predicates = [HasVLX], AddedComplexity = 400 in {
4599 def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4600 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4601 def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4602 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4603 def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4604 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4606 def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4607 (VMOVNTDQAZ256rm addr:$src)>;
4608 def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4609 (VMOVNTDQAZ256rm addr:$src)>;
4610 def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4611 (VMOVNTDQAZ256rm addr:$src)>;
4613 def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4614 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4615 def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4616 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4617 def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4618 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4620 def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4621 (VMOVNTDQAZ128rm addr:$src)>;
4622 def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4623 (VMOVNTDQAZ128rm addr:$src)>;
4624 def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4625 (VMOVNTDQAZ128rm addr:$src)>;
4626 }
4628 //===----------------------------------------------------------------------===//
4629 // AVX-512 - Integer arithmetic
4630 //===----------------------------------------------------------------------===//
4631 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4632 X86VectorVTInfo _, X86FoldableSchedWrite sched,
4633 bit IsCommutable = 0> {
4634 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4635 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4636 "$src2, $src1", "$src1, $src2",
4637 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4638 IsCommutable>, AVX512BIBase, EVEX_4V,
4639 Sched<[sched]>;
4641 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4642 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4643 "$src2, $src1", "$src1, $src2",
4644 (_.VT (OpNode _.RC:$src1,
4645 (bitconvert (_.LdFrag addr:$src2))))>,
4646 AVX512BIBase, EVEX_4V,
4647 Sched<[sched.Folded, ReadAfterLd]>;
4648 }
4650 multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4651 X86VectorVTInfo _, X86FoldableSchedWrite sched,
4652 bit IsCommutable = 0> :
4653 avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4654 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4655 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4656 "${src2}"##_.BroadcastStr##", $src1",
4657 "$src1, ${src2}"##_.BroadcastStr,
4658 (_.VT (OpNode _.RC:$src1,
4659 (X86VBroadcast
4660 (_.ScalarLdFrag addr:$src2))))>,
4661 AVX512BIBase, EVEX_4V, EVEX_B,
4662 Sched<[sched.Folded, ReadAfterLd]>;
4663 }
4665 multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4666 AVX512VLVectorVTInfo VTInfo,
4667 X86SchedWriteWidths sched, Predicate prd,
4668 bit IsCommutable = 0> {
4669 let Predicates = [prd] in
4670 defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4671 IsCommutable>, EVEX_V512;
4673 let Predicates = [prd, HasVLX] in {
4674 defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4675 sched.YMM, IsCommutable>, EVEX_V256;
4676 defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4677 sched.XMM, IsCommutable>, EVEX_V128;
4678 }
4679 }
4681 multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4682 AVX512VLVectorVTInfo VTInfo,
4683 X86SchedWriteWidths sched, Predicate prd,
4684 bit IsCommutable = 0> {
4685 let Predicates = [prd] in
4686 defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4687 IsCommutable>, EVEX_V512;
4689 let Predicates = [prd, HasVLX] in {
4690 defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4691 sched.YMM, IsCommutable>, EVEX_V256;
4692 defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4693 sched.XMM, IsCommutable>, EVEX_V128;
4694 }
4695 }
4697 multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4698 X86SchedWriteWidths sched, Predicate prd,
4699 bit IsCommutable = 0> {
4700 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4701 sched, prd, IsCommutable>,
4702 VEX_W, EVEX_CD8<64, CD8VF>;
4703 }
4705 multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4706 X86SchedWriteWidths sched, Predicate prd,
4707 bit IsCommutable = 0> {
4708 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4709 sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
4710 }
4712 multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4713 X86SchedWriteWidths sched, Predicate prd,
4714 bit IsCommutable = 0> {
4715 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4716 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4717 VEX_WIG;
4718 }
4720 multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4721 X86SchedWriteWidths sched, Predicate prd,
4722 bit IsCommutable = 0> {
4723 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4724 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
4725 VEX_WIG;
4726 }
4728 multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4729 SDNode OpNode, X86SchedWriteWidths sched,
4730 Predicate prd, bit IsCommutable = 0> {
4731 defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
4732 IsCommutable>;
4734 defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
4735 IsCommutable>;
4736 }
4738 multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4739 SDNode OpNode, X86SchedWriteWidths sched,
4740 Predicate prd, bit IsCommutable = 0> {
4741 defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
4742 IsCommutable>;
4744 defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
4745 IsCommutable>;
4746 }
4748 multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4749 bits<8> opc_d, bits<8> opc_q,
4750 string OpcodeStr, SDNode OpNode,
4751 X86SchedWriteWidths sched,
4752 bit IsCommutable = 0> {
4753 defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4754 sched, HasAVX512, IsCommutable>,
4755 avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4756 sched, HasBWI, IsCommutable>;
4757 }
4759 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
4760 X86FoldableSchedWrite sched,
4761 SDNode OpNode,X86VectorVTInfo _Src,
4762 X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4763 bit IsCommutable = 0> {
4764 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4765 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4766 "$src2, $src1","$src1, $src2",
4767 (_Dst.VT (OpNode
4768 (_Src.VT _Src.RC:$src1),
4769 (_Src.VT _Src.RC:$src2))),
4770 IsCommutable>,
4771 AVX512BIBase, EVEX_4V, Sched<[sched]>;
4772 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4773 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4774 "$src2, $src1", "$src1, $src2",
4775 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4776 (bitconvert (_Src.LdFrag addr:$src2))))>,
4777 AVX512BIBase, EVEX_4V,
4778 Sched<[sched.Folded, ReadAfterLd]>;
4780 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4781 (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
4782 OpcodeStr,
4783 "${src2}"##_Brdct.BroadcastStr##", $src1",
4784 "$src1, ${src2}"##_Brdct.BroadcastStr,
4785 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4786 (_Brdct.VT (X86VBroadcast
4787 (_Brdct.ScalarLdFrag addr:$src2))))))>,
4788 AVX512BIBase, EVEX_4V, EVEX_B,
4789 Sched<[sched.Folded, ReadAfterLd]>;
4790 }
4792 defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
4793 SchedWriteVecALU, 1>;
4794 defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
4795 SchedWriteVecALU, 0>;
4796 defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
4797 SchedWriteVecALU, HasBWI, 1>;
4798 defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
4799 SchedWriteVecALU, HasBWI, 0>;
4800 defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
4801 SchedWriteVecALU, HasBWI, 1>;
4802 defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
4803 SchedWriteVecALU, HasBWI, 0>;
4804 defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
4805 SchedWritePMULLD, HasAVX512, 1>, T8PD;
4806 defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
4807 SchedWriteVecIMul, HasBWI, 1>;
4808 defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
4809 SchedWriteVecIMul, HasDQI, 1>, T8PD,
4810 NotEVEX2VEXConvertible;
4811 defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
4812 HasBWI, 1>;
4813 defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
4814 HasBWI, 1>;
4815 defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
4816 SchedWriteVecIMul, HasBWI, 1>, T8PD;
4817 defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
4818 SchedWriteVecALU, HasBWI, 1>;
4819 defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
4820 SchedWriteVecIMul, HasAVX512, 1>, T8PD;
4821 defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
4822 SchedWriteVecIMul, HasAVX512, 1>;
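// Note that VPMULDQ/VPMULUDQ consume the even-numbered 32-bit elements,
// sign- or zero-extend them, and produce full 64-bit products, which is why
// they are instantiated with the i64 VT info rather than i32.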
4824 multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
4825 X86SchedWriteWidths sched,
4826 AVX512VLVectorVTInfo _SrcVTInfo,
4827 AVX512VLVectorVTInfo _DstVTInfo,
4828 SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
4829 let Predicates = [prd] in
4830 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
4831 _SrcVTInfo.info512, _DstVTInfo.info512,
4832 v8i64_info, IsCommutable>,
4833 EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
4834 let Predicates = [HasVLX, prd] in {
4835 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
4836 _SrcVTInfo.info256, _DstVTInfo.info256,
4837 v4i64x_info, IsCommutable>,
4838 EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
4839 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
4840 _SrcVTInfo.info128, _DstVTInfo.info128,
4841 v2i64x_info, IsCommutable>,
4842 EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
4843 }
4844 }
4846 defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
4847 avx512vl_i8_info, avx512vl_i8_info,
4848 X86multishift, HasVBMI, 0>, T8PD;
4850 multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4851 X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
4852 X86FoldableSchedWrite sched> {
4853 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4854 (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
4855 OpcodeStr,
4856 "${src2}"##_Src.BroadcastStr##", $src1",
4857 "$src1, ${src2}"##_Src.BroadcastStr,
4858 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4859 (_Src.VT (X86VBroadcast
4860 (_Src.ScalarLdFrag addr:$src2))))))>,
4861 EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
4862 Sched<[sched.Folded, ReadAfterLd]>;
4863 }
4865 multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
4866 SDNode OpNode,X86VectorVTInfo _Src,
4867 X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
4868 bit IsCommutable = 0> {
4869 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4870 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4871 "$src2, $src1","$src1, $src2",
4872 (_Dst.VT (OpNode
4873 (_Src.VT _Src.RC:$src1),
4874 (_Src.VT _Src.RC:$src2))),
4875 IsCommutable>,
4876 EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
4877 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4878 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4879 "$src2, $src1", "$src1, $src2",
4880 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4881 (bitconvert (_Src.LdFrag addr:$src2))))>,
4882 EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
4883 Sched<[sched.Folded, ReadAfterLd]>;
4884 }
4886 multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
4887 SDNode OpNode> {
4888 let Predicates = [HasBWI] in
4889 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
4890 v32i16_info, SchedWriteShuffle.ZMM>,
4891 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
4892 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
4893 let Predicates = [HasBWI, HasVLX] in {
4894 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
4895 v16i16x_info, SchedWriteShuffle.YMM>,
4896 avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
4897 v16i16x_info, SchedWriteShuffle.YMM>,
4898 EVEX_V256;
4899 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
4900 v8i16x_info, SchedWriteShuffle.XMM>,
4901 avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
4902 v8i16x_info, SchedWriteShuffle.XMM>,
4903 EVEX_V128;
4904 }
4905 }
4906 multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
4907 SDNode OpNode> {
4908 let Predicates = [HasBWI] in
4909 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
4910 SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
4911 let Predicates = [HasBWI, HasVLX] in {
4912 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
4913 v32i8x_info, SchedWriteShuffle.YMM>,
4914 EVEX_V256, VEX_WIG;
4915 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
4916 v16i8x_info, SchedWriteShuffle.XMM>,
4917 EVEX_V128, VEX_WIG;
4918 }
4919 }
4921 multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
4922 SDNode OpNode, AVX512VLVectorVTInfo _Src,
4923 AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
4924 let Predicates = [HasBWI] in
4925 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
4926 _Dst.info512, SchedWriteVecIMul.ZMM,
4927 IsCommutable>, EVEX_V512;
4928 let Predicates = [HasBWI, HasVLX] in {
4929 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
4930 _Dst.info256, SchedWriteVecIMul.YMM,
4931 IsCommutable>, EVEX_V256;
4932 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
4933 _Dst.info128, SchedWriteVecIMul.XMM,
4934 IsCommutable>, EVEX_V128;
4935 }
4936 }
4938 defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
4939 defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
4940 defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
4941 defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
4943 defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
4944 avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
4945 defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
4946 avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
4948 defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
4949 SchedWriteVecALU, HasBWI, 1>, T8PD;
4950 defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
4951 SchedWriteVecALU, HasBWI, 1>;
4952 defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
4953 SchedWriteVecALU, HasAVX512, 1>, T8PD;
4954 defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
4955 SchedWriteVecALU, HasAVX512, 1>, T8PD,
4956 NotEVEX2VEXConvertible;
4958 defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
4959 SchedWriteVecALU, HasBWI, 1>;
4960 defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
4961 SchedWriteVecALU, HasBWI, 1>, T8PD;
4962 defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
4963 SchedWriteVecALU, HasAVX512, 1>, T8PD;
4964 defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
4965 SchedWriteVecALU, HasAVX512, 1>, T8PD,
4966 NotEVEX2VEXConvertible;
4968 defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
4969 SchedWriteVecALU, HasBWI, 1>, T8PD;
4970 defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
4971 SchedWriteVecALU, HasBWI, 1>;
4972 defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
4973 SchedWriteVecALU, HasAVX512, 1>, T8PD;
4974 defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
4975 SchedWriteVecALU, HasAVX512, 1>, T8PD,
4976 NotEVEX2VEXConvertible;
4978 defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
4979 SchedWriteVecALU, HasBWI, 1>;
4980 defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
4981 SchedWriteVecALU, HasBWI, 1>, T8PD;
4982 defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
4983 SchedWriteVecALU, HasAVX512, 1>, T8PD;
4984 defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
4985 SchedWriteVecALU, HasAVX512, 1>, T8PD,
4986 NotEVEX2VEXConvertible;
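// The 64-bit element min/max forms are tagged NotEVEX2VEXConvertible because
// they have no VEX-encoded counterpart, so the EVEX-to-VEX compression pass
// must leave them alone; the NoVLX lowering below widens them to 512 bits.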
4988 // PMULLQ: Use the 512-bit version to implement 128/256-bit in case of NoVLX.
4989 let Predicates = [HasDQI, NoVLX] in {
4990 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
4991 (EXTRACT_SUBREG
4992 (VPMULLQZrr
4993 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4994 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4995 sub_ymm)>;
4997 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
4998 (EXTRACT_SUBREG
4999 (VPMULLQZrr
5000 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5001 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5002 sub_xmm)>;
5003 }
5022 multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
5023 def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
5024 (EXTRACT_SUBREG
5025 (Instr
5026 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5027 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5028 sub_ymm)>;
5030 def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
5031 (EXTRACT_SUBREG
5032 (Instr
5033 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5034 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5035 sub_xmm)>;
5036 }
5038 let Predicates = [HasAVX512, NoVLX] in {
5039 defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
5040 defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
5041 defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;
5042 defm : avx512_min_max_lowering<VPMINSQZrr, smin>;
5043 }
5045 //===----------------------------------------------------------------------===//
5046 // AVX-512 Logical Instructions
5047 //===----------------------------------------------------------------------===//
5049 // OpNodeMsk is the OpNode to use when element size is important. OpNode will
5050 // be set to null_frag for 32-bit elements.
5051 multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr,
5052 SDPatternOperator OpNode,
5053 SDNode OpNodeMsk, X86FoldableSchedWrite sched,
5054 X86VectorVTInfo _, bit IsCommutable = 0> {
5055 let hasSideEffects = 0 in
5056 defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _, (outs _.RC:$dst),
5057 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5058 "$src2, $src1", "$src1, $src2",
5059 (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
5060 (bitconvert (_.VT _.RC:$src2)))),
5061 (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
5062 _.RC:$src2)))),
5063 IsCommutable>, AVX512BIBase, EVEX_4V,
5064 Sched<[sched]>;
5066 let hasSideEffects = 0, mayLoad = 1 in
5067 defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
5068 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5069 "$src2, $src1", "$src1, $src2",
5070 (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
5071 (bitconvert (_.LdFrag addr:$src2)))),
5072 (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
5073 (bitconvert (_.LdFrag addr:$src2))))))>,
5074 AVX512BIBase, EVEX_4V,
5075 Sched<[sched.Folded, ReadAfterLd]>;
5076 }
5078 // OpNodeMsk is the OpNode to use when element size is important, so it is
5079 // used for all of the broadcast patterns.
5080 multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr,
5081 SDPatternOperator OpNode,
5082 SDNode OpNodeMsk, X86FoldableSchedWrite sched, X86VectorVTInfo _,
5083 bit IsCommutable = 0> :
5084 avx512_logic_rm<opc, OpcodeStr, OpNode, OpNodeMsk, sched, _,
5085 IsCommutable> {
5086 defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
5087 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5088 "${src2}"##_.BroadcastStr##", $src1",
5089 "$src1, ${src2}"##_.BroadcastStr,
5090 (_.i64VT (OpNodeMsk _.RC:$src1,
5091 (bitconvert
5092 (_.VT (X86VBroadcast
5093 (_.ScalarLdFrag addr:$src2)))))),
5094 (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
5095 (bitconvert
5096 (_.VT (X86VBroadcast
5097 (_.ScalarLdFrag addr:$src2))))))))>,
5098 AVX512BIBase, EVEX_4V, EVEX_B,
5099 Sched<[sched.Folded, ReadAfterLd]>;
5100 }
5102 multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr,
5103 SDPatternOperator OpNode,
5104 SDNode OpNodeMsk, X86SchedWriteWidths sched,
5105 AVX512VLVectorVTInfo VTInfo,
5106 bit IsCommutable = 0> {
5107 let Predicates = [HasAVX512] in
5108 defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.ZMM,
5109 VTInfo.info512, IsCommutable>, EVEX_V512;
5111 let Predicates = [HasAVX512, HasVLX] in {
5112 defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.YMM,
5113 VTInfo.info256, IsCommutable>, EVEX_V256;
5114 defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.XMM,
5115 VTInfo.info128, IsCommutable>, EVEX_V128;
5116 }
5117 }
5119 multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
5120 SDNode OpNode, X86SchedWriteWidths sched,
5121 bit IsCommutable = 0> {
5122 defm Q : avx512_logic_rmb_vl<opc_q, OpcodeStr#"q", OpNode, OpNode, sched,
5123 avx512vl_i64_info, IsCommutable>,
5124 VEX_W, EVEX_CD8<64, CD8VF>;
5125 defm D : avx512_logic_rmb_vl<opc_d, OpcodeStr#"d", null_frag, OpNode, sched,
5126 avx512vl_i32_info, IsCommutable>,
5127 EVEX_CD8<32, CD8VF>;
5128 }
5130 defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and,
5131 SchedWriteVecLogic, 1>;
5132 defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or,
5133 SchedWriteVecLogic, 1>;
5134 defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
5135 SchedWriteVecLogic, 1>;
5136 defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
5137 SchedWriteVecLogic>;
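// The D variants pass null_frag as OpNode: plain 32-bit logic is canonicalized
// to the 64-bit element forms, so the D encodings only need to match through
// OpNodeMsk where masking or broadcasting makes the element size observable.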
5139 //===----------------------------------------------------------------------===//
5140 // AVX-512 FP arithmetic
5141 //===----------------------------------------------------------------------===//
5143 multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5144 SDNode OpNode, SDNode VecNode,
5145 X86FoldableSchedWrite sched, bit IsCommutable> {
5146 let ExeDomain = _.ExeDomain in {
5147 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5148 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5149 "$src2, $src1", "$src1, $src2",
5150 (_.VT (VecNode _.RC:$src1, _.RC:$src2,
5151 (i32 FROUND_CURRENT)))>,
5152 Sched<[sched]>;
5154 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5155 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5156 "$src2, $src1", "$src1, $src2",
5157 (_.VT (VecNode _.RC:$src1,
5158 _.ScalarIntMemCPat:$src2,
5159 (i32 FROUND_CURRENT)))>,
5160 Sched<[sched.Folded, ReadAfterLd]>;
5161 let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5162 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5163 (ins _.FRC:$src1, _.FRC:$src2),
5164 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5165 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5166 Sched<[sched]> {
5167 let isCommutable = IsCommutable;
5168 }
5169 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5170 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5171 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5172 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5173 (_.ScalarLdFrag addr:$src2)))]>,
5174 Sched<[sched.Folded, ReadAfterLd]>;
5175 }
5176 }
5177 }
5179 multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5180 SDNode VecNode, X86FoldableSchedWrite sched,
5181 bit IsCommutable = 0> {
5182 let ExeDomain = _.ExeDomain in
5183 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5184 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5185 "$rc, $src2, $src1", "$src1, $src2, $rc",
5186 (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5187 (i32 imm:$rc)), IsCommutable>,
5188 EVEX_B, EVEX_RC, Sched<[sched]>;
5189 }
5190 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5191 SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5192 X86FoldableSchedWrite sched, bit IsCommutable> {
5193 let ExeDomain = _.ExeDomain in {
5194 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5195 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5196 "$src2, $src1", "$src1, $src2",
5197 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5198 Sched<[sched]>;
5200 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5201 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5202 "$src2, $src1", "$src1, $src2",
5203 (_.VT (VecNode _.RC:$src1,
5204 _.ScalarIntMemCPat:$src2))>,
5205 Sched<[sched.Folded, ReadAfterLd]>;
5207 let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5208 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5209 (ins _.FRC:$src1, _.FRC:$src2),
5210 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5211 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5212 Sched<[sched]> {
5213 let isCommutable = IsCommutable;
5214 }
5215 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5216 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5217 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5218 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5219 (_.ScalarLdFrag addr:$src2)))]>,
5220 Sched<[sched.Folded, ReadAfterLd]>;
5221 }
5223 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5224 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5225 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5226 (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5227 (i32 FROUND_NO_EXC))>, EVEX_B,
5228 Sched<[sched]>;
5229 }
5230 }
5232 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
5233 SDNode VecNode, X86SchedWriteSizes sched,
5234 bit IsCommutable> {
5235 defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5236 sched.PS.Scl, IsCommutable>,
5237 avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode,
5238 sched.PS.Scl, IsCommutable>,
5239 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5240 defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5241 sched.PD.Scl, IsCommutable>,
5242 avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode,
5243 sched.PD.Scl, IsCommutable>,
5244 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5245 }
5247 multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5248 SDNode VecNode, SDNode SaeNode,
5249 X86SchedWriteSizes sched, bit IsCommutable> {
5250 defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5251 VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
5252 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5253 defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5254 VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
5255 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5256 }
5257 defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds,
5258 SchedWriteFAddSizes, 1>;
5259 defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds,
5260 SchedWriteFMulSizes, 1>;
5261 defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds,
5262 SchedWriteFAddSizes, 0>;
5263 defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds,
5264 SchedWriteFDivSizes, 0>;
5265 defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
5266 SchedWriteFCmpSizes, 0>;
5267 defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
5268 SchedWriteFCmpSizes, 0>;
5270 // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
5271 // X86fminc and X86fmaxc instead of X86fmin and X86fmax.
5272 multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5273 X86VectorVTInfo _, SDNode OpNode,
5274 X86FoldableSchedWrite sched> {
5275 let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5276 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5277 (ins _.FRC:$src1, _.FRC:$src2),
5278 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5279 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5280 Sched<[sched]> {
5281 let isCommutable = 1;
5282 }
5283 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5284 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5285 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5286 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5287 (_.ScalarLdFrag addr:$src2)))]>,
5288 Sched<[sched.Folded, ReadAfterLd]>;
5289 }
5290 }
5291 defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5292 SchedWriteFCmp.Scl>, XS, EVEX_4V,
5293 VEX_LIG, EVEX_CD8<32, CD8VT1>;
5295 defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5296 SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
5297 VEX_LIG, EVEX_CD8<64, CD8VT1>;
5299 defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5300 SchedWriteFCmp.Scl>, XS, EVEX_4V,
5301 VEX_LIG, EVEX_CD8<32, CD8VT1>;
5303 defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5304 SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
5305 VEX_LIG, EVEX_CD8<64, CD8VT1>;
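// The VMINC*/VMAXC* defs above reuse the VMINSS/SD and VMAXSS/SD opcodes
// (0x5D/0x5F) and differ only in being modeled as commutable, matching the
// X86fminc/X86fmaxc nodes produced under relaxed FP semantics.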
5307 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5308 X86VectorVTInfo _, X86FoldableSchedWrite sched,
5309 bit IsCommutable = 0,
5310 bit IsKZCommutable = IsCommutable> {
5311 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
5312 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5313 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5314 "$src2, $src1", "$src1, $src2",
5315 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable, 0,
5316 IsKZCommutable>,
5317 EVEX_4V, Sched<[sched]>;
5318 let mayLoad = 1 in {
5319 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5320 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
5321 "$src2, $src1", "$src1, $src2",
5322 (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5323 EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
5324 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5325 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
5326 "${src2}"##_.BroadcastStr##", $src1",
5327 "$src1, ${src2}"##_.BroadcastStr,
5328 (OpNode _.RC:$src1, (_.VT (X86VBroadcast
5329 (_.ScalarLdFrag addr:$src2))))>,
5330 EVEX_4V, EVEX_B,
5331 Sched<[sched.Folded, ReadAfterLd]>;
5332 }
5333 }
5334 }
5336 multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5337 SDPatternOperator OpNodeRnd,
5338 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5339 let ExeDomain = _.ExeDomain in
5340 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5341 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
5342 "$rc, $src2, $src1", "$src1, $src2, $rc",
5343 (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc)))>,
5344 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
5345 }
5347 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5348 SDPatternOperator OpNodeRnd,
5349 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5350 let ExeDomain = _.ExeDomain in
5351 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5352 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5353 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5354 (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 FROUND_NO_EXC)))>,
5355 EVEX_4V, EVEX_B, Sched<[sched]>;
5356 }
5358 multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5359 Predicate prd, X86SchedWriteSizes sched,
5360 bit IsCommutable = 0,
5361 bit IsPD128Commutable = IsCommutable> {
5362 let Predicates = [prd] in {
5363 defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
5364 sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
5365 EVEX_CD8<32, CD8VF>;
5366 defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
5367 sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
5368 EVEX_CD8<64, CD8VF>;
5369 }
5371 // Define only if AVX512VL feature is present.
5372 let Predicates = [prd, HasVLX] in {
5373 defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
5374 sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
5375 EVEX_CD8<32, CD8VF>;
5376 defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
5377 sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
5378 EVEX_CD8<32, CD8VF>;
5379 defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
5380 sched.PD.XMM, IsPD128Commutable,
5381 IsCommutable>, EVEX_V128, PD, VEX_W,
5382 EVEX_CD8<64, CD8VF>;
5383 defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
5384 sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
5385 EVEX_CD8<64, CD8VF>;
5386 }
5387 }
5389 multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5390 X86SchedWriteSizes sched> {
5391 defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5392 v16f32_info>,
5393 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5394 defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5395 v8f64_info>,
5396 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
5397 }
5399 multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5400 X86SchedWriteSizes sched> {
5401 defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5402 v16f32_info>,
5403 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5404 defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5405 v8f64_info>,
5406 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
5407 }
5409 defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
5410 SchedWriteFAddSizes, 1>,
5411 avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5412 defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
5413 SchedWriteFMulSizes, 1>,
5414 avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5415 defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
5416 SchedWriteFAddSizes>,
5417 avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5418 defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
5419 SchedWriteFDivSizes>,
5420 avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
5421 defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
5422 SchedWriteFCmpSizes, 0>,
5423 avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SchedWriteFCmpSizes>;
5424 defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
5425 SchedWriteFCmpSizes, 0>,
5426 avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SchedWriteFCmpSizes>;
5427 let isCodeGenOnly = 1 in {
5428 defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
5429 SchedWriteFCmpSizes, 1>;
5430 defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
5431 SchedWriteFCmpSizes, 1>;
5432 }
5433 defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
5434 SchedWriteFLogicSizes, 1>;
5435 defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
5436 SchedWriteFLogicSizes, 0>;
5437 defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
5438 SchedWriteFLogicSizes, 1>;
5439 defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
5440 SchedWriteFLogicSizes, 1>;
5442 // Patterns catch floating point selects with bitcasted integer logic ops.
5443 multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
5444 X86VectorVTInfo _, Predicate prd> {
5445 let Predicates = [prd] in {
5446 // Masked register-register logical operations.
5447 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5448 (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
5449 _.RC:$src0)),
5450 (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5451 _.RC:$src1, _.RC:$src2)>;
5452 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5453 (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
5454 (bitconvert (_.i64VT immAllZerosV)))),
5455 (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5456 _.RC:$src2)>;
5457 // Masked register-memory logical operations.
5458 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5459 (bitconvert (_.i64VT (OpNode _.RC:$src1,
5460 (load addr:$src2)))),
5461 _.RC:$src0)),
5462 (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5463 _.RC:$src1, addr:$src2)>;
5464 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5465 (bitconvert (_.i64VT (OpNode _.RC:$src1, (load addr:$src2)))),
5466 (bitconvert (_.i64VT immAllZerosV)))),
5467 (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
5468 addr:$src2)>;
5469 // Register-broadcast logical operations.
5470 def : Pat<(_.i64VT (OpNode _.RC:$src1,
5471 (bitconvert (_.VT (X86VBroadcast
5472 (_.ScalarLdFrag addr:$src2)))))),
5473 (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
5474 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5475 (bitconvert
5476 (_.i64VT (OpNode _.RC:$src1,
5477 (bitconvert
5478 (_.VT (X86VBroadcast
5479 (_.ScalarLdFrag addr:$src2))))))),
5480 _.RC:$src0)),
5481 (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5482 _.RC:$src1, addr:$src2)>;
5483 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5484 (bitconvert
5485 (_.i64VT (OpNode _.RC:$src1,
5486 (bitconvert
5487 (_.VT (X86VBroadcast
5488 (_.ScalarLdFrag addr:$src2))))))),
5489 (bitconvert (_.i64VT immAllZerosV)))),
5490 (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
5491 _.RC:$src1, addr:$src2)>;
5492 }
5493 }
5495 multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> {
5496 defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info, HasVLX>;
5497 defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info, HasVLX>;
5498 defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info, HasVLX>;
5499 defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info, HasVLX>;
5500 defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info, HasAVX512>;
5501 defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info, HasAVX512>;
5502 }
5504 defm : avx512_fp_logical_lowering_sizes<"VPAND", and>;
5505 defm : avx512_fp_logical_lowering_sizes<"VPOR", or>;
5506 defm : avx512_fp_logical_lowering_sizes<"VPXOR", xor>;
5507 defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>;
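// With these, a masked floating-point vselect wrapping a bitcasted integer
// logic op folds straight into the masked VPAND/VPOR/VPXOR/VPANDN forms,
// e.g. (v16f32 (vselect k, (bitconvert (and ...)), src0)) -> VPANDDZrrk.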
5509 let Predicates = [HasVLX, HasDQI] in {
5510 // Use packed logical operations for scalar ops.
5511 def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)),
5512 (COPY_TO_REGCLASS
5513 (v2f64 (VANDPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
5514 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
5515 FR64X)>;
5516 def : Pat<(f64 (X86for FR64X:$src1, FR64X:$src2)),
5517 (COPY_TO_REGCLASS
5518 (v2f64 (VORPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
5519 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
5520 FR64X)>;
5521 def : Pat<(f64 (X86fxor FR64X:$src1, FR64X:$src2)),
5522 (COPY_TO_REGCLASS
5523 (v2f64 (VXORPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
5524 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
5525 FR64X)>;
5526 def : Pat<(f64 (X86fandn FR64X:$src1, FR64X:$src2)),
5527 (COPY_TO_REGCLASS
5528 (v2f64 (VANDNPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
5529 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
5530 FR64X)>;
5532 def : Pat<(f32 (X86fand FR32X:$src1, FR32X:$src2)),
5533 (COPY_TO_REGCLASS
5534 (v4f32 (VANDPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
5535 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
5536 FR32X)>;
5537 def : Pat<(f32 (X86for FR32X:$src1, FR32X:$src2)),
5538 (COPY_TO_REGCLASS
5539 (v4f32 (VORPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
5540 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
5541 FR32X)>;
5542 def : Pat<(f32 (X86fxor FR32X:$src1, FR32X:$src2)),
5543 (COPY_TO_REGCLASS
5544 (v4f32 (VXORPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
5545 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
5546 FR32X)>;
5547 def : Pat<(f32 (X86fandn FR32X:$src1, FR32X:$src2)),
5548 (COPY_TO_REGCLASS
5549 (v4f32 (VANDNPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
5550 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
5551 FR32X)>;
5552 }
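// The patterns above round-trip the scalar through VR128X and use the packed
// 128-bit op; the junk created in the upper elements is harmless because only
// element 0 is ever read back.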
5554 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5555 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5556 let ExeDomain = _.ExeDomain in {
5557 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5558 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5559 "$src2, $src1", "$src1, $src2",
5560 (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>,
5561 EVEX_4V, Sched<[sched]>;
5562 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5563 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
5564 "$src2, $src1", "$src1, $src2",
5565 (OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>,
5566 EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
5567 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5568 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
5569 "${src2}"##_.BroadcastStr##", $src1",
5570 "$src1, ${src2}"##_.BroadcastStr,
5571 (OpNode _.RC:$src1, (_.VT (X86VBroadcast
5572 (_.ScalarLdFrag addr:$src2))),
5573 (i32 FROUND_CURRENT))>,
5574 EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
5575 }
5576 }
5578 multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5579 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5580 let ExeDomain = _.ExeDomain in {
5581 defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5582 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5583 "$src2, $src1", "$src1, $src2",
5584 (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>,
5585 Sched<[sched]>;
5586 defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5587 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix,
5588 "$src2, $src1", "$src1, $src2",
5589 (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2,
5590 (i32 FROUND_CURRENT))>,
5591 Sched<[sched.Folded, ReadAfterLd]>;
5592 }
5593 }
5595 multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5596 SDNode OpNode, SDNode OpNodeScal,
5597 X86SchedWriteWidths sched> {
5598 defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>,
5599 avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>,
5600 EVEX_V512, EVEX_CD8<32, CD8VF>;
5601 defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>,
5602 avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>,
5603 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
5604 defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f32x_info>,
5605 avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, sched.Scl>,
5606 EVEX_4V, EVEX_CD8<32, CD8VT1>;
5607 defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f64x_info>,
5608 avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, sched.Scl>,
5609 EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
5611 // Define only if AVX512VL feature is present.
5612 let Predicates = [HasVLX] in {
5613 defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v4f32x_info>,
5614 EVEX_V128, EVEX_CD8<32, CD8VF>;
5615 defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v8f32x_info>,
5616 EVEX_V256, EVEX_CD8<32, CD8VF>;
5617 defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v2f64x_info>,
5618 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
5619 defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v4f64x_info>,
5620 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
5621 }
5622 }
5623 defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs,
5624 SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;
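// For reference, vscalefps/pd compute src1 * 2^floor(src2) per element
// (roughly ldexp); there is no VEX/SSE equivalent, hence
// NotEVEX2VEXConvertible.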
5626 //===----------------------------------------------------------------------===//
5627 // AVX-512 VPTESTM instructions
5628 //===----------------------------------------------------------------------===//
5630 multiclass avx512_vptest<bits<8> opc, string OpcodeStr, PatFrag OpNode,
5631 X86FoldableSchedWrite sched, X86VectorVTInfo _,
5632 string Name> {
5633 let ExeDomain = _.ExeDomain in {
5634 let isCommutable = 1 in
5635 defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5636 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5637 "$src2, $src1", "$src1, $src2",
5638 (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
5639 _.ImmAllZerosV)>,
5640 EVEX_4V, Sched<[sched]>;
5641 defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5642 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5643 "$src2, $src1", "$src1, $src2",
5644 (OpNode (bitconvert
5645 (_.i64VT (and _.RC:$src1,
5646 (bitconvert (_.LdFrag addr:$src2))))),
5647 _.ImmAllZerosV)>,
5648 EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5649 Sched<[sched.Folded, ReadAfterLd]>;
5650 }
5652 // Patterns for compare with 0 that just use the same source twice.
5653 def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
5654 (_.KVT (!cast<Instruction>(Name # _.ZSuffix # "rr")
5655 _.RC:$src, _.RC:$src))>;
5657 def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
5658 (_.KVT (!cast<Instruction>(Name # _.ZSuffix # "rrk")
5659 _.KRC:$mask, _.RC:$src, _.RC:$src))>;
5660 }
5662 multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
5663 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5664 let ExeDomain = _.ExeDomain in
5665 defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5666 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5667 "${src2}"##_.BroadcastStr##", $src1",
5668 "$src1, ${src2}"##_.BroadcastStr,
5669 (OpNode (and _.RC:$src1,
5670 (X86VBroadcast
5671 (_.ScalarLdFrag addr:$src2))),
5672 _.ImmAllZerosV)>,
5673 EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5674 Sched<[sched.Folded, ReadAfterLd]>;
5675 }
5677 // Use the 512-bit version to implement the 128/256-bit variants when VLX is unavailable.
5678 multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo,
5679 X86VectorVTInfo _, string Name> {
5680 def : Pat<(_.KVT (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
5681 _.ImmAllZerosV)),
5682 (_.KVT (COPY_TO_REGCLASS
5683 (!cast<Instruction>(Name # "Zrr")
5684 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5685 _.RC:$src1, _.SubRegIdx),
5686 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5687 _.RC:$src2, _.SubRegIdx)),
5688 _.KRC))>;
5690 def : Pat<(_.KVT (and _.KRC:$mask,
5691 (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
5692 _.ImmAllZerosV))),
5693 (COPY_TO_REGCLASS
5694 (!cast<Instruction>(Name # "Zrrk")
5695 (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
5696 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5697 _.RC:$src1, _.SubRegIdx),
5698 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5699 _.RC:$src2, _.SubRegIdx)),
5700 _.KRC)>;
5702 def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
5703 (_.KVT (COPY_TO_REGCLASS
5704 (!cast<Instruction>(Name # "Zrr")
5705 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5706 _.RC:$src, _.SubRegIdx),
5707 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5708 _.RC:$src, _.SubRegIdx)),
5709 _.KRC))>;
5711 def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
5712 (COPY_TO_REGCLASS
5713 (!cast<Instruction>(Name # "Zrrk")
5714 (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
5715 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5716 _.RC:$src, _.SubRegIdx),
5717 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5718 _.RC:$src, _.SubRegIdx)),
5719 _.KRC)>;
5720 }
5722 multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, PatFrag OpNode,
5723 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
5724 let Predicates = [HasAVX512] in
5725 defm Z : avx512_vptest<opc, OpcodeStr, OpNode, sched.ZMM, _.info512, NAME>,
5726 avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
5728 let Predicates = [HasAVX512, HasVLX] in {
5729 defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, sched.YMM, _.info256, NAME>,
5730 avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
5731 defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, sched.XMM, _.info128, NAME>,
5732 avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
5733 }
5734 let Predicates = [HasAVX512, NoVLX] in {
5735 defm Z256_Alt : avx512_vptest_lowering<OpNode, _.info512, _.info256, NAME>;
5736 defm Z128_Alt : avx512_vptest_lowering<OpNode, _.info512, _.info128, NAME>;
5737 }
5738 }
5740 multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, PatFrag OpNode,
5741 X86SchedWriteWidths sched> {
5742 defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, sched,
5743 avx512vl_i32_info>;
5744 defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, sched,
5745 avx512vl_i64_info>, VEX_W;
5746 }
5748 multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5749 PatFrag OpNode, X86SchedWriteWidths sched> {
5750 let Predicates = [HasBWI] in {
5751 defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.ZMM,
5752 v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
5753 defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.ZMM,
5754 v64i8_info, NAME#"B">, EVEX_V512;
5755 }
5756 let Predicates = [HasVLX, HasBWI] in {
5758 defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.YMM,
5759 v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
5760 defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.XMM,
5761 v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
5762 defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.YMM,
5763 v32i8x_info, NAME#"B">, EVEX_V256;
5764 defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.XMM,
5765 v16i8x_info, NAME#"B">, EVEX_V128;
5766 }
5768 let Predicates = [HasAVX512, NoVLX] in {
5769 defm BZ256_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v32i8x_info, NAME#"B">;
5770 defm BZ128_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v16i8x_info, NAME#"B">;
5771 defm WZ256_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v16i16x_info, NAME#"W">;
5772 defm WZ128_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v8i16x_info, NAME#"W">;
5773 }
5774 }
5776 // These patterns are used to match vptestm/vptestnm. We don't treat pcmpeqm
5777 // as commutable here because we already canonicalized all-zeros vectors to the
5778 // RHS during lowering.
5779 def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2),
5780 (setcc node:$src1, node:$src2, SETEQ)>;
5781 def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2),
5782 (setcc node:$src1, node:$src2, SETNE)>;
5784 multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5785 PatFrag OpNode, X86SchedWriteWidths sched> :
5786 avx512_vptest_wb<opc_wb, OpcodeStr, OpNode, sched>,
5787 avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, sched>;
5789 defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem,
5790 SchedWriteVecLogic>, T8PD;
5791 defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm,
5792 SchedWriteVecLogic>, T8XS;
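// For example, vptestmd k1 {k2}, zmm1, zmm2 sets bit i of k1 (under writemask
// k2) when (zmm1[i] & zmm2[i]) != 0; vptestnmd sets it when the AND is zero,
// matching the X86pcmpnem/X86pcmpeqm fragments above.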
5794 //===----------------------------------------------------------------------===//
5795 // AVX-512 Shift instructions
5796 //===----------------------------------------------------------------------===//
5798 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5799 string OpcodeStr, SDNode OpNode,
5800 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5801 let ExeDomain = _.ExeDomain in {
5802 defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5803 (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5804 "$src2, $src1", "$src1, $src2",
5805 (_.VT (OpNode _.RC:$src1, (i8 imm:$src2)))>,
5806 Sched<[sched]>;
5807 defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5808 (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5809 "$src2, $src1", "$src1, $src2",
5810 (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
5811 (i8 imm:$src2)))>,
5812 Sched<[sched.Folded]>;
5813 }
5814 }
5816 multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5817 string OpcodeStr, SDNode OpNode,
5818 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5819 let ExeDomain = _.ExeDomain in
5820 defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5821 (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5822 "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
5823 (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2)))>,
5824 EVEX_B, Sched<[sched.Folded]>;
5825 }
5827 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5828 X86FoldableSchedWrite sched, ValueType SrcVT,
5829 PatFrag bc_frag, X86VectorVTInfo _> {
5830 // src2 is always 128-bit
5831 let ExeDomain = _.ExeDomain in {
5832 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5833 (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5834 "$src2, $src1", "$src1, $src2",
5835 (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
5836 AVX512BIBase, EVEX_4V, Sched<[sched]>;
5837 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5838 (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5839 "$src2, $src1", "$src1, $src2",
5840 (_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2))))>,
5841 AVX512BIBase,
5842 EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
5843 }
5844 }
5846 multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5847 X86SchedWriteWidths sched, ValueType SrcVT,
5848 PatFrag bc_frag, AVX512VLVectorVTInfo VTInfo,
5849 Predicate prd> {
5850 let Predicates = [prd] in
5851 defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
5852 bc_frag, VTInfo.info512>, EVEX_V512,
5853 EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5854 let Predicates = [prd, HasVLX] in {
5855 defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
5856 bc_frag, VTInfo.info256>, EVEX_V256,
5857 EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5858 defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
5859 bc_frag, VTInfo.info128>, EVEX_V128,
5860 EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
5864 multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5865 string OpcodeStr, SDNode OpNode,
5866 X86SchedWriteWidths sched,
5867 bit NotEVEX2VEXConvertibleQ = 0> {
5868 defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
5869 bc_v4i32, avx512vl_i32_info, HasAVX512>;
5870 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5871 defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
5872 bc_v2i64, avx512vl_i64_info, HasAVX512>, VEX_W;
5873 defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
5874 bc_v2i64, avx512vl_i16_info, HasBWI>;
5877 multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5878 string OpcodeStr, SDNode OpNode,
5879 X86SchedWriteWidths sched,
5880 AVX512VLVectorVTInfo VTInfo> {
5881 let Predicates = [HasAVX512] in
5882 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5883 sched.ZMM, VTInfo.info512>,
5884 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
5885 VTInfo.info512>, EVEX_V512;
5886 let Predicates = [HasAVX512, HasVLX] in {
5887 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5888 sched.YMM, VTInfo.info256>,
5889 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
5890 VTInfo.info256>, EVEX_V256;
5891 defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5892 sched.XMM, VTInfo.info128>,
5893 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
5894 VTInfo.info128>, EVEX_V128;
5898 multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
5899 string OpcodeStr, SDNode OpNode,
5900 X86SchedWriteWidths sched> {
5901 let Predicates = [HasBWI] in
5902 defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5903 sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
5904 let Predicates = [HasVLX, HasBWI] in {
5905 defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5906 sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
5907 defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5908 sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
5912 multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
5913 Format ImmFormR, Format ImmFormM,
5914 string OpcodeStr, SDNode OpNode,
5915 X86SchedWriteWidths sched,
5916 bit NotEVEX2VEXConvertibleQ = 0> {
5917 defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
5918 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
5919 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5920 defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
5921 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
5924 defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
5925 SchedWriteVecShiftImm>,
5926 avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
5927 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5929 defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
5930 SchedWriteVecShiftImm>,
5931 avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
5932 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5934 defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
5935 SchedWriteVecShiftImm, 1>,
5936 avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
5937 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5939 defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
5940 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5941 defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
5942 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5944 defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
5945 SchedWriteVecShift>;
5946 defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
5947 SchedWriteVecShift, 1>;
5948 defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
5949 SchedWriteVecShift>;
5951 // Use the 512-bit VPSRA/VPSRAI versions to implement v2i64/v4i64 when VLX is unavailable.
5952 let Predicates = [HasAVX512, NoVLX] in {
5953 def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
5954 (EXTRACT_SUBREG (v8i64
5955 (VPSRAQZrr
5956 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5957 VR128X:$src2)), sub_ymm)>;
5959 def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5960 (EXTRACT_SUBREG (v8i64
5961 (VPSRAQZrr
5962 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5963 VR128X:$src2)), sub_xmm)>;
5965 def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
5966 (EXTRACT_SUBREG (v8i64
5967 (VPSRAQZri
5968 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5969 imm:$src2)), sub_ymm)>;
5971 def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
5972 (EXTRACT_SUBREG (v8i64
5973 (VPSRAQZri
5974 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5975 imm:$src2)), sub_xmm)>;
5976 }
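// The widening above is safe: the extra lanes of the 512-bit temporary are
// IMPLICIT_DEF garbage that EXTRACT_SUBREG simply discards.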
5978 //===-------------------------------------------------------------------===//
5979 // Variable Bit Shifts
5980 //===-------------------------------------------------------------------===//
5982 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
5983 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5984 let ExeDomain = _.ExeDomain in {
5985 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5986 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5987 "$src2, $src1", "$src1, $src2",
5988 (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
5989 AVX5128IBase, EVEX_4V, Sched<[sched]>;
5990 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5991 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5992 "$src2, $src1", "$src1, $src2",
5993 (_.VT (OpNode _.RC:$src1,
5994 (_.VT (bitconvert (_.LdFrag addr:$src2)))))>,
5995 AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5996 Sched<[sched.Folded, ReadAfterLd]>;
6000 multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
6001 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6002 let ExeDomain = _.ExeDomain in
6003 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6004 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6005 "${src2}"##_.BroadcastStr##", $src1",
6006 "$src1, ${src2}"##_.BroadcastStr,
6007 (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
6008 (_.ScalarLdFrag addr:$src2)))))>,
6009 AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6010 Sched<[sched.Folded, ReadAfterLd]>;
6013 multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6014 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
6015 let Predicates = [HasAVX512] in
6016 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
6017 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
6019 let Predicates = [HasAVX512, HasVLX] in {
6020 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
6021 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
6022 defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
6023 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
6027 multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
6028 SDNode OpNode, X86SchedWriteWidths sched> {
6029 defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
6030 avx512vl_i32_info>;
6031 defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
6032 avx512vl_i64_info>, VEX_W;
6033 }
6035 // Use the 512-bit version to implement the 128/256-bit variants when VLX is unavailable.
6036 multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
6037 SDNode OpNode, list<Predicate> p> {
6038 let Predicates = p in {
6039 def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
6040 (_.info256.VT _.info256.RC:$src2))),
6042 (!cast<Instruction>(OpcodeStr#"Zrr")
6043 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
6044 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
6047 def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
6048 (_.info128.VT _.info128.RC:$src2))),
6050 (!cast<Instruction>(OpcodeStr#"Zrr")
6051 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6052 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
6056 multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6057 SDNode OpNode, X86SchedWriteWidths sched> {
6058 let Predicates = [HasBWI] in
6059 defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6061 let Predicates = [HasVLX, HasBWI] in {
6063 defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6065 defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
6070 defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl, SchedWriteVarVecShift>,
6071 avx512_var_shift_w<0x12, "vpsllvw", shl, SchedWriteVarVecShift>;
6073 defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra, SchedWriteVarVecShift>,
6074 avx512_var_shift_w<0x11, "vpsravw", sra, SchedWriteVarVecShift>;
6076 defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl, SchedWriteVarVecShift>,
6077 avx512_var_shift_w<0x10, "vpsrlvw", srl, SchedWriteVarVecShift>;
6079 defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6080 defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
6082 defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra, [HasAVX512, NoVLX]>;
6083 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl, [HasBWI, NoVLX]>;
6084 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra, [HasBWI, NoVLX]>;
6085 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>;
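// These lowerings are needed because VPSRAVQ and the variable word shifts
// have no 128/256-bit encodings without VLX; inputs are widened to 512 bits
// instead.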
6087 // Special handling for VPSRAV intrinsics.
6088 multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
6089 list<Predicate> p> {
6090 let Predicates = p in {
6091 def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
6092 (!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1,
6094 def : Pat<(_.VT (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))),
6095 (!cast<Instruction>(InstrStr#_.ZSuffix##rm)
6096 _.RC:$src1, addr:$src2)>;
6097 def : Pat<(_.VT (vselect _.KRCWM:$mask,
6098 (X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)),
6099 (!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
6100 _.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
6101 def : Pat<(_.VT (vselect _.KRCWM:$mask,
6102 (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
6104 (!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
6105 _.KRC:$mask, _.RC:$src1, addr:$src2)>;
6106 def : Pat<(_.VT (vselect _.KRCWM:$mask,
6107 (X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
6108 (!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
6109 _.RC:$src1, _.RC:$src2)>;
6110 def : Pat<(_.VT (vselect _.KRCWM:$mask,
6111 (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
6113 (!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
6114 _.RC:$src1, addr:$src2)>;
6118 multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
6119 list<Predicate> p> :
6120 avx512_var_shift_int_lowering<InstrStr, _, p> {
6121 let Predicates = p in {
6122 def : Pat<(_.VT (X86vsrav _.RC:$src1,
6123 (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
6124 (!cast<Instruction>(InstrStr#_.ZSuffix##rmb)
6125 _.RC:$src1, addr:$src2)>;
6126 def : Pat<(_.VT (vselect _.KRCWM:$mask,
6127 (X86vsrav _.RC:$src1,
6128 (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
6130 (!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0,
6131 _.KRC:$mask, _.RC:$src1, addr:$src2)>;
6132 def : Pat<(_.VT (vselect _.KRCWM:$mask,
6133 (X86vsrav _.RC:$src1,
6134 (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
6136 (!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask,
6137 _.RC:$src1, addr:$src2)>;
6141 defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info, [HasVLX, HasBWI]>;
6142 defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>;
6143 defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>;
6144 defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>;
6145 defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>;
6146 defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>;
6147 defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
6148 defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
6149 defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;
6151 // Use the 512-bit VPROL/VPROLI versions to implement v2i64/v4i64 and v4i32/v8i32 when VLX is unavailable.
6152 let Predicates = [HasAVX512, NoVLX] in {
6153 def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6154 (EXTRACT_SUBREG (v8i64
6155 (VPROLVQZrr
6156 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6157 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6158 sub_xmm)>;
6159 def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6160 (EXTRACT_SUBREG (v8i64
6161 (VPROLVQZrr
6162 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6163 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6164 sub_ymm)>;
6166 def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6167 (EXTRACT_SUBREG (v16i32
6168 (VPROLVDZrr
6169 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6170 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6171 sub_xmm)>;
6172 def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6173 (EXTRACT_SUBREG (v16i32
6174 (VPROLVDZrr
6175 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6176 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6177 sub_ymm)>;
6179 def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
6180 (EXTRACT_SUBREG (v8i64
6181 (VPROLQZri
6182 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6183 imm:$src2)), sub_xmm)>;
6184 def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
6185 (EXTRACT_SUBREG (v8i64
6186 (VPROLQZri
6187 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6188 imm:$src2)), sub_ymm)>;
6190 def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
6191 (EXTRACT_SUBREG (v16i32
6192 (VPROLDZri
6193 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6194 imm:$src2)), sub_xmm)>;
6195 def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
6196 (EXTRACT_SUBREG (v16i32
6197 (VPROLDZri
6198 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6199 imm:$src2)), sub_ymm)>;
6200 }
6202 // Use the 512-bit VPROR/VPRORI versions to implement v2i64/v4i64 and v4i32/v8i32 when VLX is unavailable.
6203 let Predicates = [HasAVX512, NoVLX] in {
6204 def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6205 (EXTRACT_SUBREG (v8i64
6206 (VPRORVQZrr
6207 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6208 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6209 sub_xmm)>;
6210 def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6211 (EXTRACT_SUBREG (v8i64
6212 (VPRORVQZrr
6213 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6214 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6215 sub_ymm)>;
6217 def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6218 (EXTRACT_SUBREG (v16i32
6219 (VPRORVDZrr
6220 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6221 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6222 sub_xmm)>;
6223 def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6224 (EXTRACT_SUBREG (v16i32
6225 (VPRORVDZrr
6226 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6227 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6228 sub_ymm)>;
6230 def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
6231 (EXTRACT_SUBREG (v8i64
6232 (VPRORQZri
6233 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6234 imm:$src2)), sub_xmm)>;
6235 def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
6236 (EXTRACT_SUBREG (v8i64
6237 (VPRORQZri
6238 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6239 imm:$src2)), sub_ymm)>;
6241 def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
6242 (EXTRACT_SUBREG (v16i32
6243 (VPRORDZri
6244 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6245 imm:$src2)), sub_xmm)>;
6246 def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
6247 (EXTRACT_SUBREG (v16i32
6248 (VPRORDZri
6249 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6250 imm:$src2)), sub_ymm)>;
6251 }
6253 //===-------------------------------------------------------------------===//
6254 // 1-src variable permutation VPERMW/D/Q
6255 //===-------------------------------------------------------------------===//
6257 multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6258 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6259 let Predicates = [HasAVX512] in
6260 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6261 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6263 let Predicates = [HasAVX512, HasVLX] in
6264 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6265 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
6268 multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6269 string OpcodeStr, SDNode OpNode,
6270 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6271 let Predicates = [HasAVX512] in
6272 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6273 sched, VTInfo.info512>,
6274 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6275 sched, VTInfo.info512>, EVEX_V512;
6276 let Predicates = [HasAVX512, HasVLX] in
6277 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6278 sched, VTInfo.info256>,
6279 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6280 sched, VTInfo.info256>, EVEX_V256;
6283 multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6284 Predicate prd, SDNode OpNode,
6285 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6286 let Predicates = [prd] in
6287 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6289 let Predicates = [HasVLX, prd] in {
6290 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6292 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
6297 defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6298 WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
6299 defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6300 WriteVarShuffle256, avx512vl_i8_info>;
6302 defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6303 WriteVarShuffle256, avx512vl_i32_info>;
6304 defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6305 WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
6306 defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6307 WriteFVarShuffle256, avx512vl_f32_info>;
6308 defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6309 WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;
6311 defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6312 X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6313 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6314 defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6315 X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6316 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
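// Note avx512_vperm_dq_sizes defines only Z and Z256 forms: vpermd/vpermq and
// vpermps/vpermpd permute across 128-bit lanes, so no 128-bit form exists.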
6318 //===----------------------------------------------------------------------===//
6319 // AVX-512 - VPERMIL
6320 //===----------------------------------------------------------------------===//
6322 multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6323 X86FoldableSchedWrite sched, X86VectorVTInfo _,
6324 X86VectorVTInfo Ctrl> {
6325 defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6326 (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6327 "$src2, $src1", "$src1, $src2",
6328 (_.VT (OpNode _.RC:$src1,
6329 (Ctrl.VT Ctrl.RC:$src2)))>,
6330 T8PD, EVEX_4V, Sched<[sched]>;
6331 defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6332 (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6333 "$src2, $src1", "$src1, $src2",
6336 (Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2)))))>,
6337 T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6338 Sched<[sched.Folded, ReadAfterLd]>;
6339 defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6340 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6341 "${src2}"##_.BroadcastStr##", $src1",
6342 "$src1, ${src2}"##_.BroadcastStr,
6345 (Ctrl.VT (X86VBroadcast
6346 (Ctrl.ScalarLdFrag addr:$src2)))))>,
6347 T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6348 Sched<[sched.Folded, ReadAfterLd]>;
6351 multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6352 X86SchedWriteWidths sched,
6353 AVX512VLVectorVTInfo _,
6354 AVX512VLVectorVTInfo Ctrl> {
6355 let Predicates = [HasAVX512] in {
6356 defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6357 _.info512, Ctrl.info512>, EVEX_V512;
6359 let Predicates = [HasAVX512, HasVLX] in {
6360 defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6361 _.info128, Ctrl.info128>, EVEX_V128;
6362 defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6363 _.info256, Ctrl.info256>, EVEX_V256;
6367 multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6368 AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6369 defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6371 defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6372 X86VPermilpi, SchedWriteFShuffle, _>,
6373 EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
6376 let ExeDomain = SSEPackedSingle in
6377 defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6379 let ExeDomain = SSEPackedDouble in
6380 defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6381 avx512vl_i64_info>, VEX_W1X;
6383 //===----------------------------------------------------------------------===//
6384 // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6385 //===----------------------------------------------------------------------===//
6387 defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6388 X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6389 EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6390 defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6391 X86PShufhw, SchedWriteShuffle>,
6392 EVEX, AVX512XSIi8Base;
6393 defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6394 X86PShuflw, SchedWriteShuffle>,
6395 EVEX, AVX512XDIi8Base;
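// The vpshufd immediate is four 2-bit source-element selectors; for example
// vpshufd $0x1b, %xmm1, %xmm0 (0x1b = 0b00011011) reverses the four dwords.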
6397 //===----------------------------------------------------------------------===//
6398 // AVX-512 - VPSHUFB
6399 //===----------------------------------------------------------------------===//
6401 multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6402 X86SchedWriteWidths sched> {
6403 let Predicates = [HasBWI] in
6404 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6407 let Predicates = [HasVLX, HasBWI] in {
6408 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6410 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
6415 defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6416 SchedWriteVarShuffle>, VEX_WIG;
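// As in SSE/AVX, each control byte of the second source picks a byte from the
// corresponding 16-byte lane of the first source (a set bit 7 zeroes the
// result byte); lanes never interact.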
6418 //===----------------------------------------------------------------------===//
6419 // Move Low to High and High to Low packed FP Instructions
6420 //===----------------------------------------------------------------------===//
6422 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6423 (ins VR128X:$src1, VR128X:$src2),
6424 "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6425 [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6426 Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
6427 let isCommutable = 1 in
6428 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6429 (ins VR128X:$src1, VR128X:$src2),
6430 "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6431 [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6432 Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
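// Semantics: vmovlhps copies src2[63:0] into dst[127:64] (keeping src1's low
// half), while vmovhlps copies src2[127:64] into dst[63:0] (keeping src1's
// high half).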
6434 //===----------------------------------------------------------------------===//
6435 // VMOVHPS/PD VMOVLPS Instructions
6436 // All patterns were taken from the SSE implementation.
6437 //===----------------------------------------------------------------------===//
6439 multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6440 SDPatternOperator OpNode,
6441 X86VectorVTInfo _> {
6442 let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6443 def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6444 (ins _.RC:$src1, f64mem:$src2),
6445 !strconcat(OpcodeStr,
6446 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6450 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6451 Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>, EVEX_4V;
6454 // No patterns for MOVLPS/MOVHPS, as the Movlhps node should only be created in
6455 // SSE1; the MOVLPS pattern is even more complex.
6456 defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6457 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6458 defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6459 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6460 defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6461 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6462 defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6463 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6465 let Predicates = [HasAVX512] in {
6467 def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
6468 (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
6469 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6470 }
6472 let SchedRW = [WriteFStore] in {
6473 def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6474 (ins f64mem:$dst, VR128X:$src),
6475 "vmovhps\t{$src, $dst|$dst, $src}",
6476 [(store (f64 (extractelt
6477 (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
6478 (bc_v2f64 (v4f32 VR128X:$src))),
6479 (iPTR 0))), addr:$dst)]>,
6480 EVEX, EVEX_CD8<32, CD8VT2>;
6481 def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6482 (ins f64mem:$dst, VR128X:$src),
6483 "vmovhpd\t{$src, $dst|$dst, $src}",
6484 [(store (f64 (extractelt
6485 (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6486 (iPTR 0))), addr:$dst)]>,
6487 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6488 def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6489 (ins f64mem:$dst, VR128X:$src),
6490 "vmovlps\t{$src, $dst|$dst, $src}",
6491 [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128X:$src)),
6492 (iPTR 0))), addr:$dst)]>,
6493 EVEX, EVEX_CD8<32, CD8VT2>;
6494 def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6495 (ins f64mem:$dst, VR128X:$src),
6496 "vmovlpd\t{$src, $dst|$dst, $src}",
6497 [(store (f64 (extractelt (v2f64 VR128X:$src),
6498 (iPTR 0))), addr:$dst)]>,
6499 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6500 }
6502 let Predicates = [HasAVX512] in {
6504 def : Pat<(store (f64 (extractelt
6505 (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6506 (iPTR 0))), addr:$dst),
6507 (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6508 }
6509 //===----------------------------------------------------------------------===//
6510 // FMA - Fused Multiply Operations
6511 //===----------------------------------------------------------------------===//
6513 multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6514 X86FoldableSchedWrite sched,
6515 X86VectorVTInfo _, string Suff> {
6516 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6517 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6518 (ins _.RC:$src2, _.RC:$src3),
6519 OpcodeStr, "$src3, $src2", "$src2, $src3",
6520 (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6521 AVX512FMA3Base, Sched<[sched]>;
6523 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6524 (ins _.RC:$src2, _.MemOp:$src3),
6525 OpcodeStr, "$src3, $src2", "$src2, $src3",
6526 (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6527 AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;
6529 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6530 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6531 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
6532 !strconcat("$src2, ${src3}", _.BroadcastStr ),
6534 _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>,
6535 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
6539 multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6540 X86FoldableSchedWrite sched,
6541 X86VectorVTInfo _, string Suff> {
6542 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6543 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6544 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6545 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6546 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))), 1, 1>,
6547 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6550 multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6551 SDNode OpNodeRnd, X86SchedWriteWidths sched,
6552 AVX512VLVectorVTInfo _, string Suff> {
6553 let Predicates = [HasAVX512] in {
6554 defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.ZMM,
6556 avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6558 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6560 let Predicates = [HasVLX, HasAVX512] in {
6561 defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.YMM,
6563 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6564 defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.XMM,
6566 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6570 multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6571 SDNode OpNodeRnd> {
6572 defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6573 SchedWriteFMA, avx512vl_f32_info, "PS">;
6574 defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6575 SchedWriteFMA, avx512vl_f64_info, "PD">,
6579 defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
6580 defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
6581 defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
6582 defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
6583 defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
6584 defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
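// Digit convention: the three digits name the operand order fed to the FMA,
// so vfmadd213ps zmm1, zmm2, zmm3 computes zmm1 = (zmm2 * zmm1) + zmm3; the
// memory operand always sits in the src3 position.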
6587 multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6588 X86FoldableSchedWrite sched,
6589 X86VectorVTInfo _, string Suff> {
6590 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6591 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6592 (ins _.RC:$src2, _.RC:$src3),
6593 OpcodeStr, "$src3, $src2", "$src2, $src3",
6594 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1,
6595 vselect, 1>, AVX512FMA3Base, Sched<[sched]>;
6597 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6598 (ins _.RC:$src2, _.MemOp:$src3),
6599 OpcodeStr, "$src3, $src2", "$src2, $src3",
6600 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6601 AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;
6603 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6604 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6605 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6606 "$src2, ${src3}"##_.BroadcastStr,
6607 (_.VT (OpNode _.RC:$src2,
6608 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
6609 _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
6610 Sched<[sched.Folded, ReadAfterLd]>;
6614 multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6615 X86FoldableSchedWrite sched,
6616 X86VectorVTInfo _, string Suff> {
6617 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6618 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6619 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6620 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6621 (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))),
6623 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6626 multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6627 SDNode OpNodeRnd, X86SchedWriteWidths sched,
6628 AVX512VLVectorVTInfo _, string Suff> {
6629 let Predicates = [HasAVX512] in {
6630 defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.ZMM,
6632 avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6634 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6636 let Predicates = [HasVLX, HasAVX512] in {
6637 defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.YMM,
6639 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6640 defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.XMM,
6642 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6646 multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6647 SDNode OpNodeRnd > {
6648 defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6649 SchedWriteFMA, avx512vl_f32_info, "PS">;
6650 defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6651 SchedWriteFMA, avx512vl_f64_info, "PD">,
6655 defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
6656 defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
6657 defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
6658 defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
6659 defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
6660 defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
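// The 231 forms compute zmm1 = (zmm2 * zmm3) + zmm1, accumulating into the
// destination, which is the natural shape for reduction loops.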
6662 multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6663 X86FoldableSchedWrite sched,
6664 X86VectorVTInfo _, string Suff> {
6665 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6666 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6667 (ins _.RC:$src2, _.RC:$src3),
6668 OpcodeStr, "$src3, $src2", "$src2, $src3",
6669 (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1, vselect, 1>,
6670 AVX512FMA3Base, Sched<[sched]>;
6672 // The pattern is in 312 order so that the load is in a different place from the
6673 // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6674 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6675 (ins _.RC:$src2, _.MemOp:$src3),
6676 OpcodeStr, "$src3, $src2", "$src2, $src3",
6677 (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
6678 AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;
6680 // The pattern is in 312 order so that the load is in a different place from the
6681 // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6682 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6683 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6684 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6685 "$src2, ${src3}"##_.BroadcastStr,
6686 (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
6687 _.RC:$src1, _.RC:$src2)), 1, 0>,
6688 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
6692 multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6693 X86FoldableSchedWrite sched,
6694 X86VectorVTInfo _, string Suff> {
6695 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6696 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6697 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6698 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6699 (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc))),
6701 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6704 multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6705 SDNode OpNodeRnd, X86SchedWriteWidths sched,
6706 AVX512VLVectorVTInfo _, string Suff> {
6707 let Predicates = [HasAVX512] in {
6708 defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.ZMM,
6710 avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6712 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6714 let Predicates = [HasVLX, HasAVX512] in {
6715 defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.YMM,
6717 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6718 defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.XMM,
6720 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6724 multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6725 SDNode OpNodeRnd > {
6726 defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6727 SchedWriteFMA, avx512vl_f32_info, "PS">;
6728 defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6729 SchedWriteFMA, avx512vl_f64_info, "PD">,
6733 defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
6734 defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
6735 defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
6736 defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
6737 defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
6738 defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
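// The 132 forms compute zmm1 = (zmm1 * zmm3) + zmm2; as noted above, their
// load patterns are written in 312 order purely to keep them distinct for
// tablegen's duplicate pattern detection.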
6741 multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6742 dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
6743 let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
6744 defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6745 (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
6746 "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6747 AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>;
6750 defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6751 (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
6752 "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6753 AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>;
6755 defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6756 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6757 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
6758 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
6760 let isCodeGenOnly = 1, isCommutable = 1 in {
6761 def r : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6762 (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
6763 !strconcat(OpcodeStr,
6764 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6765 !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>;
6766 def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
6767 (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
6768 !strconcat(OpcodeStr,
6769 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6770 [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>;
6772 def rb : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6773 (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
6774 !strconcat(OpcodeStr,
6775 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6776 !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
6777 Sched<[SchedWriteFMA.Scl]>;
6778 }// isCodeGenOnly = 1
6779 }// Constraints = "$src1 = $dst"
6782 multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6783 string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
6784 X86VectorVTInfo _, string SUFF> {
6785 let ExeDomain = _.ExeDomain in {
6786 defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
6787 // Operands for the intrinsic are in 123 order to preserve passthrough semantics.
6789 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6791 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6792 (_.ScalarLdFrag addr:$src3)))),
6793 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
6794 _.FRC:$src3, (i32 imm:$rc)))), 0>;
6796 defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
6797 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6799 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
6800 (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
6801 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
6802 _.FRC:$src1, (i32 imm:$rc)))), 1>;
6804 // One pattern is in 312 order so that the load is in a different place from the
6805 // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6806 defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
6807 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6809 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6810 _.FRC:$src1, _.FRC:$src2))),
6811 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
6812 _.FRC:$src2, (i32 imm:$rc)))), 1>;
6813 }
6814 }
6816 multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6817 string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> {
6818 let Predicates = [HasAVX512] in {
6819 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6820 OpNodeRnd, f32x_info, "SS">,
6821 EVEX_CD8<32, CD8VT1>, VEX_LIG;
6822 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6823 OpNodeRnd, f64x_info, "SD">,
6824 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
6825 }
6826 }
6828 defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>;
6829 defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>;
6830 defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
6831 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
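// The pattern multiclass below looks instructions up by reassembling their
// names, e.g. Prefix#"213"#Suffix#"Zr_Int" becomes VFMADD213SSZr_Int. The
// tail encodes the variant: r/m/rb for register, memory and embedded
// rounding; _Int for the XMM intrinsic form; a trailing k or kz for merge-
// or zero-masking.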
6833 multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
6834 string Suffix, SDNode Move,
6835 X86VectorVTInfo _, PatLeaf ZeroFP> {
6836 let Predicates = [HasAVX512] in {
6837 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6838 (Op _.FRC:$src2,
6839 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6840 _.FRC:$src3))))),
6841 (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
6842 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6843 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6845 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6846 (Op _.FRC:$src2, _.FRC:$src3,
6847 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6848 (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
6849 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6850 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6852 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6853 (Op _.FRC:$src2,
6854 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6855 (_.ScalarLdFrag addr:$src3)))))),
6856 (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
6857 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6858 addr:$src3)>;
6860 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6861 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6862 (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
6863 (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
6864 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6865 addr:$src3)>;
6867 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6868 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6869 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6870 (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
6871 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6872 addr:$src3)>;
6874 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6875 (X86selects VK1WM:$mask,
6876 (Op _.FRC:$src2,
6877 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6878 _.FRC:$src3),
6879 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6880 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
6881 VR128X:$src1, VK1WM:$mask,
6882 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6883 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6885 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6886 (X86selects VK1WM:$mask,
6887 (Op _.FRC:$src2,
6888 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6889 (_.ScalarLdFrag addr:$src3)),
6890 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6891 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
6892 VR128X:$src1, VK1WM:$mask,
6893 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6895 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6896 (X86selects VK1WM:$mask,
6897 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6898 (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
6899 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6900 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
6901 VR128X:$src1, VK1WM:$mask,
6902 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6904 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6905 (X86selects VK1WM:$mask,
6906 (Op _.FRC:$src2, _.FRC:$src3,
6907 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6908 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6909 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
6910 VR128X:$src1, VK1WM:$mask,
6911 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6912 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6914 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6915 (X86selects VK1WM:$mask,
6916 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6917 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6918 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6919 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
6920 VR128X:$src1, VK1WM:$mask,
6921 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6923 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6924 (X86selects VK1WM:$mask,
6925 (Op _.FRC:$src2,
6926 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6927 _.FRC:$src3),
6928 (_.EltVT ZeroFP)))))),
6929 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
6930 VR128X:$src1, VK1WM:$mask,
6931 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6932 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6934 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6935 (X86selects VK1WM:$mask,
6936 (Op _.FRC:$src2, _.FRC:$src3,
6937 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6938 (_.EltVT ZeroFP)))))),
6939 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
6940 VR128X:$src1, VK1WM:$mask,
6941 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6942 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6944 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6945 (X86selects VK1WM:$mask,
6946 (Op _.FRC:$src2,
6947 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6948 (_.ScalarLdFrag addr:$src3)),
6949 (_.EltVT ZeroFP)))))),
6950 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
6951 VR128X:$src1, VK1WM:$mask,
6952 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6954 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6955 (X86selects VK1WM:$mask,
6956 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6957 _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
6958 (_.EltVT ZeroFP)))))),
6959 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
6960 VR128X:$src1, VK1WM:$mask,
6961 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6963 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6964 (X86selects VK1WM:$mask,
6965 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6966 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6967 (_.EltVT ZeroFP)))))),
6968 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
6969 VR128X:$src1, VK1WM:$mask,
6970 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6972 // Patterns with rounding mode.
6973 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6974 (RndOp _.FRC:$src2,
6975 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6976 _.FRC:$src3, (i32 imm:$rc)))))),
6977 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
6978 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6979 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
6981 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6982 (RndOp _.FRC:$src2, _.FRC:$src3,
6983 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6984 (i32 imm:$rc)))))),
6985 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
6986 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6987 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
6989 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6990 (X86selects VK1WM:$mask,
6991 (RndOp _.FRC:$src2,
6992 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6993 _.FRC:$src3, (i32 imm:$rc)),
6994 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6995 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
6996 VR128X:$src1, VK1WM:$mask,
6997 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6998 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
7000 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7001 (X86selects VK1WM:$mask,
7002 (RndOp _.FRC:$src2, _.FRC:$src3,
7003 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7004 (i32 imm:$rc)),
7005 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7006 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
7007 VR128X:$src1, VK1WM:$mask,
7008 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7009 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
7011 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7012 (X86selects VK1WM:$mask,
7013 (RndOp _.FRC:$src2,
7014 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7015 _.FRC:$src3, (i32 imm:$rc)),
7016 (_.EltVT ZeroFP)))))),
7017 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
7018 VR128X:$src1, VK1WM:$mask,
7019 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7020 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
7022 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7023 (X86selects VK1WM:$mask,
7024 (RndOp _.FRC:$src2, _.FRC:$src3,
7025 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7026 (i32 imm:$rc)),
7027 (_.EltVT ZeroFP)))))),
7028 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
7029 VR128X:$src1, VK1WM:$mask,
7030 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7031 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
7032 }
7033 }
7035 defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SS",
7036 X86Movss, v4f32x_info, fp32imm0>;
7037 defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SS",
7038 X86Movss, v4f32x_info, fp32imm0>;
7039 defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SS",
7040 X86Movss, v4f32x_info, fp32imm0>;
7041 defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS",
7042 X86Movss, v4f32x_info, fp32imm0>;
7044 defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SD",
7045 X86Movsd, v2f64x_info, fp64imm0>;
7046 defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SD",
7047 X86Movsd, v2f64x_info, fp64imm0>;
7048 defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SD",
7049 X86Movsd, v2f64x_info, fp64imm0>;
7050 defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD",
7051 X86Movsd, v2f64x_info, fp64imm0>;
7053 //===----------------------------------------------------------------------===//
7054 // AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add (IFMA)
7055 //===----------------------------------------------------------------------===//
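// Per 64-bit element these instructions multiply the low 52 bits of both
// sources as unsigned integers, forming a 104-bit product, and add either
// its low 52 bits (vpmadd52luq) or its high 52 bits (vpmadd52huq) to the
// 64-bit accumulator, roughly:
//   vpmadd52luq: dst += zext64((lo52(src2) * lo52(src3))[51:0])
//   vpmadd52huq: dst += zext64((lo52(src2) * lo52(src3))[103:52])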
7056 let Constraints = "$src1 = $dst" in {
7057 multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
7058 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
7059 // NOTE: The SDNode has the multiply operands first, with the add last.
7060 // This enables commuted load patterns to be autogenerated by tablegen.
7061 let ExeDomain = _.ExeDomain in {
7062 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
7063 (ins _.RC:$src2, _.RC:$src3),
7064 OpcodeStr, "$src3, $src2", "$src2, $src3",
7065 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
7066 AVX512FMA3Base, Sched<[sched]>;
7068 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7069 (ins _.RC:$src2, _.MemOp:$src3),
7070 OpcodeStr, "$src3, $src2", "$src2, $src3",
7071 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
7072 AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;
7074 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7075 (ins _.RC:$src2, _.ScalarMemOp:$src3),
7076 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
7077 !strconcat("$src2, ${src3}", _.BroadcastStr ),
7078 (OpNode _.RC:$src2,
7079 (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
7080 _.RC:$src1)>,
7081 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
7082 }
7083 }
7084 } // Constraints = "$src1 = $dst"
7086 multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7087 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
7088 let Predicates = [HasIFMA] in {
7089 defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
7090 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7091 }
7092 let Predicates = [HasVLX, HasIFMA] in {
7093 defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
7094 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7095 defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
7096 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
7097 }
7098 }
7100 defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
7101 SchedWriteVecIMul, avx512vl_i64_info>,
7102 VEX_W;
7103 defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
7104 SchedWriteVecIMul, avx512vl_i64_info>,
7105 VEX_W;
7107 //===----------------------------------------------------------------------===//
7108 // AVX-512 Scalar convert from signed integer to float/double
7109 //===----------------------------------------------------------------------===//
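// Each converter below has plain FRC forms (rr/rm) for ordinary scalar
// code and "_Int" forms that operate on the whole XMM register for the
// intrinsic patterns further down. The rrb_Int form takes an explicit
// AVX512RC rounding-mode operand, e.g. (AT&T syntax, illustrative):
//   vcvtsi2ss %eax, {rd-sae}, %xmm1, %xmm0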
7111 multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, X86FoldableSchedWrite sched,
7112 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7113 X86MemOperand x86memop, PatFrag ld_frag, string asm> {
7114 let hasSideEffects = 0 in {
7115 def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7116 (ins DstVT.FRC:$src1, SrcRC:$src),
7117 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7118 EVEX_4V, Sched<[sched]>;
7120 def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7121 (ins DstVT.FRC:$src1, x86memop:$src),
7122 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7123 EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
7124 } // hasSideEffects = 0
7125 let isCodeGenOnly = 1 in {
7126 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7127 (ins DstVT.RC:$src1, SrcRC:$src2),
7128 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7129 [(set DstVT.RC:$dst,
7130 (OpNode (DstVT.VT DstVT.RC:$src1),
7131 SrcRC:$src2,
7132 (i32 FROUND_CURRENT)))]>,
7133 EVEX_4V, Sched<[sched]>;
7135 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7136 (ins DstVT.RC:$src1, x86memop:$src2),
7137 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7138 [(set DstVT.RC:$dst,
7139 (OpNode (DstVT.VT DstVT.RC:$src1),
7140 (ld_frag addr:$src2),
7141 (i32 FROUND_CURRENT)))]>,
7142 EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
7143 }//isCodeGenOnly = 1
7144 }
7146 multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7147 X86FoldableSchedWrite sched, RegisterClass SrcRC,
7148 X86VectorVTInfo DstVT, string asm> {
7149 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7150 (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7151 !strconcat(asm,
7152 "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7153 [(set DstVT.RC:$dst,
7154 (OpNode (DstVT.VT DstVT.RC:$src1),
7155 SrcRC:$src2,
7156 (i32 imm:$rc)))]>,
7157 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
7158 }
7160 multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode,
7161 X86FoldableSchedWrite sched,
7162 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7163 X86MemOperand x86memop, PatFrag ld_frag, string asm> {
7164 defm NAME : avx512_vcvtsi_round<opc, OpNode, sched, SrcRC, DstVT, asm>,
7165 avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7166 ld_frag, asm>, VEX_LIG;
7167 }
7169 let Predicates = [HasAVX512] in {
7170 defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SS, GR32,
7171 v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">,
7172 XS, EVEX_CD8<32, CD8VT1>;
7173 defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SS, GR64,
7174 v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">,
7175 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7176 defm VCVTSI2SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SD, GR32,
7177 v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">,
7178 XD, EVEX_CD8<32, CD8VT1>;
7179 defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SD, GR64,
7180 v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">,
7181 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7183 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7184 (VCVTSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
7185 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7186 (VCVTSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
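// In the patterns below the tied first input exists only to satisfy the
// instruction format and never affects the scalar result, so an
// IMPLICIT_DEF is supplied instead of a real register.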
7188 def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
7189 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7190 def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
7191 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7192 def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
7193 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7194 def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
7195 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7197 def : Pat<(f32 (sint_to_fp GR32:$src)),
7198 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7199 def : Pat<(f32 (sint_to_fp GR64:$src)),
7200 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7201 def : Pat<(f64 (sint_to_fp GR32:$src)),
7202 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7203 def : Pat<(f64 (sint_to_fp GR64:$src)),
7204 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7206 defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SS, GR32,
7207 v4f32x_info, i32mem, loadi32,
7208 "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
7209 defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SS, GR64,
7210 v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">,
7211 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7212 defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86UintToFpRnd, WriteCvtI2SD, GR32, v2f64x_info,
7213 i32mem, loadi32, "cvtusi2sd{l}">,
7214 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7215 defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SD, GR64,
7216 v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
7217 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7219 def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7220 (VCVTUSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
7221 def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7222 (VCVTUSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
7224 def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
7225 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7226 def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
7227 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7228 def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
7229 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7230 def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
7231 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7233 def : Pat<(f32 (uint_to_fp GR32:$src)),
7234 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7235 def : Pat<(f32 (uint_to_fp GR64:$src)),
7236 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7237 def : Pat<(f64 (uint_to_fp GR32:$src)),
7238 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7239 def : Pat<(f64 (uint_to_fp GR64:$src)),
7240 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7241 }
7243 //===----------------------------------------------------------------------===//
7244 // AVX-512 Scalar convert from float/double to integer
7245 //===----------------------------------------------------------------------===//
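// The converters in this section round according to MXCSR in their
// rr_Int/rm_Int forms and take an explicit rounding mode in rrb_Int; the
// truncating vcvtt* variants defined further down always round toward zero
// and expose {sae} instead of a rounding-mode operand.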
7247 multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7248 X86VectorVTInfo DstVT, SDNode OpNode,
7249 SDNode OpNodeRnd,
7250 X86FoldableSchedWrite sched, string asm,
7251 string aliasStr,
7252 bit CodeGenOnly = 1> {
7253 let Predicates = [HasAVX512] in {
7254 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7255 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7256 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7257 EVEX, VEX_LIG, Sched<[sched]>;
7258 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7259 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7260 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))]>,
7261 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7262 Sched<[sched]>;
7263 let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
7264 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7265 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7266 [(set DstVT.RC:$dst, (OpNode
7267 (SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>,
7268 EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;
7270 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7271 (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7272 def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7273 (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7274 } // Predicates = [HasAVX512]
7275 }
7277 multiclass avx512_cvt_s_int_round_aliases<bits<8> opc, X86VectorVTInfo SrcVT,
7278 X86VectorVTInfo DstVT, SDNode OpNode,
7279 SDNode OpNodeRnd,
7280 X86FoldableSchedWrite sched, string asm,
7281 string aliasStr> :
7282 avx512_cvt_s_int_round<opc, SrcVT, DstVT, OpNode, OpNodeRnd, sched, asm, aliasStr, 0> {
7283 let Predicates = [HasAVX512] in {
7284 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7285 (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7286 SrcVT.IntScalarMemOp:$src), 0, "att">;
7287 } // Predicates = [HasAVX512]
7288 }
7290 // Convert float/double to signed/unsigned int 32/64
7291 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7292 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7293 XS, EVEX_CD8<32, CD8VT1>;
7294 defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7295 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7296 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7297 defm VCVTSS2USIZ: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i32x_info, X86cvts2usi,
7298 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7299 XS, EVEX_CD8<32, CD8VT1>;
7300 defm VCVTSS2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i64x_info, X86cvts2usi,
7301 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7302 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7303 defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7304 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7305 XD, EVEX_CD8<64, CD8VT1>;
7306 defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7307 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7308 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7309 defm VCVTSD2USIZ: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i32x_info, X86cvts2usi,
7310 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7311 XD, EVEX_CD8<64, CD8VT1>;
7312 defm VCVTSD2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i64x_info, X86cvts2usi,
7313 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7314 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7316 // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang,
7317 // which otherwise produce unnecessary vmovs{s,d} instructions.
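// For example, _mm_cvtsi32_ss(a, i) reaches isel as
//   (X86Movss a, (scalar_to_vector (sint_to_fp i)))
// which the patterns below select to a single VCVTSI2SSZrr_Int instead of
// a convert followed by a vmovss.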
7318 let Predicates = [HasAVX512] in {
7319 def : Pat<(v4f32 (X86Movss
7320 (v4f32 VR128X:$dst),
7321 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
7322 (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7324 def : Pat<(v4f32 (X86Movss
7325 (v4f32 VR128X:$dst),
7326 (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
7327 (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7329 def : Pat<(v4f32 (X86Movss
7330 (v4f32 VR128X:$dst),
7331 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
7332 (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7334 def : Pat<(v4f32 (X86Movss
7335 (v4f32 VR128X:$dst),
7336 (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
7337 (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7339 def : Pat<(v2f64 (X86Movsd
7340 (v2f64 VR128X:$dst),
7341 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
7342 (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7344 def : Pat<(v2f64 (X86Movsd
7345 (v2f64 VR128X:$dst),
7346 (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
7347 (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7349 def : Pat<(v2f64 (X86Movsd
7350 (v2f64 VR128X:$dst),
7351 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
7352 (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7354 def : Pat<(v2f64 (X86Movsd
7355 (v2f64 VR128X:$dst),
7356 (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
7357 (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7359 def : Pat<(v4f32 (X86Movss
7360 (v4f32 VR128X:$dst),
7361 (v4f32 (scalar_to_vector (f32 (uint_to_fp GR64:$src)))))),
7362 (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7364 def : Pat<(v4f32 (X86Movss
7365 (v4f32 VR128X:$dst),
7366 (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi64 addr:$src))))))),
7367 (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7369 def : Pat<(v4f32 (X86Movss
7370 (v4f32 VR128X:$dst),
7371 (v4f32 (scalar_to_vector (f32 (uint_to_fp GR32:$src)))))),
7372 (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7374 def : Pat<(v4f32 (X86Movss
7375 (v4f32 VR128X:$dst),
7376 (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi32 addr:$src))))))),
7377 (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7379 def : Pat<(v2f64 (X86Movsd
7380 (v2f64 VR128X:$dst),
7381 (v2f64 (scalar_to_vector (f64 (uint_to_fp GR64:$src)))))),
7382 (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7384 def : Pat<(v2f64 (X86Movsd
7385 (v2f64 VR128X:$dst),
7386 (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi64 addr:$src))))))),
7387 (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7389 def : Pat<(v2f64 (X86Movsd
7390 (v2f64 VR128X:$dst),
7391 (v2f64 (scalar_to_vector (f64 (uint_to_fp GR32:$src)))))),
7392 (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7394 def : Pat<(v2f64 (X86Movsd
7395 (v2f64 VR128X:$dst),
7396 (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi32 addr:$src))))))),
7397 (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7398 } // Predicates = [HasAVX512]
7400 // Convert float/double to signed/unsigned int 32/64 with truncation
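// As with the non-truncating converters above, the isCodeGenOnly rr/rm
// forms match plain casts (fp_to_sint/fp_to_uint on a scalar FP register),
// while the _Int forms match the intrinsic nodes that read the whole XMM
// register.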
7401 multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7402 X86VectorVTInfo _DstRC, SDNode OpNode,
7403 SDNode OpNodeInt, SDNode OpNodeRnd,
7404 X86FoldableSchedWrite sched, string aliasStr,
7405 bit CodeGenOnly = 1>{
7406 let Predicates = [HasAVX512] in {
7407 let isCodeGenOnly = 1 in {
7408 def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7409 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7410 [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7411 EVEX, Sched<[sched]>;
7412 def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7413 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7414 [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7415 EVEX, Sched<[sched.Folded, ReadAfterLd]>;
7416 }
7418 def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7419 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7420 [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7421 EVEX, VEX_LIG, Sched<[sched]>;
7422 def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7423 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7424 [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
7425 (i32 FROUND_NO_EXC)))]>,
7426 EVEX,VEX_LIG , EVEX_B, Sched<[sched]>;
7427 let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
7428 def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7429 (ins _SrcRC.IntScalarMemOp:$src),
7430 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7431 [(set _DstRC.RC:$dst,
7432 (OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>,
7433 EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;
7435 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7436 (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7437 def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7438 (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7439 } // Predicates = [HasAVX512]
7440 }
7442 multiclass avx512_cvt_s_all_unsigned<bits<8> opc, string asm,
7443 X86VectorVTInfo _SrcRC,
7444 X86VectorVTInfo _DstRC, SDNode OpNode,
7445 SDNode OpNodeInt, SDNode OpNodeRnd,
7446 X86FoldableSchedWrite sched,
7447 string aliasStr> :
7448 avx512_cvt_s_all<opc, asm, _SrcRC, _DstRC, OpNode, OpNodeInt, OpNodeRnd, sched,
7449 aliasStr, 0> {
7450 let Predicates = [HasAVX512] in {
7451 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7452 (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7453 _SrcRC.IntScalarMemOp:$src), 0, "att">;
7454 }
7455 }
7457 defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7458 fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSS2I,
7459 "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7460 defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7461 fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSS2I,
7462 "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7463 defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7464 fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSD2I,
7465 "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7466 defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7467 fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSD2I,
7468 "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
7470 defm VCVTTSS2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i32x_info,
7471 fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSS2I,
7472 "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7473 defm VCVTTSS2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i64x_info,
7474 fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSS2I,
7475 "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
7476 defm VCVTTSD2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7477 fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSD2I,
7478 "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7479 defm VCVTTSD2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7480 fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSD2I,
7481 "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7483 //===----------------------------------------------------------------------===//
7484 // AVX-512 Convert from float to double and back
7485 //===----------------------------------------------------------------------===//
7487 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7488 X86VectorVTInfo _Src, SDNode OpNode,
7489 X86FoldableSchedWrite sched> {
7490 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7491 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7492 "$src2, $src1", "$src1, $src2",
7493 (_.VT (OpNode (_.VT _.RC:$src1),
7494 (_Src.VT _Src.RC:$src2),
7495 (i32 FROUND_CURRENT)))>,
7496 EVEX_4V, VEX_LIG, Sched<[sched]>;
7497 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7498 (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7499 "$src2, $src1", "$src1, $src2",
7500 (_.VT (OpNode (_.VT _.RC:$src1),
7501 (_Src.VT _Src.ScalarIntMemCPat:$src2),
7502 (i32 FROUND_CURRENT)))>,
7503 EVEX_4V, VEX_LIG,
7504 Sched<[sched.Folded, ReadAfterLd]>;
7506 let isCodeGenOnly = 1, hasSideEffects = 0 in {
7507 def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7508 (ins _.FRC:$src1, _Src.FRC:$src2),
7509 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7510 EVEX_4V, VEX_LIG, Sched<[sched]>;
7512 def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7513 (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7514 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7515 EVEX_4V, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;
7516 }
7517 }
7519 // Scalar Conversion with SAE - suppress all exceptions
7520 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7521 X86VectorVTInfo _Src, SDNode OpNodeRnd,
7522 X86FoldableSchedWrite sched> {
7523 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7524 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7525 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7526 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7527 (_Src.VT _Src.RC:$src2),
7528 (i32 FROUND_NO_EXC)))>,
7529 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
7530 }
7532 // Scalar Conversion with rounding control (RC)
7533 multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7534 X86VectorVTInfo _Src, SDNode OpNodeRnd,
7535 X86FoldableSchedWrite sched> {
7536 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7537 (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7538 "$rc, $src2, $src1", "$src1, $src2, $rc",
7539 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7540 (_Src.VT _Src.RC:$src2), (i32 imm:$rc)))>,
7541 EVEX_4V, VEX_LIG, Sched<[sched]>,
7542 EVEX_B, EVEX_RC;
7543 }
7544 multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
7545 SDNode OpNodeRnd, X86FoldableSchedWrite sched,
7546 X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7547 let Predicates = [HasAVX512] in {
7548 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
7549 avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7550 OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
7551 }
7552 }
7554 multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
7555 X86FoldableSchedWrite sched,
7556 X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7557 let Predicates = [HasAVX512] in {
7558 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
7559 avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
7560 EVEX_CD8<32, CD8VT1>, XS;
7561 }
7562 }
7563 defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss",
7564 X86froundRnd, WriteCvtSD2SS, f64x_info,
7565 f32x_info>;
7566 defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
7567 X86fpextRnd, WriteCvtSS2SD, f32x_info,
7568 f64x_info>;
7570 def : Pat<(f64 (fpextend FR32X:$src)),
7571 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7572 Requires<[HasAVX512]>;
7573 def : Pat<(f64 (fpextend (loadf32 addr:$src))),
7574 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7575 Requires<[HasAVX512, OptForSize]>;
7577 def : Pat<(f64 (extloadf32 addr:$src)),
7578 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7579 Requires<[HasAVX512, OptForSize]>;
7581 def : Pat<(f64 (extloadf32 addr:$src)),
7582 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
7583 Requires<[HasAVX512, OptForSpeed]>;
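// The two extloadf32 patterns above give alternative lowerings: OptForSize
// folds the load into the conversion (one instruction), while OptForSpeed
// loads with vmovss first and converts register-to-register. Both are
// legal because extloadf32 may be implemented as a load plus fpextend.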
7585 def : Pat<(f32 (fpround FR64X:$src)),
7586 (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7587 Requires<[HasAVX512]>;
7589 def : Pat<(v4f32 (X86Movss
7590 (v4f32 VR128X:$dst),
7591 (v4f32 (scalar_to_vector
7592 (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
7593 (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
7594 Requires<[HasAVX512]>;
7596 def : Pat<(v2f64 (X86Movsd
7597 (v2f64 VR128X:$dst),
7598 (v2f64 (scalar_to_vector
7599 (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
7600 (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
7601 Requires<[HasAVX512]>;
7603 //===----------------------------------------------------------------------===//
7604 // AVX-512 Vector convert from signed/unsigned integer to float/double
7605 // and from float/double to signed/unsigned integer
7606 //===----------------------------------------------------------------------===//
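// Every vector converter below is instantiated in up to three forms: rr
// (register), rm (full-width memory) and rmb (broadcast memory), where a
// single scalar element is splat via X86VBroadcast before converting,
// e.g. (illustrative): vcvtdq2pd (%rax){1to8}, %zmm0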
7608 multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7609 X86VectorVTInfo _Src, SDNode OpNode,
7610 X86FoldableSchedWrite sched,
7611 string Broadcast = _.BroadcastStr,
7612 string Alias = "", X86MemOperand MemOp = _Src.MemOp> {
7614 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7615 (ins _Src.RC:$src), OpcodeStr, "$src", "$src",
7616 (_.VT (OpNode (_Src.VT _Src.RC:$src)))>,
7617 EVEX, Sched<[sched]>;
7619 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7620 (ins MemOp:$src), OpcodeStr#Alias, "$src", "$src",
7621 (_.VT (OpNode (_Src.VT
7622 (bitconvert (_Src.LdFrag addr:$src)))))>,
7623 EVEX, Sched<[sched.Folded]>;
7625 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7626 (ins _Src.ScalarMemOp:$src), OpcodeStr,
7627 "${src}"##Broadcast, "${src}"##Broadcast,
7628 (_.VT (OpNode (_Src.VT
7629 (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
7630 ))>, EVEX, EVEX_B,
7631 Sched<[sched.Folded]>;
7632 }
7633 // Conversion with SAE - suppress all exceptions
7634 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7635 X86VectorVTInfo _Src, SDNode OpNodeRnd,
7636 X86FoldableSchedWrite sched> {
7637 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7638 (ins _Src.RC:$src), OpcodeStr,
7639 "{sae}, $src", "$src, {sae}",
7640 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src),
7641 (i32 FROUND_NO_EXC)))>,
7642 EVEX, EVEX_B, Sched<[sched]>;
7643 }
7645 // Conversion with rounding control (RC)
7646 multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7647 X86VectorVTInfo _Src, SDNode OpNodeRnd,
7648 X86FoldableSchedWrite sched> {
7649 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7650 (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
7651 "$rc, $src", "$src, $rc",
7652 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc)))>,
7653 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
7654 }
7656 // Extend Float to Double
7657 multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
7658 X86SchedWriteWidths sched> {
7659 let Predicates = [HasAVX512] in {
7660 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info,
7661 fpextend, sched.ZMM>,
7662 avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
7663 X86vfpextRnd, sched.ZMM>, EVEX_V512;
7665 let Predicates = [HasVLX] in {
7666 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
7667 X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
7668 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
7669 sched.YMM>, EVEX_V256;
7670 }
7671 }
7673 // Truncate Double to Float
7674 multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
7675 let Predicates = [HasAVX512] in {
7676 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, sched.ZMM>,
7677 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
7678 X86vfproundRnd, sched.ZMM>, EVEX_V512;
7680 let Predicates = [HasVLX] in {
7681 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
7682 X86vfpround, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
7683 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
7684 sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7686 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7687 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7688 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7689 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0, "intel">;
7690 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7691 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7692 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7693 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0, "intel">;
7694 }
7695 }
7697 defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
7698 VEX_W, PD, EVEX_CD8<64, CD8VF>;
7699 defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
7700 PS, EVEX_CD8<32, CD8VH>;
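// Note the differing EVEX_CD8 tuples: vcvtpd2ps uses CD8VF (full-vector
// memory operand) while vcvtps2pd uses CD8VH, since its source is only
// half a vector wide; the tuple controls how the compressed 8-bit
// displacement is scaled.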
7702 def : Pat<(v8f64 (extloadv8f32 addr:$src)),
7703 (VCVTPS2PDZrm addr:$src)>;
7705 let Predicates = [HasVLX] in {
7706 def : Pat<(X86vzmovl (v2f64 (bitconvert
7707 (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
7708 (VCVTPD2PSZ128rr VR128X:$src)>;
7709 def : Pat<(X86vzmovl (v2f64 (bitconvert
7710 (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
7711 (VCVTPD2PSZ128rm addr:$src)>;
7712 def : Pat<(v2f64 (extloadv2f32 addr:$src)),
7713 (VCVTPS2PDZ128rm addr:$src)>;
7714 def : Pat<(v4f64 (extloadv4f32 addr:$src)),
7715 (VCVTPS2PDZ256rm addr:$src)>;
7716 }
7718 // Convert Signed/Unsigned Doubleword to Double
7719 multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
7720 SDNode OpNode128, X86SchedWriteWidths sched> {
7721 // No rounding in this op
7722 let Predicates = [HasAVX512] in
7723 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
7724 sched.ZMM>, EVEX_V512;
7726 let Predicates = [HasVLX] in {
7727 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
7728 OpNode128, sched.XMM, "{1to2}", "", i64mem>, EVEX_V128;
7729 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
7730 sched.YMM>, EVEX_V256;
7731 }
7732 }
7734 // Convert Signed/Unsigned Doubleword to Float
7735 multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
7736 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7737 let Predicates = [HasAVX512] in
7738 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
7739 sched.ZMM>,
7740 avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
7741 OpNodeRnd, sched.ZMM>, EVEX_V512;
7743 let Predicates = [HasVLX] in {
7744 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
7745 sched.XMM>, EVEX_V128;
7746 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
7747 sched.YMM>, EVEX_V256;
7748 }
7749 }
7751 // Convert Float to Signed/Unsigned Doubleword with truncation
7752 multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7753 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7754 let Predicates = [HasAVX512] in {
7755 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7756 sched.ZMM>,
7757 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
7758 OpNodeRnd, sched.ZMM>, EVEX_V512;
7760 let Predicates = [HasVLX] in {
7761 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7762 sched.XMM>, EVEX_V128;
7763 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7764 sched.YMM>, EVEX_V256;
7765 }
7766 }
7768 // Convert Float to Signed/Unsigned Doubleword
7769 multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7770 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7771 let Predicates = [HasAVX512] in {
7772 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7773 sched.ZMM>,
7774 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
7775 OpNodeRnd, sched.ZMM>, EVEX_V512;
7777 let Predicates = [HasVLX] in {
7778 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7779 sched.XMM>, EVEX_V128;
7780 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7781 sched.YMM>, EVEX_V256;
7782 }
7783 }
7785 // Convert Double to Signed/Unsigned Doubleword with truncation
7786 multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7787 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7788 let Predicates = [HasAVX512] in {
7789 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7790 sched.ZMM>,
7791 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
7792 OpNodeRnd, sched.ZMM>, EVEX_V512;
7794 let Predicates = [HasVLX] in {
7795 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7796 // memory forms of these instructions in Asm Parser. They have the same
7797 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7798 // due to the same reason.
7799 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
7800 OpNode, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
7801 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7802 sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7804 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7805 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7806 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7807 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0, "intel">;
7808 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7809 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7810 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7811 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0, "intel">;
7812 }
7813 }
7815 // Convert Double to Signed/Unsigned Doubleword
7816 multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7817 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7818 let Predicates = [HasAVX512] in {
7819 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7820 sched.ZMM>,
7821 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
7822 OpNodeRnd, sched.ZMM>, EVEX_V512;
7824 let Predicates = [HasVLX] in {
7825 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7826 // memory forms of these instructions in Asm Parser. They have the same
7827 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7828 // due to the same reason.
7829 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
7830 sched.XMM, "{1to2}", "{x}">, EVEX_V128;
7831 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7832 sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7834 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7835 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7836 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7837 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0, "intel">;
7838 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7839 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7840 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7841 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0, "intel">;
7842 }
7843 }
7845 // Convert Double to Signed/Unsigned Quadword
7846 multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7847 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7848 let Predicates = [HasDQI] in {
7849 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7850 sched.ZMM>,
7851 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
7852 OpNodeRnd, sched.ZMM>, EVEX_V512;
7854 let Predicates = [HasDQI, HasVLX] in {
7855 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7856 sched.XMM>, EVEX_V128;
7857 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7858 sched.YMM>, EVEX_V256;
7859 }
7860 }
7862 // Convert Double to Signed/Unsigned Quadword with truncation
7863 multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7864 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7865 let Predicates = [HasDQI] in {
7866 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7867 sched.ZMM>,
7868 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
7869 OpNodeRnd, sched.ZMM>, EVEX_V512;
7871 let Predicates = [HasDQI, HasVLX] in {
7872 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7873 sched.XMM>, EVEX_V128;
7874 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7875 sched.YMM>, EVEX_V256;
7876 }
7877 }
7879 // Convert Signed/Unsigned Quadword to Double
7880 multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
7881 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7882 let Predicates = [HasDQI] in {
7883 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
7884 sched.ZMM>,
7885 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
7886 OpNodeRnd, sched.ZMM>, EVEX_V512;
7888 let Predicates = [HasDQI, HasVLX] in {
7889 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
7890 sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
7891 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
7892 sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
7893 }
7894 }
7896 // Convert Float to Signed/Unsigned Quadword
7897 multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7898 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7899 let Predicates = [HasDQI] in {
7900 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
7901 sched.ZMM>,
7902 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
7903 OpNodeRnd, sched.ZMM>, EVEX_V512;
7905 let Predicates = [HasDQI, HasVLX] in {
7906 // Explicitly specified broadcast string, since we take only 2 elements
7907 // from v4f32x_info source
7908 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
7909 sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
7910 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
7911 sched.YMM>, EVEX_V256;
7912 }
7913 }
7915 // Convert Float to Signed/Unsigned Quadword with truncation
7916 multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7917 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7918 let Predicates = [HasDQI] in {
7919 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, sched.ZMM>,
7920 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
7921 OpNodeRnd, sched.ZMM>, EVEX_V512;
7923 let Predicates = [HasDQI, HasVLX] in {
7924 // Explicitly specified broadcast string, since we take only 2 elements
7925 // from v4f32x_info source
7926 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
7927 sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
7928 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
7929 sched.YMM>, EVEX_V256;
7930 }
7931 }
7933 // Convert Signed/Unsigned Quadword to Float
7934 multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
7935 SDNode OpNode128, SDNode OpNodeRnd,
7936 X86SchedWriteWidths sched> {
7937 let Predicates = [HasDQI] in {
7938 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
7939 sched.ZMM>,
7940 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
7941 OpNodeRnd, sched.ZMM>, EVEX_V512;
7943 let Predicates = [HasDQI, HasVLX] in {
7944 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7945 // memory forms of these instructions in Asm Parser. They have the same
7946 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7947 // due to the same reason.
7948 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode128,
7949 sched.XMM, "{1to2}", "{x}">, EVEX_V128,
7950 NotEVEX2VEXConvertible;
7951 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
7952 sched.YMM, "{1to4}", "{y}">, EVEX_V256,
7953 NotEVEX2VEXConvertible;
7955 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7956 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7957 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7958 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0, "intel">;
7959 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7960 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7961 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7962 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0, "intel">;
7963 }
7964 }
7966 defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
7967 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
7969 defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
7970 X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
7971 PS, EVEX_CD8<32, CD8VF>;
7973 defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86cvttp2si,
7974 X86cvttp2siRnd, SchedWriteCvtPS2DQ>,
7975 XS, EVEX_CD8<32, CD8VF>;
7977 defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86cvttp2si,
7978 X86cvttp2siRnd, SchedWriteCvtPD2DQ>,
7979 PD, VEX_W, EVEX_CD8<64, CD8VF>;
7981 defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86cvttp2ui,
7982 X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PS,
7983 EVEX_CD8<32, CD8VF>;
7985 defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui,
7986 X86cvttp2uiRnd, SchedWriteCvtPD2DQ>,
7987 PS, VEX_W, EVEX_CD8<64, CD8VF>;
7989 defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
7990 X86VUintToFP, SchedWriteCvtDQ2PD>, XS,
7991 EVEX_CD8<32, CD8VH>;
7993 defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
7994 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD,
7995 EVEX_CD8<32, CD8VF>;
7997 defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
7998 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
7999 EVEX_CD8<32, CD8VF>;
8001 defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
8002 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
8003 VEX_W, EVEX_CD8<64, CD8VF>;
8005 defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
8006 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8007 PS, EVEX_CD8<32, CD8VF>;
8009 defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
8010 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8011 PS, EVEX_CD8<64, CD8VF>;
8013 defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
8014 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8015 PD, EVEX_CD8<64, CD8VF>;
8017 defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
8018 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8019 EVEX_CD8<32, CD8VH>;
8021 defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
8022 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8023 PD, EVEX_CD8<64, CD8VF>;
8025 defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
8026 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
8027 EVEX_CD8<32, CD8VH>;
8029 defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86cvttp2si,
8030 X86cvttp2siRnd, SchedWriteCvtPD2DQ>, VEX_W,
8031 PD, EVEX_CD8<64, CD8VF>;
8033 defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86cvttp2si,
8034 X86cvttp2siRnd, SchedWriteCvtPS2DQ>, PD,
8035 EVEX_CD8<32, CD8VH>;
8037 defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86cvttp2ui,
8038 X86cvttp2uiRnd, SchedWriteCvtPD2DQ>, VEX_W,
8039 PD, EVEX_CD8<64, CD8VF>;
8041 defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui,
8042 X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PD,
8043 EVEX_CD8<32, CD8VH>;
8045 defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
8046 X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
8047 EVEX_CD8<64, CD8VF>;
8049 defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
8050 X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
8051 EVEX_CD8<64, CD8VF>;
8053 defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP,
8054 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS,
8055 EVEX_CD8<64, CD8VF>;
8057 defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP,
8058 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD,
8059 EVEX_CD8<64, CD8VF>;
8061 let Predicates = [HasAVX512] in {
8062 def : Pat<(v16i32 (fp_to_sint (v16f32 VR512:$src))),
8063 (VCVTTPS2DQZrr VR512:$src)>;
8064 def : Pat<(v16i32 (fp_to_sint (loadv16f32 addr:$src))),
8065 (VCVTTPS2DQZrm addr:$src)>;
8067 def : Pat<(v16i32 (fp_to_uint (v16f32 VR512:$src))),
8068 (VCVTTPS2UDQZrr VR512:$src)>;
8069 def : Pat<(v16i32 (fp_to_uint (loadv16f32 addr:$src))),
8070 (VCVTTPS2UDQZrm addr:$src)>;
8072 def : Pat<(v8i32 (fp_to_sint (v8f64 VR512:$src))),
8073 (VCVTTPD2DQZrr VR512:$src)>;
8074 def : Pat<(v8i32 (fp_to_sint (loadv8f64 addr:$src))),
8075 (VCVTTPD2DQZrm addr:$src)>;
8077 def : Pat<(v8i32 (fp_to_uint (v8f64 VR512:$src))),
8078 (VCVTTPD2UDQZrr VR512:$src)>;
8079 def : Pat<(v8i32 (fp_to_uint (loadv8f64 addr:$src))),
8080 (VCVTTPD2UDQZrm addr:$src)>;
8081 }
8083 let Predicates = [HasVLX] in {
8084 def : Pat<(v4i32 (fp_to_sint (v4f32 VR128X:$src))),
8085 (VCVTTPS2DQZ128rr VR128X:$src)>;
8086 def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))),
8087 (VCVTTPS2DQZ128rm addr:$src)>;
8089 def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src))),
8090 (VCVTTPS2UDQZ128rr VR128X:$src)>;
8091 def : Pat<(v4i32 (fp_to_uint (loadv4f32 addr:$src))),
8092 (VCVTTPS2UDQZ128rm addr:$src)>;
8094 def : Pat<(v8i32 (fp_to_sint (v8f32 VR256X:$src))),
8095 (VCVTTPS2DQZ256rr VR256X:$src)>;
8096 def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))),
8097 (VCVTTPS2DQZ256rm addr:$src)>;
8099 def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src))),
8100 (VCVTTPS2UDQZ256rr VR256X:$src)>;
8101 def : Pat<(v8i32 (fp_to_uint (loadv8f32 addr:$src))),
8102 (VCVTTPS2UDQZ256rm addr:$src)>;
8104 def : Pat<(v4i32 (fp_to_sint (v4f64 VR256X:$src))),
8105 (VCVTTPD2DQZ256rr VR256X:$src)>;
8106 def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
8107 (VCVTTPD2DQZ256rm addr:$src)>;
8109 def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src))),
8110 (VCVTTPD2UDQZ256rr VR256X:$src)>;
8111 def : Pat<(v4i32 (fp_to_uint (loadv4f64 addr:$src))),
8112 (VCVTTPD2UDQZ256rm addr:$src)>;
8113 }
8115 let Predicates = [HasDQI] in {
8116 def : Pat<(v8i64 (fp_to_sint (v8f32 VR256X:$src))),
8117 (VCVTTPS2QQZrr VR256X:$src)>;
8118 def : Pat<(v8i64 (fp_to_sint (loadv8f32 addr:$src))),
8119 (VCVTTPS2QQZrm addr:$src)>;
8121 def : Pat<(v8i64 (fp_to_uint (v8f32 VR256X:$src))),
8122 (VCVTTPS2UQQZrr VR256X:$src)>;
8123 def : Pat<(v8i64 (fp_to_uint (loadv8f32 addr:$src))),
8124 (VCVTTPS2UQQZrm addr:$src)>;
8126 def : Pat<(v8i64 (fp_to_sint (v8f64 VR512:$src))),
8127 (VCVTTPD2QQZrr VR512:$src)>;
8128 def : Pat<(v8i64 (fp_to_sint (loadv8f64 addr:$src))),
8129 (VCVTTPD2QQZrm addr:$src)>;
8131 def : Pat<(v8i64 (fp_to_uint (v8f64 VR512:$src))),
8132 (VCVTTPD2UQQZrr VR512:$src)>;
8133 def : Pat<(v8i64 (fp_to_uint (loadv8f64 addr:$src))),
8134 (VCVTTPD2UQQZrm addr:$src)>;
8135 }
8137 let Predicates = [HasDQI, HasVLX] in {
8138 def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src))),
8139 (VCVTTPS2QQZ256rr VR128X:$src)>;
8140 def : Pat<(v4i64 (fp_to_sint (loadv4f32 addr:$src))),
8141 (VCVTTPS2QQZ256rm addr:$src)>;
8143 def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src))),
8144 (VCVTTPS2UQQZ256rr VR128X:$src)>;
8145 def : Pat<(v4i64 (fp_to_uint (loadv4f32 addr:$src))),
8146 (VCVTTPS2UQQZ256rm addr:$src)>;
8148 def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src))),
8149 (VCVTTPD2QQZ128rr VR128X:$src)>;
8150 def : Pat<(v2i64 (fp_to_sint (loadv2f64 addr:$src))),
8151 (VCVTTPD2QQZ128rm addr:$src)>;
8153 def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src))),
8154 (VCVTTPD2UQQZ128rr VR128X:$src)>;
8155 def : Pat<(v2i64 (fp_to_uint (loadv2f64 addr:$src))),
8156 (VCVTTPD2UQQZ128rm addr:$src)>;
8158 def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src))),
8159 (VCVTTPD2QQZ256rr VR256X:$src)>;
8160 def : Pat<(v4i64 (fp_to_sint (loadv4f64 addr:$src))),
8161 (VCVTTPD2QQZ256rm addr:$src)>;
8163 def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src))),
8164 (VCVTTPD2UQQZ256rr VR256X:$src)>;
8165 def : Pat<(v4i64 (fp_to_uint (loadv4f64 addr:$src))),
8166 (VCVTTPD2UQQZ256rm addr:$src)>;
8167 }
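// Without AVX512VL there are no 128/256-bit forms of the unsigned
// conversions, so the patterns below widen the source to 512 bits with
// INSERT_SUBREG, use the Z-suffixed instruction, and extract the low
// subregister again; the undefined upper lanes never reach the result.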
8169 let Predicates = [HasAVX512, NoVLX] in {
8170 def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
8171 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
8172 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
8173 VR256X:$src1, sub_ymm)))), sub_ymm)>;
8175 def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
8176 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
8177 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
8178 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8180 def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))),
8181 (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
8182 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8183 VR256X:$src1, sub_ymm)))), sub_xmm)>;
8185 def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
8186 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
8187 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
8188 VR256X:$src1, sub_ymm)))), sub_ymm)>;
8190 def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
8191 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
8192 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
8193 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8195 def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
8196 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
8197 (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
8198 VR128X:$src1, sub_xmm)))), sub_ymm)>;
8200 def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
8201 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
8202 (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
8203 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8204 }
8206 let Predicates = [HasAVX512, HasVLX] in {
8207 def : Pat<(X86vzmovl (v2i64 (bitconvert
8208 (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
8209 (VCVTPD2DQZ128rr VR128X:$src)>;
8210 def : Pat<(X86vzmovl (v2i64 (bitconvert
8211 (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
8212 (VCVTPD2DQZ128rm addr:$src)>;
8213 def : Pat<(X86vzmovl (v2i64 (bitconvert
8214 (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
8215 (VCVTPD2UDQZ128rr VR128X:$src)>;
8216 def : Pat<(X86vzmovl (v2i64 (bitconvert
8217 (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
8218 (VCVTTPD2DQZ128rr VR128X:$src)>;
8219 def : Pat<(X86vzmovl (v2i64 (bitconvert
8220 (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
8221 (VCVTTPD2DQZ128rm addr:$src)>;
8222 def : Pat<(X86vzmovl (v2i64 (bitconvert
8223 (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
8224 (VCVTTPD2UDQZ128rr VR128X:$src)>;
8226 def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8227 (VCVTDQ2PDZ128rm addr:$src)>;
8228 def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
8229 (VCVTDQ2PDZ128rm addr:$src)>;
8231 def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8232 (VCVTUDQ2PDZ128rm addr:$src)>;
8233 def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
8234 (VCVTUDQ2PDZ128rm addr:$src)>;
8235 }
8237 let Predicates = [HasAVX512] in {
8238 def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
8239 (VCVTPD2PSZrm addr:$src)>;
8240 def : Pat<(v8f64 (extloadv8f32 addr:$src)),
8241 (VCVTPS2PDZrm addr:$src)>;
8242 }
8244 let Predicates = [HasDQI, HasVLX] in {
8245 def : Pat<(X86vzmovl (v2f64 (bitconvert
8246 (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
8247 (VCVTQQ2PSZ128rr VR128X:$src)>;
8248 def : Pat<(X86vzmovl (v2f64 (bitconvert
8249 (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
8250 (VCVTUQQ2PSZ128rr VR128X:$src)>;
8251 }
8253 let Predicates = [HasDQI, NoVLX] in {
8254 def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))),
8255 (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
8256 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8257 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8259 def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))),
8260 (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
8261 (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
8262 VR128X:$src1, sub_xmm)))), sub_ymm)>;
8264 def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))),
8265 (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
8266 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8267 VR256X:$src1, sub_ymm)))), sub_ymm)>;
8269 def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))),
8270 (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
8271 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8272 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8274 def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))),
8275 (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
8276 (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
8277 VR128X:$src1, sub_xmm)))), sub_ymm)>;
8279 def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src1))),
8280 (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
8281 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8282 VR256X:$src1, sub_ymm)))), sub_ymm)>;
8284 def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
8285 (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
8286 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8287 VR256X:$src1, sub_ymm)))), sub_xmm)>;
8289 def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
8290 (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
8291 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8292 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8294 def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
8295 (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
8296 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8297 VR256X:$src1, sub_ymm)))), sub_ymm)>;
8299 def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
8300 (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
8301 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8302 VR256X:$src1, sub_ymm)))), sub_xmm)>;
8304 def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
8305 (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
8306 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8307 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8309 def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
8310 (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
8311 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8312 VR256X:$src1, sub_ymm)))), sub_ymm)>;
8313 }
8315 //===----------------------------------------------------------------------===//
8316 // Half precision conversion instructions
8317 //===----------------------------------------------------------------------===//
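// vcvtph2ps converts packed IEEE half-precision values to single precision.
// Going through AVX512_maskable, each defm below also provides the
// merge-masked and zero-masked assembly forms, e.g.:
//   vcvtph2ps %ymm1, %zmm0
//   vcvtph2ps %ymm1, %zmm0 {%k1}
//   vcvtph2ps %ymm1, %zmm0 {%k1} {z}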
8319 multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8320 X86MemOperand x86memop, PatFrag ld_frag,
8321 X86FoldableSchedWrite sched> {
8322 defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
8323 (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
8324 (X86cvtph2ps (_src.VT _src.RC:$src))>,
8325 T8PD, Sched<[sched]>;
8326 defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
8327 (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
8328 (X86cvtph2ps (_src.VT
8329 (bitconvert
8330 (ld_frag addr:$src))))>,
8331 T8PD, Sched<[sched.Folded]>;
8332 }
8334 multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8335 X86FoldableSchedWrite sched> {
8336 defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
8337 (ins _src.RC:$src), "vcvtph2ps",
8338 "{sae}, $src", "$src, {sae}",
8339 (X86cvtph2psRnd (_src.VT _src.RC:$src),
8340 (i32 FROUND_NO_EXC))>,
8341 T8PD, EVEX_B, Sched<[sched]>;
8342 }
8344 let Predicates = [HasAVX512] in
8345 defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64,
8346 WriteCvtPH2PSZ>,
8347 avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
8348 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8350 let Predicates = [HasVLX] in {
8351 defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
8352 loadv2i64, WriteCvtPH2PSY>, EVEX, EVEX_V256,
8353 EVEX_CD8<32, CD8VH>;
8354 defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
8355 loadv2i64, WriteCvtPH2PS>, EVEX, EVEX_V128,
8356 EVEX_CD8<32, CD8VH>;
8358 // Pattern match vcvtph2ps of a scalar i64 load.
8359 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
8360 (VCVTPH2PSZ128rm addr:$src)>;
8361 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
8362 (VCVTPH2PSZ128rm addr:$src)>;
8363 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
8364 (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
8365 (VCVTPH2PSZ128rm addr:$src)>;
8366 }
8368 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8369 X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
8370 defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
8371 (ins _src.RC:$src1, i32u8imm:$src2),
8372 "vcvtps2ph", "$src2, $src1", "$src1, $src2",
8373 (X86cvtps2ph (_src.VT _src.RC:$src1),
8374 (i32 imm:$src2)), 0, 0>,
8375 AVX512AIi8Base, Sched<[RR]>;
8376 let hasSideEffects = 0, mayStore = 1 in {
8377 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
8378 (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
8379 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8381 def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
8382 (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8383 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
8384 EVEX_K, Sched<[MR]>, NotMemoryFoldable;
8385 }
8386 }
8388 multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8389 SchedWrite Sched> {
8390 let hasSideEffects = 0 in
8391 defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
8392 (outs _dest.RC:$dst),
8393 (ins _src.RC:$src1, i32u8imm:$src2),
8394 "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
8395 EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
8396 }
8398 let Predicates = [HasAVX512] in {
8399 defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
8400 WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
8401 avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
8402 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8403 let Predicates = [HasVLX] in {
8404 defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
8405 WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
8406 EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
8407 defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
8408 WriteCvtPS2PH, WriteCvtPS2PHSt>,
8409 EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
8410 }
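// VCVTPS2PHZ128 leaves its four half-precision results in the low 64 bits
// of the XMM destination, so storing the converted value only needs the low
// quadword; the next two patterns match that store whether the extracted
// element is typed as f64 or i64.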
8412 def : Pat<(store (f64 (extractelt
8413 (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
8414 (iPTR 0))), addr:$dst),
8415 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
8416 def : Pat<(store (i64 (extractelt
8417 (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
8418 (iPTR 0))), addr:$dst),
8419 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
8420 def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
8421 (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
8422 def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),
8423 (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
8424 }
8426 // Patterns for matching conversions from float to half-float and vice versa.
8427 let Predicates = [HasVLX] in {
8428 // Use MXCSR.RC for rounding instead of explicitly specifying the default
8429 // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
8430 // configurations we support (the default). However, falling back to MXCSR is
8431 // more consistent with other instructions, which are always controlled by it.
8432 // It's encoded as 0b100.
8433 def : Pat<(fp_to_f16 FR32X:$src),
8434 (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (v8i16 (VCVTPS2PHZ128rr
8435 (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4))), sub_16bit))>;
8437 def : Pat<(f16_to_fp GR16:$src),
8438 (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
8439 (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)))), FR32X)) >;
8441 def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
8442 (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
8443 (v8i16 (VCVTPS2PHZ128rr
8444 (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4)))), FR32X))>;
8445 }
8447 // Unordered/Ordered scalar fp compare with SAE and set EFLAGS
8448 multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
8449 string OpcodeStr, X86FoldableSchedWrite sched> {
8450 let hasSideEffects = 0 in
8451 def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
8452 !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
8453 EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
8454 }
8456 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
8457 defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>,
8458 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
8459 defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>,
8460 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
8461 defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>,
8462 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
8463 defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>,
8464 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
8465 }
8467 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
8468 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
8469 "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
8470 EVEX_CD8<32, CD8VT1>;
8471 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
8472 "ucomisd", WriteFCom>, PD, EVEX,
8473 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8474 let Pattern = []<dag> in {
8475 defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
8476 "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
8477 EVEX_CD8<32, CD8VT1>;
8478 defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
8479 "comisd", WriteFCom>, PD, EVEX,
8480 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8481 }
8482 let isCodeGenOnly = 1 in {
8483 defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
8484 sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
8485 EVEX_CD8<32, CD8VT1>;
8486 defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
8487 sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX,
8488 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8490 defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
8491 sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
8492 EVEX_CD8<32, CD8VT1>;
8493 defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
8494 sse_load_f64, "comisd", WriteFCom>, PD, EVEX,
8495 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8496 }
8497 }
8499 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
8500 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8501 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8502 let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
8503 defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8504 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8505 "$src2, $src1", "$src1, $src2",
8506 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8507 EVEX_4V, Sched<[sched]>;
8508 defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8509 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8510 "$src2, $src1", "$src1, $src2",
8511 (OpNode (_.VT _.RC:$src1),
8512 _.ScalarIntMemCPat:$src2)>, EVEX_4V,
8513 Sched<[sched.Folded, ReadAfterLd]>;
8514 }
8515 }
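// The rcp14/rsqrt14 family returns reciprocal (square-root) estimates with
// a maximum relative error of 2^-14; unlike the legacy rcpss/rsqrtss these
// are also available for f64.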
8517 defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
8518 f32x_info>, EVEX_CD8<32, CD8VT1>,
8519 T8PD;
8520 defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
8521 f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
8522 T8PD;
8523 defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
8524 SchedWriteFRsqrt.Scl, f32x_info>,
8525 EVEX_CD8<32, CD8VT1>, T8PD;
8526 defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
8527 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
8528 EVEX_CD8<64, CD8VT1>, T8PD;
8530 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
8531 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
8532 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8533 let ExeDomain = _.ExeDomain in {
8534 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8535 (ins _.RC:$src), OpcodeStr, "$src", "$src",
8536 (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
8537 Sched<[sched]>;
8538 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8539 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8540 (OpNode (_.VT
8541 (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
8542 Sched<[sched.Folded, ReadAfterLd]>;
8543 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8544 (ins _.ScalarMemOp:$src), OpcodeStr,
8545 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8546 (OpNode (_.VT
8547 (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
8548 EVEX, T8PD, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
8549 }
8550 }
8552 multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
8553 X86SchedWriteWidths sched> {
8554 defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
8555 v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
8556 defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM,
8557 v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
8559 // Define only if AVX512VL feature is present.
8560 let Predicates = [HasVLX] in {
8561 defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8562 OpNode, sched.XMM, v4f32x_info>,
8563 EVEX_V128, EVEX_CD8<32, CD8VF>;
8564 defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8565 OpNode, sched.YMM, v8f32x_info>,
8566 EVEX_V256, EVEX_CD8<32, CD8VF>;
8567 defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8568 OpNode, sched.XMM, v2f64x_info>,
8569 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
8570 defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8571 OpNode, sched.YMM, v4f64x_info>,
8572 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
8573 }
8574 }
8576 defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
8577 defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
8579 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
8580 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
8581 SDNode OpNode, X86FoldableSchedWrite sched> {
8582 let ExeDomain = _.ExeDomain in {
8583 defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8584 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8585 "$src2, $src1", "$src1, $src2",
8586 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
8587 (i32 FROUND_CURRENT))>,
8588 Sched<[sched]>;
8590 defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8591 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8592 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
8593 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
8594 (i32 FROUND_NO_EXC))>, EVEX_B,
8595 Sched<[sched]>;
8597 defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8598 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8599 "$src2, $src1", "$src1, $src2",
8600 (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
8601 (i32 FROUND_CURRENT))>,
8602 Sched<[sched.Folded, ReadAfterLd]>;
8603 }
8604 }
8606 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8607 X86FoldableSchedWrite sched> {
8608 defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, sched>,
8609 EVEX_CD8<32, CD8VT1>;
8610 defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, sched>,
8611 EVEX_CD8<64, CD8VT1>, VEX_W;
8612 }
8614 let Predicates = [HasERI] in {
8615 defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SchedWriteFRcp.Scl>,
8616 T8PD, EVEX_4V;
8617 defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s,
8618 SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
8619 }
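// vgetexp extracts the unbiased exponent of its source as a floating-point
// value (roughly floor(log2(|x|))); it is used when expanding division and
// square-root refinement sequences.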
8621 defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds,
8622 SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
8623 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
8625 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8626 SDNode OpNode, X86FoldableSchedWrite sched> {
8627 let ExeDomain = _.ExeDomain in {
8628 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8629 (ins _.RC:$src), OpcodeStr, "$src", "$src",
8630 (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>,
8631 Sched<[sched]>;
8633 defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8634 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8635 (OpNode (_.VT
8636 (bitconvert (_.LdFrag addr:$src))),
8637 (i32 FROUND_CURRENT))>,
8638 Sched<[sched.Folded, ReadAfterLd]>;
8640 defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8641 (ins _.ScalarMemOp:$src), OpcodeStr,
8642 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8643 (OpNode (_.VT
8644 (X86VBroadcast (_.ScalarLdFrag addr:$src))),
8645 (i32 FROUND_CURRENT))>, EVEX_B,
8646 Sched<[sched.Folded, ReadAfterLd]>;
8647 }
8648 }
8649 multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8650 SDNode OpNode, X86FoldableSchedWrite sched> {
8651 let ExeDomain = _.ExeDomain in
8652 defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8653 (ins _.RC:$src), OpcodeStr,
8654 "{sae}, $src", "$src, {sae}",
8655 (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>,
8656 EVEX_B, Sched<[sched]>;
8657 }
8659 multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
8660 X86SchedWriteWidths sched> {
8661 defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
8662 avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
8663 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
8664 defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
8665 avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
8666 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
8667 }
8669 multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
8670 SDNode OpNode, X86SchedWriteWidths sched> {
8671 // Define only if AVX512VL feature is present.
8672 let Predicates = [HasVLX] in {
8673 defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, sched.XMM>,
8674 EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
8675 defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, sched.YMM>,
8676 EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
8677 defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, sched.XMM>,
8678 EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
8679 defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, sched.YMM>,
8680 EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
8681 }
8682 }
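// The AVX512ER forms below (vrcp28, vrsqrt28, vexp2, available on Knights
// Landing) tighten the estimate to a relative error of at most 2^-28, so a
// single Newton-Raphson refinement step is usually enough afterwards.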
8684 let Predicates = [HasERI] in {
8685 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SchedWriteFRsqrt>, EVEX;
8686 defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, SchedWriteFRcp>, EVEX;
8687 defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, SchedWriteFAdd>, EVEX;
8688 }
8689 defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SchedWriteFRnd>,
8690 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd,
8691 SchedWriteFRnd>, EVEX;
8693 multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
8694 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8695 let ExeDomain = _.ExeDomain in
8696 defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8697 (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
8698 (_.VT (X86fsqrtRnd _.RC:$src, (i32 imm:$rc)))>,
8699 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
8700 }
8702 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
8703 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8704 let ExeDomain = _.ExeDomain in {
8705 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8706 (ins _.RC:$src), OpcodeStr, "$src", "$src",
8707 (_.VT (fsqrt _.RC:$src))>, EVEX,
8708 Sched<[sched]>;
8709 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8710 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8711 (fsqrt (_.VT
8712 (bitconvert (_.LdFrag addr:$src))))>, EVEX,
8713 Sched<[sched.Folded, ReadAfterLd]>;
8714 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8715 (ins _.ScalarMemOp:$src), OpcodeStr,
8716 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8717 (fsqrt (_.VT
8718 (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
8719 EVEX, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
8720 }
8721 }
8723 multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
8724 X86SchedWriteSizes sched> {
8725 defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8726 sched.PS.ZMM, v16f32_info>,
8727 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8728 defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8729 sched.PD.ZMM, v8f64_info>,
8730 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8731 // Define only if AVX512VL feature is present.
8732 let Predicates = [HasVLX] in {
8733 defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8734 sched.PS.XMM, v4f32x_info>,
8735 EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
8736 defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8737 sched.PS.YMM, v8f32x_info>,
8738 EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
8739 defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8740 sched.PD.XMM, v2f64x_info>,
8741 EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8742 defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8743 sched.PD.YMM, v4f64x_info>,
8744 EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8745 }
8746 }
8748 multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
8749 X86SchedWriteSizes sched> {
8750 defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
8751 sched.PS.ZMM, v16f32_info>,
8752 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8753 defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
8754 sched.PD.ZMM, v8f64_info>,
8755 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8756 }
8758 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
8759 X86VectorVTInfo _, string Name> {
8760 let ExeDomain = _.ExeDomain in {
8761 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8762 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8763 "$src2, $src1", "$src1, $src2",
8764 (X86fsqrtRnds (_.VT _.RC:$src1),
8765 (_.VT _.RC:$src2),
8766 (i32 FROUND_CURRENT))>,
8767 Sched<[sched]>;
8768 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8769 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8770 "$src2, $src1", "$src1, $src2",
8771 (X86fsqrtRnds (_.VT _.RC:$src1),
8772 _.ScalarIntMemCPat:$src2,
8773 (i32 FROUND_CURRENT))>,
8774 Sched<[sched.Folded, ReadAfterLd]>;
8775 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8776 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
8777 "$rc, $src2, $src1", "$src1, $src2, $rc",
8778 (X86fsqrtRnds (_.VT _.RC:$src1),
8779 (_.VT _.RC:$src2),
8780 (i32 imm:$rc))>,
8781 EVEX_B, EVEX_RC, Sched<[sched]>;
8783 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
8784 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
8785 (ins _.FRC:$src1, _.FRC:$src2),
8786 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8789 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
8790 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
8791 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8792 Sched<[sched.Folded, ReadAfterLd]>;
8793 }
8794 }
8796 let Predicates = [HasAVX512] in {
8797 def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
8798 (!cast<Instruction>(Name#Zr)
8799 (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
8800 }
8802 let Predicates = [HasAVX512, OptForSize] in {
8803 def : Pat<(_.EltVT (fsqrt (load addr:$src))),
8804 (!cast<Instruction>(Name#Zm)
8805 (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
8806 }
8807 }
8809 multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
8810 X86SchedWriteSizes sched> {
8811 defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
8812 EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
8813 defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
8814 EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
8815 }
8817 defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
8818 avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
8820 defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
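// avx512_sqrt_scalar emits both the _Int forms, which operate on the whole
// XMM register and preserve the upper elements as the intrinsics require,
// and isCodeGenOnly FRC forms so that plain scalar fsqrt can select the
// EVEX encoding; the fsqrt patterns in the multiclass feed an IMPLICIT_DEF
// as the pass-through operand.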
8822 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
8823 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8824 let ExeDomain = _.ExeDomain in {
8825 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8826 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
8827 "$src3, $src2, $src1", "$src1, $src2, $src3",
8828 (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
8829 (i32 imm:$src3)))>,
8830 Sched<[sched]>;
8832 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8833 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
8834 "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
8835 (_.VT (X86RndScalesRnd (_.VT _.RC:$src1), (_.VT _.RC:$src2),
8836 (i32 imm:$src3), (i32 FROUND_NO_EXC)))>, EVEX_B,
8837 Sched<[sched]>;
8839 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8840 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
8841 OpcodeStr,
8842 "$src3, $src2, $src1", "$src1, $src2, $src3",
8843 (_.VT (X86RndScales _.RC:$src1,
8844 _.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>,
8845 Sched<[sched.Folded, ReadAfterLd]>;
8847 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
8848 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
8849 (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
8850 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
8851 []>, Sched<[sched]>;
8853 let mayLoad = 1 in
8854 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
8855 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
8856 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
8857 []>, Sched<[sched.Folded, ReadAfterLd]>;
8858 }
8859 }
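// VRNDSCALE immediates: imm[1:0] picks the rounding mode (0 = nearest,
// 1 = down, 2 = up, 3 = truncate), imm[2] selects MXCSR.RC instead, and
// imm[3] suppresses the precision (inexact) exception. Hence 0x9 = floor,
// 0xa = ceil, 0xb = trunc, 0x4 = rint and 0xc = nearbyint below.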
8861 let Predicates = [HasAVX512] in {
8862 def : Pat<(ffloor _.FRC:$src),
8863 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8864 _.FRC:$src, (i32 0x9)))>;
8865 def : Pat<(fceil _.FRC:$src),
8866 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8867 _.FRC:$src, (i32 0xa)))>;
8868 def : Pat<(ftrunc _.FRC:$src),
8869 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8870 _.FRC:$src, (i32 0xb)))>;
8871 def : Pat<(frint _.FRC:$src),
8872 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8873 _.FRC:$src, (i32 0x4)))>;
8874 def : Pat<(fnearbyint _.FRC:$src),
8875 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8876 _.FRC:$src, (i32 0xc)))>;
8877 }
8879 let Predicates = [HasAVX512, OptForSize] in {
8880 def : Pat<(ffloor (_.ScalarLdFrag addr:$src)),
8881 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8882 addr:$src, (i32 0x9)))>;
8883 def : Pat<(fceil (_.ScalarLdFrag addr:$src)),
8884 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8885 addr:$src, (i32 0xa)))>;
8886 def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)),
8887 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8888 addr:$src, (i32 0xb)))>;
8889 def : Pat<(frint (_.ScalarLdFrag addr:$src)),
8890 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8891 addr:$src, (i32 0x4)))>;
8892 def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)),
8893 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8894 addr:$src, (i32 0xc)))>;
8895 }
8896 }
8898 defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
8899 SchedWriteFRnd.Scl, f32x_info>,
8900 AVX512AIi8Base, EVEX_4V,
8901 EVEX_CD8<32, CD8VT1>;
8903 defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
8904 SchedWriteFRnd.Scl, f64x_info>,
8905 VEX_W, AVX512AIi8Base, EVEX_4V,
8906 EVEX_CD8<64, CD8VT1>;
8908 multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
8909 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
8910 dag OutMask, Predicate BasePredicate> {
8911 let Predicates = [BasePredicate] in {
8912 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
8913 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
8914 (extractelt _.VT:$dst, (iPTR 0))))),
8915 (!cast<Instruction>("V"#OpcPrefix#r_Intk)
8916 _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
8918 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
8919 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
8920 ZeroFP))),
8921 (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
8922 OutMask, _.VT:$src2, _.VT:$src1)>;
8923 }
8924 }
8926 defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
8927 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
8928 fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
8929 defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
8930 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
8931 fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
8933 multiclass avx512_masked_scalar_imm<SDNode OpNode, string OpcPrefix, SDNode Move,
8934 X86VectorVTInfo _, PatLeaf ZeroFP,
8935 bits<8> ImmV, Predicate BasePredicate> {
8936 let Predicates = [BasePredicate] in {
8937 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects VK1WM:$mask,
8938 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
8939 (extractelt _.VT:$dst, (iPTR 0))))),
8940 (!cast<Instruction>("V"#OpcPrefix#Zr_Intk)
8941 _.VT:$dst, VK1WM:$mask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;
8943 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects VK1WM:$mask,
8944 (OpNode (extractelt _.VT:$src2, (iPTR 0))), ZeroFP))),
8945 (!cast<Instruction>("V"#OpcPrefix#Zr_Intkz)
8946 VK1WM:$mask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;
8947 }
8948 }
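// The defms below fold a masked select wrapped around scalar floor/ceil
// into the masked VRNDSCALE forms; immediate 0x01 rounds toward -inf and
// 0x02 toward +inf.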
8950 defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESS", X86Movss,
8951 v4f32x_info, fp32imm0, 0x01, HasAVX512>;
8952 defm : avx512_masked_scalar_imm<fceil, "RNDSCALESS", X86Movss,
8953 v4f32x_info, fp32imm0, 0x02, HasAVX512>;
8954 defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESD", X86Movsd,
8955 v2f64x_info, fp64imm0, 0x01, HasAVX512>;
8956 defm : avx512_masked_scalar_imm<fceil, "RNDSCALESD", X86Movsd,
8957 v2f64x_info, fp64imm0, 0x02, HasAVX512>;
8960 //===----------------------------------------------------------------------===//
8961 // Integer truncate and extend operations
8962 //===----------------------------------------------------------------------===//
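// The vpmov family narrows each element of the source: the plain forms
// truncate modulo 2^N, the "s" forms use signed saturation, and the "us"
// forms use unsigned saturation. avx512_trunc_mr_lowering additionally maps
// truncating stores and masked truncating stores onto the mr/mrk forms.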
8964 multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
8965 X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
8966 X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
8967 let ExeDomain = DestInfo.ExeDomain in
8968 defm rr : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst),
8969 (ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1",
8970 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1)))>,
8971 EVEX, T8XS, Sched<[sched]>;
8973 let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
8974 def mr : AVX512XS8I<opc, MRMDestMem, (outs),
8975 (ins x86memop:$dst, SrcInfo.RC:$src),
8976 OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
8977 EVEX, Sched<[sched.Folded]>;
8979 def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
8980 (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
8981 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
8982 EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
8983 }//mayStore = 1, hasSideEffects = 0
8984 }
8986 multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
8987 X86VectorVTInfo DestInfo,
8988 PatFrag truncFrag, PatFrag mtruncFrag,
8989 string Name> {
8991 def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
8992 (!cast<Instruction>(Name#SrcInfo.ZSuffix##mr)
8993 addr:$dst, SrcInfo.RC:$src)>;
8995 def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
8996 SrcInfo.KRCWM:$mask),
8997 (!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk)
8998 addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
8999 }
9001 multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9002 SDNode OpNode256, SDNode OpNode512, X86FoldableSchedWrite sched,
9003 AVX512VLVectorVTInfo VTSrcInfo,
9004 X86VectorVTInfo DestInfoZ128,
9005 X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9006 X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9007 X86MemOperand x86memopZ, PatFrag truncFrag,
9008 PatFrag mtruncFrag, Predicate prd = HasAVX512>{
9010 let Predicates = [HasVLX, prd] in {
9011 defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, sched,
9012 VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9013 avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
9014 truncFrag, mtruncFrag, NAME>, EVEX_V128;
9016 defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, sched,
9017 VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9018 avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
9019 truncFrag, mtruncFrag, NAME>, EVEX_V256;
9020 }
9021 let Predicates = [prd] in
9022 defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, sched,
9023 VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9024 avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
9025 truncFrag, mtruncFrag, NAME>, EVEX_V512;
9026 }
9028 multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9029 X86FoldableSchedWrite sched, PatFrag StoreNode,
9030 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
9031 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode, sched,
9032 avx512vl_i64_info, v16i8x_info, v16i8x_info,
9033 v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9034 MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
9035 }
9037 multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9038 X86FoldableSchedWrite sched, PatFrag StoreNode,
9039 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
9040 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, sched,
9041 avx512vl_i64_info, v8i16x_info, v8i16x_info,
9042 v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9043 MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
9044 }
9046 multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9047 X86FoldableSchedWrite sched, PatFrag StoreNode,
9048 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
9049 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, sched,
9050 avx512vl_i64_info, v4i32x_info, v4i32x_info,
9051 v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9052 MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
9053 }
9055 multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
9056 X86FoldableSchedWrite sched, PatFrag StoreNode,
9057 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
9058 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, sched,
9059 avx512vl_i32_info, v16i8x_info, v16i8x_info,
9060 v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
9061 MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
9062 }
9064 multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9065 X86FoldableSchedWrite sched, PatFrag StoreNode,
9066 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
9067 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, sched,
9068 avx512vl_i32_info, v8i16x_info, v8i16x_info,
9069 v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
9070 MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
9071 }
9073 multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9074 X86FoldableSchedWrite sched, PatFrag StoreNode,
9075 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
9076 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9077 sched, avx512vl_i16_info, v16i8x_info, v16i8x_info,
9078 v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
9079 MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
9080 }
9082 defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, WriteShuffle256,
9083 truncstorevi8, masked_truncstorevi8, X86vtrunc>;
9084 defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, WriteShuffle256,
9085 truncstore_s_vi8, masked_truncstore_s_vi8>;
9086 defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, WriteShuffle256,
9087 truncstore_us_vi8, masked_truncstore_us_vi8>;
9089 defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, WriteShuffle256,
9090 truncstorevi16, masked_truncstorevi16, X86vtrunc>;
9091 defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, WriteShuffle256,
9092 truncstore_s_vi16, masked_truncstore_s_vi16>;
9093 defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, WriteShuffle256,
9094 truncstore_us_vi16, masked_truncstore_us_vi16>;
9096 defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, WriteShuffle256,
9097 truncstorevi32, masked_truncstorevi32, X86vtrunc>;
9098 defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, WriteShuffle256,
9099 truncstore_s_vi32, masked_truncstore_s_vi32>;
9100 defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, WriteShuffle256,
9101 truncstore_us_vi32, masked_truncstore_us_vi32>;
9103 defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, WriteShuffle256,
9104 truncstorevi8, masked_truncstorevi8, X86vtrunc>;
9105 defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, WriteShuffle256,
9106 truncstore_s_vi8, masked_truncstore_s_vi8>;
9107 defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, WriteShuffle256,
9108 truncstore_us_vi8, masked_truncstore_us_vi8>;
9110 defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, WriteShuffle256,
9111 truncstorevi16, masked_truncstorevi16, X86vtrunc>;
9112 defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, WriteShuffle256,
9113 truncstore_s_vi16, masked_truncstore_s_vi16>;
9114 defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, WriteShuffle256,
9115 truncstore_us_vi16, masked_truncstore_us_vi16>;
9117 defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, WriteShuffle256,
9118 truncstorevi8, masked_truncstorevi8, X86vtrunc>;
9119 defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, WriteShuffle256,
9120 truncstore_s_vi8, masked_truncstore_s_vi8>;
9121 defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, WriteShuffle256,
9122 truncstore_us_vi8, masked_truncstore_us_vi8>;
9124 let Predicates = [HasAVX512, NoVLX] in {
9125 def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
9126 (v8i16 (EXTRACT_SUBREG
9127 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
9128 VR256X:$src, sub_ymm)))), sub_xmm))>;
9129 def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
9130 (v4i32 (EXTRACT_SUBREG
9131 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
9132 VR256X:$src, sub_ymm)))), sub_xmm))>;
9133 }
9135 let Predicates = [HasBWI, NoVLX] in {
9136 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9137 (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
9138 VR256X:$src, sub_ymm))), sub_xmm))>;
9139 }
9141 multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9142 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
9143 X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
9144 let ExeDomain = DestInfo.ExeDomain in {
9145 defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
9146 (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
9147 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
9148 EVEX, Sched<[sched]>;
9150 defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
9151 (ins x86memop:$src), OpcodeStr ,"$src", "$src",
9152 (DestInfo.VT (LdFrag addr:$src))>,
9153 EVEX, Sched<[sched.Folded]>;
9154 }
9155 }
9157 multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
9158 SDNode OpNode, SDNode InVecNode, string ExtTy,
9159 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9160 let Predicates = [HasVLX, HasBWI] in {
9161 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
9162 v16i8x_info, i64mem, LdFrag, InVecNode>,
9163 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
9165 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
9166 v16i8x_info, i128mem, LdFrag, OpNode>,
9167 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
9168 }
9169 let Predicates = [HasBWI] in {
9170 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
9171 v32i8x_info, i256mem, LdFrag, OpNode>,
9172 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
9173 }
9174 }
9176 multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
9177 SDNode OpNode, SDNode InVecNode, string ExtTy,
9178 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9179 let Predicates = [HasVLX, HasAVX512] in {
9180 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
9181 v16i8x_info, i32mem, LdFrag, InVecNode>,
9182 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
9184 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
9185 v16i8x_info, i64mem, LdFrag, OpNode>,
9186 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
9187 }
9188 let Predicates = [HasAVX512] in {
9189 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
9190 v16i8x_info, i128mem, LdFrag, OpNode>,
9191 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
9192 }
9193 }
9195 multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
9196 SDNode OpNode, SDNode InVecNode, string ExtTy,
9197 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9198 let Predicates = [HasVLX, HasAVX512] in {
9199 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9200 v16i8x_info, i16mem, LdFrag, InVecNode>,
9201 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
9203 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9204 v16i8x_info, i32mem, LdFrag, OpNode>,
9205 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
9206 }
9207 let Predicates = [HasAVX512] in {
9208 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9209 v16i8x_info, i64mem, LdFrag, OpNode>,
9210 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
9211 }
9212 }
9214 multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
9215 SDNode OpNode, SDNode InVecNode, string ExtTy,
9216 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9217 let Predicates = [HasVLX, HasAVX512] in {
9218 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
9219 v8i16x_info, i64mem, LdFrag, InVecNode>,
9220 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
9222 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
9223 v8i16x_info, i128mem, LdFrag, OpNode>,
9224 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
9225 }
9226 let Predicates = [HasAVX512] in {
9227 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
9228 v16i16x_info, i256mem, LdFrag, OpNode>,
9229 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
9230 }
9231 }
9233 multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
9234 SDNode OpNode, SDNode InVecNode, string ExtTy,
9235 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9236 let Predicates = [HasVLX, HasAVX512] in {
9237 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9238 v8i16x_info, i32mem, LdFrag, InVecNode>,
9239 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
9241 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9242 v8i16x_info, i64mem, LdFrag, OpNode>,
9243 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
9244 }
9245 let Predicates = [HasAVX512] in {
9246 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9247 v8i16x_info, i128mem, LdFrag, OpNode>,
9248 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
9249 }
9250 }
9252 multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
9253 SDNode OpNode, SDNode InVecNode, string ExtTy,
9254 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
9256 let Predicates = [HasVLX, HasAVX512] in {
9257 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9258 v4i32x_info, i64mem, LdFrag, InVecNode>,
9259 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
9261 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9262 v4i32x_info, i128mem, LdFrag, OpNode>,
9263 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
9264 }
9265 let Predicates = [HasAVX512] in {
9266 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9267 v8i32x_info, i256mem, LdFrag, OpNode>,
9268 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
9269 }
9270 }
9272 defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z", WriteShuffle256>;
9273 defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z", WriteShuffle256>;
9274 defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z", WriteShuffle256>;
9275 defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z", WriteShuffle256>;
9276 defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z", WriteShuffle256>;
9277 defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z", WriteShuffle256>;
9279 defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s", WriteShuffle256>;
9280 defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s", WriteShuffle256>;
9281 defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s", WriteShuffle256>;
9282 defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s", WriteShuffle256>;
9283 defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s", WriteShuffle256>;
9284 defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s", WriteShuffle256>;
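// For the 128-bit instantiations the source vector supplies more elements
// than the result consumes, which is why the "_invec" nodes (sext_invec,
// zext_invec) are used there: they extend only the low input elements.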
9287 multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
9288 SDNode InVecOp> {
9289 // 128-bit patterns
9290 let Predicates = [HasVLX, HasBWI] in {
9291 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9292 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9293 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9294 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9295 def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
9296 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9297 def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
9298 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9299 def : Pat<(v8i16 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
9300 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9301 }
9302 let Predicates = [HasVLX] in {
9303 def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9304 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9305 def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
9306 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9307 def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
9308 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9309 def : Pat<(v4i32 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
9310 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9312 def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
9313 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
9314 def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
9315 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
9316 def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
9317 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
9318 def : Pat<(v2i64 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
9319 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
9321 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9322 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9323 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9324 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9325 def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
9326 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9327 def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
9328 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9329 def : Pat<(v4i32 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
9330 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9332 def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9333 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9334 def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))),
9335 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9336 def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
9337 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9338 def : Pat<(v2i64 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
9339 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9341 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9342 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9343 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9344 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9345 def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))),
9346 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9347 def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
9348 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9349 def : Pat<(v2i64 (InVecOp (bc_v4i32 (loadv2i64 addr:$src)))),
9350 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9351 }
9352 // 256-bit patterns
9353 let Predicates = [HasVLX, HasBWI] in {
9354 def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
9355 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
9356 def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
9357 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
9358 def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
9359 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
9360 }
9361 let Predicates = [HasVLX] in {
9362 def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9363 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9364 def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
9365 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9366 def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
9367 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9368 def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
9369 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9371 def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9372 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9373 def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
9374 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9375 def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
9376 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9377 def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
9378 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9380 def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
9381 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
9382 def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
9383 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
9384 def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
9385 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
9387 def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9388 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
9389 def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
9390 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
9391 def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
9392 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
9393 def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
9394 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
9396 def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
9397 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
9398 def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
9399 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
9400 def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
9401 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
9402 }
9403 // 512-bit patterns
9404 let Predicates = [HasBWI] in {
9405 def : Pat<(v32i16 (ExtOp (bc_v32i8 (loadv4i64 addr:$src)))),
9406 (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
9407 }
9408 let Predicates = [HasAVX512] in {
9409 def : Pat<(v16i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
9410 (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
9412 def : Pat<(v8i64 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9413 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
9414 def : Pat<(v8i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
9415 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
9417 def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))),
9418 (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
9420 def : Pat<(v8i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
9421 (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
9423 def : Pat<(v8i64 (ExtOp (bc_v8i32 (loadv4i64 addr:$src)))),
9424 (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
9425 }
9426 }
9428 defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec>;
9429 defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec>;
9431 //===----------------------------------------------------------------------===//
9432 // GATHER - SCATTER Operations
9433 //===----------------------------------------------------------------------===//
9434 // FIXME: Improve scheduling of gather/scatter instructions.
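// A gather consumes its mask destructively: elements load where the mask
// bit is set and the hardware clears each bit as that element completes,
// which is modeled below by tying "$mask = $mask_wb" and marking $dst
// @earlyclobber.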
9435 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9436 X86MemOperand memop, PatFrag GatherNode,
9437 RegisterClass MaskRC = _.KRCWM> {
9438 let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
9439 ExeDomain = _.ExeDomain in
9440 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
9441 (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
9442 !strconcat(OpcodeStr#_.Suffix,
9443 "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
9444 [(set _.RC:$dst, MaskRC:$mask_wb,
9445 (GatherNode (_.VT _.RC:$src1), MaskRC:$mask,
9446 vectoraddr:$src2))]>, EVEX, EVEX_K,
9447 EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
9448 }
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
                                       vy512xmem, mgatherv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
                                       vz512mem, mgatherv8i64>, EVEX_V512, VEX_W;
  let Predicates = [HasVLX] in {
    defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                                            vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
    defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
                                            vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
    defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                            vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
    defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                            vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
  }
}

multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
                                       mgatherv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem,
                                       mgatherv8i64>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                                            vy256xmem, mgatherv8i32>, EVEX_V256;
    defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                            vy128xmem, mgatherv4i64>, EVEX_V256;
    defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                            vx128xmem, mgatherv4i32>, EVEX_V128;
    defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                            vx64xmem, mgatherv2i64, VK2WM>,
                                            EVEX_V128;
  }
}
defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
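
// For reference: with the instantiations above, the *_q_pd multiclass covers
// the 64-bit-element forms (VGATHERDPD/VGATHERQPD and VPGATHERDQ/VPGATHERQQ)
// and the *_d_ps multiclass the 32-bit-element forms (VGATHERDPS/VGATHERQPS
// and VPGATHERDD/VPGATHERQD), each at 128/256/512-bit width.
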
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, PatFrag ScatterNode,
                          RegisterClass MaskRC = _.KRCWM> {

let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in

  def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
            (ins memop:$dst, MaskRC:$mask, _.RC:$src),
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
            [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
                                    MaskRC:$mask, vectoraddr:$dst))]>,
            EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[WriteStore]>;
}

multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
                                        vy512xmem, mscatterv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
                                        vz512mem, mscatterv8i64>, EVEX_V512, VEX_W;
  let Predicates = [HasVLX] in {
    defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                                             vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
    defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
                                             vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
    defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                                             vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
    defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                             vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
  }
}

multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
                                        mscatterv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem,
                                        mscatterv8i64>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                                             vy256xmem, mscatterv8i32>, EVEX_V256;
    defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                             vy128xmem, mscatterv4i64>, EVEX_V256;
    defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                                             vx128xmem, mscatterv4i32>, EVEX_V128;
    defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                             vx64xmem, mscatterv2i64, VK2WM>,
                                             EVEX_V128;
  }
}

defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
                                          RegisterClass KRC, X86MemOperand memop> {
  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
  def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
               !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
               EVEX, EVEX_K, Sched<[WriteLoad]>;
}
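
// Note: the PF0 flavors below prefetch with a T0 hint (into L1) and the PF1
// flavors with a T1 hint (into L2). mayLoad and mayStore are both set because
// this one definition is shared by the gather prefetches (reads) and the
// scatter prefetches (prefetch with intent to write).
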
defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr> {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                  !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
                  EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
}

multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
  let Predicates = [prd] in
  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
  }
}

defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2", HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI>, VEX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI>, VEX_W;
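
// For example, vpmovm2b with k1 = 0b0101 produces a vector whose low byte
// elements are 0xFF, 0x00, 0xFF, 0x00: the (sext Vec.KRC:$src) pattern above
// replicates each mask bit across the full width of its element.
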
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr> {
    def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                        [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
                        EVEX, Sched<[WriteMove]>;
}

// Use the 512-bit version to implement the 128/256-bit versions in case NoVLX.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _,
                                           string Name> {

  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(Name#"Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                   _.KRC))>;
}
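
// A minimal sketch of the trick above, for v16i8 with BWI but no VLX: the
// xmm source is inserted into an undef zmm (INSERT_SUBREG), the 512-bit
// VPMOVB2MZrr runs on that, and only the low 16 bits of the resulting mask
// are kept (COPY_TO_REGCLASS to VK16); whatever the undefined upper elements
// contribute is ignored.
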
multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
                                          EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                                              EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                                              EVEX_V128;
  }
  let Predicates = [prd, NoVLX] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
  }
}

defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
                                              avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
                                              avx512vl_i16_info, HasBWI>, VEX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
                                              avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
                                              avx512vl_i64_info, HasDQI>, VEX_W;

// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
// is available, but BWI is not. We can't handle this in lowering because
// a target-independent DAG combine likes to combine sext and trunc.
let Predicates = [HasDQI, NoBWI] in {
  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}
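
// I.e. each i1 is first widened to a full i32 element with vpmovm2d (DQI),
// then the elements are narrowed to bytes/words with vpmovdb/vpmovdw, which
// only require AVX512F - no BWI is needed anywhere in the sequence.
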
//===----------------------------------------------------------------------===//
// AVX-512 - COMPRESS and EXPAND
//

multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                        string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86compress _.RC:$src1))>, AVX5128IBase,
              Sched<[sched]>;

  let mayStore = 1, hasSideEffects = 0 in
  def mr : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.RC:$src),
              OpcodeStr # "\t{$src, $dst|$dst, $src}",
              []>, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;

  def mrk : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
              []>,
              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;
}
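
// Illustrative semantics: vcompressps with mask k1 = 0b00001010 stores source
// elements 1 and 3 contiguously into the two lowest destination elements (or
// to consecutive memory locations for the mr/mrk forms); the remaining
// destination elements follow the usual merge/zero masking policy.
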
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix##mrk)
                            addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
}

multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPCOMPRESS?
defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
                                          avx512vl_i32_info>, EVEX, NotMemoryFoldable;
defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
                                          avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
                                          avx512vl_f32_info>, EVEX, NotMemoryFoldable;
defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
                                          avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;

multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                               string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86expand _.RC:$src1))>, AVX5128IBase,
              Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86expand (_.VT (bitconvert
                                      (_.LdFrag addr:$src1)))))>,
            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[sched.Folded, ReadAfterLd]>;
}

multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz)
                            _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz)
                            _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                                     (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix##rmk)
                            _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
}

multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPEXPAND?
defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
                                      avx512vl_i32_info>, EVEX;
defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
                                      avx512vl_i64_info>, EVEX, VEX_W;
defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
                                      avx512vl_f32_info>, EVEX;
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
                                      avx512vl_f64_info>, EVEX, VEX_W;
// Handle instructions:  reg_vec1 = op(reg_vec, imm)
//                                  op(mem_vec, imm)
//                                  op(broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT.
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2))>, Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                            (i32 imm:$src2))>,
                    Sched<[sched.Folded, ReadAfterLd]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
                    "${src1}"##_.BroadcastStr##", $src2",
                    (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
                            (i32 imm:$src2))>, EVEX_B,
                    Sched<[sched.Folded, ReadAfterLd]>;
  }
}

// Handle instructions:  reg_vec1 = op(reg_vec2, imm), {sae}
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                          SDNode OpNode, X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
                      "$src1, {sae}, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2),
                              (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched]>;
}

multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
                                           _.info512>,
                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd,
                                               sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM,
                                           _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM,
                                           _.info256>, EVEX_V256;
  }
}
// Handle instructions:  reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                                  op(reg_vec2, mem_vec, imm)
//                                  op(reg_vec2, broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT.
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
                            (i32 imm:$src3))>,
                    Sched<[sched.Folded, ReadAfterLd]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                            (i32 imm:$src3))>, EVEX_B,
                    Sched<[sched.Folded, ReadAfterLd]>;
  }
}

// Handle instructions:  reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                                  op(reg_vec2, mem_vec, imm)
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
                              X86VectorVTInfo SrcInfo>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                               (SrcInfo.VT SrcInfo.RC:$src2),
                               (i8 imm:$src3)))>,
                  Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                             (SrcInfo.VT (bitconvert
                                          (SrcInfo.LdFrag addr:$src2))),
                             (i8 imm:$src3)))>,
                Sched<[sched.Folded, ReadAfterLd]>;
  }
}
// Handle instructions:  reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                                  op(reg_vec2, mem_vec, imm)
//                                  op(reg_vec2, broadcast(eltVt), imm)
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{

  let ExeDomain = _.ExeDomain in
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                   OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr##", $src3",
                   (OpNode (_.VT _.RC:$src1),
                           (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                           (i8 imm:$src3))>, EVEX_B,
                   Sched<[sched.Folded, ReadAfterLd]>;
}

// Handle scalar instructions:  reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                                         op(reg_vec2, mem_scalar, imm)
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (scalar_to_vector
                                      (_.ScalarLdFrag addr:$src2))),
                            (i32 imm:$src3))>,
                    Sched<[sched.Folded, ReadAfterLd]>;
  }
}

// Handle instructions:  reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86FoldableSchedWrite sched,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3),
                              (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched]>;
}

// Handle scalar instructions:  reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3),
                              (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched]>;
}
multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, sched.ZMM, _.info512>,
                EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                EVEX_V256;
  }
}

multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
                   AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
  let Predicates = [Pred] in {
    defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
                           SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
                           SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
                           SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
  }
}

multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                                  bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
                                  Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                             EVEX_V512;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                                EVEX_V128;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                                EVEX_V256;
  }
}

multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
             avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, sched.XMM, _>;
  }
}

multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                    bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
                    SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
                            opcPs, OpNode, OpNodeRnd, sched, prd>,
                            EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
                            opcPd, OpNode, OpNodeRnd, sched, prd>,
                            EVEX_CD8<64, CD8VF>, VEX_W;
}
defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
                              X86VReduce, X86VReduceRnd, SchedWriteFRnd, HasDQI>,
                              AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
                              X86VRndScale, X86VRndScaleRnd, SchedWriteFRnd, HasAVX512>,
                              AVX512AIi8Base, EVEX;
defm VGETMANT  : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
                              X86VGetMant, X86VGetMantRnd, SchedWriteFRnd, HasAVX512>,
                              AVX512AIi8Base, EVEX;

defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                                                0x50, X86VRange, X86VRangeRnd,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
                                                0x50, X86VRange, X86VRangeRnd,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
      f64x_info, 0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
      0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
      0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
      0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
      0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
      0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
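
// The VRNDSCALE immediates used by the lowering below encode, in imm[3:0]:
// imm[3] = suppress precision exceptions, imm[2] = use the MXCSR rounding
// mode, imm[1:0] = rounding control when imm[2] is clear. Hence:
//   0x9 = round down (ffloor)    0xA = round up (fceil)
//   0xB = truncate (ftrunc)      0xC = MXCSR mode, no exceptions (fnearbyint)
//   0x4 = MXCSR mode, exceptions allowed (frint)
// imm[7:4] (the scale M, rounding to 2^-M) is zero in all of these.
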
multiclass AVX512_rndscale_lowering<X86VectorVTInfo _, string Suffix> {
  // Register
  def : Pat<(_.VT (ffloor _.RC:$src)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
             _.RC:$src, (i32 0x9))>;
  def : Pat<(_.VT (fnearbyint _.RC:$src)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
             _.RC:$src, (i32 0xC))>;
  def : Pat<(_.VT (fceil _.RC:$src)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
             _.RC:$src, (i32 0xA))>;
  def : Pat<(_.VT (frint _.RC:$src)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
             _.RC:$src, (i32 0x4))>;
  def : Pat<(_.VT (ftrunc _.RC:$src)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
             _.RC:$src, (i32 0xB))>;

  // Merge-masking
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor _.RC:$src), _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0x9))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint _.RC:$src), _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xC))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil _.RC:$src), _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xA))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint _.RC:$src), _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0x4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc _.RC:$src), _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xB))>;

  // Zero-masking
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor _.RC:$src),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
             _.KRCWM:$mask, _.RC:$src, (i32 0x9))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint _.RC:$src),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
             _.KRCWM:$mask, _.RC:$src, (i32 0xC))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil _.RC:$src),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
             _.KRCWM:$mask, _.RC:$src, (i32 0xA))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint _.RC:$src),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
             _.KRCWM:$mask, _.RC:$src, (i32 0x4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc _.RC:$src),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
             _.KRCWM:$mask, _.RC:$src, (i32 0xB))>;

  // Load
  def : Pat<(_.VT (ffloor (_.LdFrag addr:$src))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
             addr:$src, (i32 0x9))>;
  def : Pat<(_.VT (fnearbyint (_.LdFrag addr:$src))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
             addr:$src, (i32 0xC))>;
  def : Pat<(_.VT (fceil (_.LdFrag addr:$src))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
             addr:$src, (i32 0xA))>;
  def : Pat<(_.VT (frint (_.LdFrag addr:$src))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
             addr:$src, (i32 0x4))>;
  def : Pat<(_.VT (ftrunc (_.LdFrag addr:$src))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
             addr:$src, (i32 0xB))>;
  // Merge-masking + load
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor (_.LdFrag addr:$src)),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x9))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint (_.LdFrag addr:$src)),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xC))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil (_.LdFrag addr:$src)),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xA))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint (_.LdFrag addr:$src)),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc (_.LdFrag addr:$src)),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xB))>;

  // Zero-masking + load
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor (_.LdFrag addr:$src)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
             _.KRCWM:$mask, addr:$src, (i32 0x9))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint (_.LdFrag addr:$src)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
             _.KRCWM:$mask, addr:$src, (i32 0xC))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil (_.LdFrag addr:$src)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
             _.KRCWM:$mask, addr:$src, (i32 0xA))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint (_.LdFrag addr:$src)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
             _.KRCWM:$mask, addr:$src, (i32 0x4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc (_.LdFrag addr:$src)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
             _.KRCWM:$mask, addr:$src, (i32 0xB))>;

  // Broadcast load
  def : Pat<(_.VT (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
             addr:$src, (i32 0x9))>;
  def : Pat<(_.VT (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
             addr:$src, (i32 0xC))>;
  def : Pat<(_.VT (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
             addr:$src, (i32 0xA))>;
  def : Pat<(_.VT (frint (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
             addr:$src, (i32 0x4))>;
  def : Pat<(_.VT (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
             addr:$src, (i32 0xB))>;

  // Merge-masking + broadcast load
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x9))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xC))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xA))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (frint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.RC:$dst)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xB))>;

  // Zero-masking + broadcast load
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
             _.KRCWM:$mask, addr:$src, (i32 0x9))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
             _.KRCWM:$mask, addr:$src, (i32 0xC))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
             _.KRCWM:$mask, addr:$src, (i32 0xA))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (frint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
             _.KRCWM:$mask, addr:$src, (i32 0x4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
             _.KRCWM:$mask, addr:$src, (i32 0xB))>;
}
let Predicates = [HasAVX512] in {
  defm : AVX512_rndscale_lowering<v16f32_info, "PS">;
  defm : AVX512_rndscale_lowering<v8f64_info, "PD">;
}

let Predicates = [HasVLX] in {
  defm : AVX512_rndscale_lowering<v8f32x_info, "PS">;
  defm : AVX512_rndscale_lowering<v4f64x_info, "PD">;
  defm : AVX512_rndscale_lowering<v4f32x_info, "PS">;
  defm : AVX512_rndscale_lowering<v2f64x_info, "PD">;
}
multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
                                          X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _,
                                          X86VectorVTInfo CastInfo,
                                          string EVEX2VEXOvrd> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (bitconvert
                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
                                                  (i8 imm:$src3)))))>,
                  Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT
                 (bitconvert
                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
                                           (bitconvert (_.LdFrag addr:$src2)),
                                           (i8 imm:$src3)))))>,
                Sched<[sched.Folded, ReadAfterLd]>,
                EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (_.VT
                     (bitconvert
                      (CastInfo.VT
                       (X86Shuf128 _.RC:$src1,
                                   (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                                   (i8 imm:$src3)))))>, EVEX_B,
                    Sched<[sched.Folded, ReadAfterLd]>;
  }
}
multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
                                   AVX512VLVectorVTInfo _,
                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc,
                                   string EVEX2VEXOvrd>{
  let Predicates = [HasAVX512] in
  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                          _.info512, CastInfo.info512, "">, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                             _.info256, CastInfo.info256,
                                             EVEX2VEXOvrd>, EVEX_V256;
}

defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
      avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
      avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
      avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
      avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
let Predicates = [HasAVX512] in {
// Provide fallback in case the load node that is used in the broadcast
// patterns above is used by additional users, which prevents the pattern
// from being reused.
def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
}
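
// Why immediate 0 works above: with both operands equal to the 512-bit
// register holding the 128-bit source in its low lane, VSHUFF/I*X4 with
// control 0 selects lane 0 of a source for every destination 128-bit lane,
// which is exactly a 128-bit subvector broadcast.
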
multiclass avx512_valign<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
  // instantiation of this class.
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$src3)))>,
                  Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT (X86VAlign _.RC:$src1,
                                 (bitconvert (_.LdFrag addr:$src2)),
                                 (i8 imm:$src3)))>,
                Sched<[sched.Folded, ReadAfterLd]>,
                EVEX2VEXOverride<"VPALIGNRrmi">;

  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                   OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr##", $src3",
                   (X86VAlign _.RC:$src1,
                              (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                              (i8 imm:$src3))>, EVEX_B,
                   Sched<[sched.Folded, ReadAfterLd]>;
  }
}

multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
                                AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
                AVX512AIi8Base, EVEX_4V, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
                AVX512AIi8Base, EVEX_4V, EVEX_V128;
    // We can't really override the 256-bit version so change it back to unset.
    let EVEX2VEXOverride = ? in
    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
                AVX512AIi8Base, EVEX_4V, EVEX_V256;
  }
}

defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
                                   VEX_W;

defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
                                         SchedWriteShuffle, avx512vl_i8_info,
                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
// Fragments to help convert valignq into masked valignd. Or valignq/valignd
// into vpalignr.
def ValignqImm32XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
}]>;
def ValignqImm8XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
}]>;
def ValigndImm8XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
}]>;
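
// Worked example for the scaling above: shifting v8i64 elements by N with
// valignq moves the same bits as shifting v16i32 elements by 2*N with
// valignd or v16i8 bytes by 8*N with vpalignr; likewise a valignd count of N
// corresponds to 4*N bytes. Hence the *2, *8 and *4 immediate transforms.
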
multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
                                        X86VectorVTInfo From, X86VectorVTInfo To,
                                        SDNodeXForm ImmXForm> {
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                      imm:$src3))),
                    To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, To.RC:$src2,
                                                  (ImmXForm imm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                      imm:$src3))),
                    To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
                                                   To.RC:$src1, To.RC:$src2,
                                                   (ImmXForm imm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1,
                               (bitconvert (To.LdFrag addr:$src2)),
                               imm:$src3))),
                    To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, addr:$src2,
                                                  (ImmXForm imm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1,
                               (bitconvert (To.LdFrag addr:$src2)),
                               imm:$src3))),
                    To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm imm:$src3))>;
}

multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
                                           X86VectorVTInfo From,
                                           X86VectorVTInfo To,
                                           SDNodeXForm ImmXForm> :
      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  def : Pat<(From.VT (OpNode From.RC:$src1,
                      (bitconvert (To.VT (X86VBroadcast
                                          (To.ScalarLdFrag addr:$src2)))),
                      imm:$src3)),
            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
                                                  (ImmXForm imm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1,
                               (bitconvert
                                (To.VT (X86VBroadcast
                                        (To.ScalarLdFrag addr:$src2)))),
                               imm:$src3))),
                    To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm imm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1,
                               (bitconvert
                                (To.VT (X86VBroadcast
                                        (To.ScalarLdFrag addr:$src2)))),
                               imm:$src3))),
                    To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
                                                    To.RC:$src1, addr:$src2,
                                                    (ImmXForm imm:$src3))>;
}

let Predicates = [HasAVX512] in {
  // For 512-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
                                         v16i32_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX] in {
  // For 128-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
                                         v4i32x_info, ValignqImm32XForm>;
  // For 256-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
                                         v8i32x_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX, HasBWI] in {
  // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
                                      v16i8x_info, ValignqImm8XForm>;
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
                                      v16i8x_info, ValigndImm8XForm>;
}

defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
                EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1), OpcodeStr,
                   "$src1", "$src1",
                   (_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase,
                   Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.MemOp:$src1), OpcodeStr,
                  "$src1", "$src1",
                  (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
            EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
            Sched<[sched.Folded]>;
  }
}

multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
            avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.ScalarMemOp:$src1), OpcodeStr,
                  "${src1}"##_.BroadcastStr,
                  "${src1}"##_.BroadcastStr,
                  (_.VT (OpNode (X86VBroadcast
                                    (_.ScalarLdFrag addr:$src1))))>,
             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded]>;
}
multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
                           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                                EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                                EVEX_V128;
  }
}

multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
                               Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
                            EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                                 EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                                 EVEX_V128;
  }
}

multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
                               avx512vl_i64_info, prd>, VEX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
                               avx512vl_i32_info, prd>;
}

multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
                              avx512vl_i16_info, prd>, VEX_WIG;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
                              avx512vl_i8_info, prd>, VEX_WIG;
}

multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched> {
  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
                                    HasAVX512>,
              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
                                    HasBWI>;
}

defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
                                    SchedWriteVecALU>;

// VPABS: Use the 512-bit version to implement 128/256-bit in case NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (abs VR256X:$src)),
            (EXTRACT_SUBREG
             (VPABSQZrr
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v2i64 (abs VR128X:$src)),
            (EXTRACT_SUBREG
             (VPABSQZrr
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
             sub_xmm)>;
}
// Use the 512-bit version to implement 128/256-bit.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX] in {
  def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
            (EXTRACT_SUBREG
             (!cast<Instruction>(InstrStr # "Zrr")
               (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                              _.info256.RC:$src1,
                              _.info256.SubRegIdx)),
             _.info256.SubRegIdx)>;

  def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
            (EXTRACT_SUBREG
             (!cast<Instruction>(InstrStr # "Zrr")
               (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                              _.info128.RC:$src1,
                              _.info128.SubRegIdx)),
             _.info128.SubRegIdx)>;
  }
}
defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                     SchedWriteVecIMul, HasCDI>;

// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SchedWriteVecALU, HasCDI>;

// VPLZCNT: Use the 512-bit version to implement 128/256-bit in case NoVLX.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;

//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SchedWriteVecALU, HasVPOPCNTDQ>;

defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;

//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86SchedWriteWidths sched> {
  defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
                                avx512vl_f32_info, HasAVX512>, XS;
}

defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
                                  SchedWriteFShuffle>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
                                  SchedWriteFShuffle>;

//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//

multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
                   (_.VT (OpNode (_.VT _.RC:$src)))>, EVEX,
                   Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                 (_.VT (OpNode (_.VT (scalar_to_vector
                                       (_.ScalarLdFrag addr:$src)))))>,
                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
                 Sched<[sched.Folded]>;
  }
}

multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
                           VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
                                VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, sched.XMM,
                                   VTInfo.info128>, EVEX_V128;
  }
}

multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86SchedWriteWidths sched> {
  defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
                                   avx512vl_f64_info>, XD, VEX_W;
}

defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;
let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
          (VMOVDDUPZ128rm addr:$src)>;
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
          (VMOVDDUPZ128rm addr:$src)>;

def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                   (bitconvert (v4i32 immAllZerosV))),
          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                   (bitconvert (v4i32 immAllZerosV))),
          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;

def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
                   (bitconvert (v4i32 immAllZerosV))),
          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
}
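
// In short: the 128-bit vmovddup duplicates the low f64 into both elements,
// so VMOVDDUPZ128rm/rr implement (v2f64 (X86VBroadcast ...)) directly,
// including the merge- and zero-masked forms matched above.
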
//===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//

defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
                                 SchedWriteFShuffleSizes, 0, 1>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
                                 SchedWriteFShuffleSizes>;

defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;

defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                                        SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SchedWriteShuffle, HasAVX512>;

//===----------------------------------------------------------------------===//
// AVX-512 - Extract & Insert Integer Instructions
//===----------------------------------------------------------------------===//
multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
                      addr:$dst)]>,
              EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}
multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                (ins _.RC:$src1, u8imm:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(set GR32orGR64:$dst,
                      (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
                EVEX, TAPD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
  }
}

multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                (ins _.RC:$src1, u8imm:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(set GR32orGR64:$dst,
                      (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
                EVEX, PD, Sched<[WriteVecExtract]>;

    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                   (ins _.RC:$src1, u8imm:$src2),
                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                   EVEX, TAPD, FoldGenData<NAME#rr>,
                   Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
  }
}
multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                 RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                (ins _.RC:$src1, u8imm:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(set GRC:$dst,
                      (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
                EVEX, TAPD, Sched<[WriteVecExtract]>;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(store (extractelt (_.VT _.RC:$src1),
                                    imm:$src2), addr:$dst)]>,
                EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
                Sched<[WriteVecExtractSt]>;
  }
}
defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, PatFrag LdFrag> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set _.RC:$dst,
          (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
      EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
}

multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
        Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
  }
}

multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
        EVEX_4V, TAPD, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag>, TAPD;
  }
}
defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>, TAPD, VEX_WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>, PD, VEX_WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
//===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations
//===----------------------------------------------------------------------===//

multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
                        AVX512VLVectorVTInfo VTInfo_FP>{
  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                    SchedWriteFShuffle>,
                                    EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
                                    AVX512AIi8Base, EVEX_4V;
}

defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - Byte shift Left/Right
//===----------------------------------------------------------------------===//

// FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well?
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  def rr : AVX512<opc, MRMr,
             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512<opc, MRMm,
             (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst,(_.VT (OpNode
                                    (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                    (i8 imm:$src2))))]>,
             Sched<[sched.Folded, ReadAfterLd]>;
}
multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in
    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                 sched.ZMM, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.YMM, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.XMM, v16i8x_info>, EVEX_V128;
  }
}

defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                 (OpNode (_src.VT _src.RC:$src1),
                                         (_src.VT _src.RC:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                 (OpNode (_src.VT _src.RC:$src1),
                                         (_src.VT (bitconvert
                                                   (_src.LdFrag addr:$src2))))))]>,
             Sched<[sched.Folded, ReadAfterLd]>;
}

multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, X86SchedWriteWidths sched,
                                    Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
                                  v8i64_info, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
                                     v4i64x_info, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
                                     v2i64x_info, v16i8x_info>, EVEX_V128;
  }
}

defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                                        SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
// Transforms to swizzle an immediate to enable better matching when the
// memory operand isn't in the right place.
def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/4 and 3/6.
  uint8_t NewImm = Imm & 0xa5;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
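// Worked example (added for illustration): imm8 0xCA computes
// "op0 ? op1 : op2", where truth-table bit i is selected by
// (op0<<2) | (op1<<1) | op2. Swapping operands 0 and 2 exchanges table
// entries 1<->4 and 3<->6, so 0xCA (bits 1,3,6,7) becomes 0xD8
// (bits 3,4,6,7), the table for "op2 ? op1 : op0".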
def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 2/4 and 3/5.
  uint8_t NewImm = Imm & 0xc3;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x20) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/2 and 5/6.
  uint8_t NewImm = Imm & 0x99;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by moving operand 0 to the end.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
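// Sanity check on the transforms above: the three swaps (321, 213, 132)
// are involutions, so applying one of them twice returns the original
// immediate, while 231 and 312 are the two cyclic rotations of the
// operands and are inverses of each other.
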
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
                          string Name>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 imm:$src4)), 1, 1>,
                      AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (bitconvert (_.LdFrag addr:$src3))),
                              (i8 imm:$src4)), 1, 0>,
                      AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, ReadAfterLd]>;
  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                      "$src2, ${src3}"##_.BroadcastStr##", $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                              (i8 imm:$src4)), 1, 0>, EVEX_B,
                      AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, ReadAfterLd]>;
  } // Constraints = "$src1 = $dst"
  // Additional patterns for matching passthru operand in other positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;

  // Additional patterns for matching loads in other positions.
  def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
             addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
             addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
  // Additional patterns for matching zero masking with loads in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                           _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                           _.RC:$src2, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
  // Additional patterns for matching masked loads with different
  // operand order.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                           _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                           _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                           (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
                           _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                           _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
  // Additional patterns for matching broadcasts in other positions.
  def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                          _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
             addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                          _.RC:$src2, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
             addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
  // Additional patterns for matching zero masking with broadcasts in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                           _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                           (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                           _.RC:$src2, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG132_imm8 imm:$src4))>;
  // Additional patterns for matching masked broadcasts with different
  // operand order.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                           (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                           _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                           _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                           (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                           (i8 imm:$src4)), _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2,
                           (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                           _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                           _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
}
multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
                            _.info512, NAME>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
                               _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
                               _.info256, NAME>, EVEX_V256;
  }
}

defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
                                        avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
                                        avx512vl_i64_info>, VEX_W;
// Patterns to implement vnot using vpternlog instead of creating all ones
// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
// so that the result is only dependent on src0. But we use the same source
// for all operands to prevent a false dependency.
// TODO: We should maybe have a more generalized algorithm for folding to
// vpternlog.
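//
// With truth-table bit i selected by (src0<<2) | (src1<<1) | src2, the
// immediate 15 (0x0F) sets exactly the entries where src0 = 0, so the
// result is ~src0 regardless of what src1 and src2 hold; passing $src in
// all three positions thus computes ~$src while reading only that register.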
let Predicates = [HasAVX512] in {
  def : Pat<(v8i64 (xor VR512:$src, (bc_v8i64 (v16i32 immAllOnesV)))),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}

let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
}

let Predicates = [HasVLX] in {
  def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}
//===----------------------------------------------------------------------===//
// AVX-512 - FixupImm
//===----------------------------------------------------------------------===//

multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo TblVT>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                        OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (OpNode (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                (TblVT.VT _.RC:$src3),
                                (i32 imm:$src4),
                                (i32 FROUND_CURRENT))>, Sched<[sched]>;
    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
                        OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (OpNode (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
                                (i32 imm:$src4),
                                (i32 FROUND_CURRENT))>,
                        Sched<[sched.Folded, ReadAfterLd]>;
    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                        OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                        "$src2, ${src3}"##_.BroadcastStr##", $src4",
                        (OpNode (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                (TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))),
                                (i32 imm:$src4),
                                (i32 FROUND_CURRENT))>,
                        EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  } // Constraints = "$src1 = $dst"
}
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                        OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                        "$src2, $src3, {sae}, $src4",
                        (OpNode (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                (TblVT.VT _.RC:$src3),
                                (i32 imm:$src4),
                                (i32 FROUND_NO_EXC))>,
                        EVEX_B, Sched<[sched]>;
  }
}
multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo _src3VT> {
  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                        OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (OpNode (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                (_src3VT.VT _src3VT.RC:$src3),
                                (i32 imm:$src4),
                                (i32 FROUND_CURRENT))>, Sched<[sched]>;
    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                        OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                        "$src2, $src3, {sae}, $src4",
                        (OpNode (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                (_src3VT.VT _src3VT.RC:$src3),
                                (i32 imm:$src4),
                                (i32 FROUND_NO_EXC))>,
                        EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                        OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (OpNode (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                (_src3VT.VT (scalar_to_vector
                                             (_src3VT.ScalarLdFrag addr:$src3))),
                                (i32 imm:$src4),
                                (i32 FROUND_CURRENT))>,
                        Sched<[sched.Folded, ReadAfterLd]>;
  }
}
multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
                                      AVX512VLVectorVTInfo _Vec,
                                      AVX512VLVectorVTInfo _Tbl> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.ZMM,
                                    _Vec.info512, _Tbl.info512>,
             avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, sched.ZMM,
                                        _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
             EVEX_4V, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.XMM,
                                       _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
                EVEX_4V, EVEX_V128;
    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.YMM,
                                       _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
                EVEX_4V, EVEX_V256;
  }
}

defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
                    AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
                    AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
                                              avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
                                              avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
// Patterns used to select SSE scalar fp arithmetic instructions from
// either:
//
// (1) a scalar fp operation followed by a blend
//
// The effect is that the backend no longer emits unnecessary vector
// insert instructions immediately after SSE scalar fp instructions
// like addss or mulss.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     A[0] += B[0];
//     return A;
//   }
//
// Previously we generated:
//   addss %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0
//
// (2) a vector packed single/double fp operation followed by a vector insert
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
//   }
//
// Previously we generated:
//   addps %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0
//
// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.
multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode,
                                          X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    // extracted scalar math op with insert via movss
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          _.FRC:$src)))),
              (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;

    // extracted masked scalar math op with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src2),
                            _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk)
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;

    // extracted masked scalar math op with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src2), (_.EltVT ZeroFP)))),
              (!cast<I>("V"#OpcPrefix#Zrr_Intkz)
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
  }
}
defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;

defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
                                             SDNode Move, X86VectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src)>;
  }
}

defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;

multiclass AVX512_scalar_unary_math_imm_patterns<SDNode OpNode, string OpcPrefix,
                                                 SDNode Move, X86VectorVTInfo _,
                                                 bits<8> ImmV> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src,
               (i32 ImmV))>;
  }
}

defm : AVX512_scalar_unary_math_imm_patterns<ffloor, "RNDSCALESS", X86Movss,
                                             v4f32x_info, 0x01>;
defm : AVX512_scalar_unary_math_imm_patterns<fceil, "RNDSCALESS", X86Movss,
                                             v4f32x_info, 0x02>;
defm : AVX512_scalar_unary_math_imm_patterns<ffloor, "RNDSCALESD", X86Movsd,
                                             v2f64x_info, 0x01>;
defm : AVX512_scalar_unary_math_imm_patterns<fceil, "RNDSCALESD", X86Movsd,
                                             v2f64x_info, 0x02>;
//===----------------------------------------------------------------------===//
// AES instructions
//===----------------------------------------------------------------------===//

multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
  let Predicates = [HasVLX, HasVAES] in {
    defm Z128 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix),
                                  loadv2i64, 0, VR128X, i128mem>,
                EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
    defm Z256 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix##"_256"),
                                  loadv4i64, 0, VR256X, i256mem>,
                EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512, HasVAES] in
    defm Z : AESI_binop_rm_int<Op, OpStr,
                               !cast<Intrinsic>(IntPrefix##"_512"),
                               loadv8i64, 0, VR512, i512mem>,
             EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
}

defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
defm VAESDEC : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
//===----------------------------------------------------------------------===//
// PCLMUL instructions - Carry less multiplication
//===----------------------------------------------------------------------===//

let Predicates = [HasAVX512, HasVPCLMULQDQ] in
defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
                   EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;

let Predicates = [HasVLX, HasVPCLMULQDQ] in {
defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
                      EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;

defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
                                int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
                                EVEX_CD8<64, CD8VF>, VEX_WIG;
}

// Aliases
defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
//===----------------------------------------------------------------------===//
// VBMI2
//===----------------------------------------------------------------------===//

multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain = VTI.ExeDomain in {
    defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
                AVX512FMA3Base, Sched<[sched]>;
    defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                        (VTI.VT (bitconvert (VTI.LdFrag addr:$src3)))))>,
                AVX512FMA3Base,
                Sched<[sched.Folded, ReadAfterLd]>;
  }
}
multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain = VTI.ExeDomain in
    defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
                "${src3}"##VTI.BroadcastStr##", $src2",
                "$src2, ${src3}"##VTI.BroadcastStr,
                (OpNode VTI.RC:$src1, VTI.RC:$src2,
                        (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>,
                AVX512FMA3Base, EVEX_B,
                Sched<[sched.Folded, ReadAfterLd]>;
}
multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
    defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
             EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                EVEX_V128;
  }
}

multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
    defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
             EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                EVEX_V128;
  }
}
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, sched,
                                     avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, sched,
                                      avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, sched,
                                      avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", sched,
                                     avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
           VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
           OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
  defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
           sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
}

// Concat & Shift
defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
// Compress
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
                                         avx512vl_i8_info, HasVBMI2>, EVEX,
                                         NotMemoryFoldable;
defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
                                          avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
                                          NotMemoryFoldable;

// Expand
defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
                                      avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
                                      avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
//===----------------------------------------------------------------------===//
// VNNI
//===----------------------------------------------------------------------===//

let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                                "$src3, $src2", "$src2, $src3",
                                (VTI.VT (OpNode VTI.RC:$src1,
                                                VTI.RC:$src2, VTI.RC:$src3))>,
                                EVEX_4V, T8PD, Sched<[sched]>;
  defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                                "$src3, $src2", "$src2, $src3",
                                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                        (VTI.VT (bitconvert
                                                 (VTI.LdFrag addr:$src3)))))>,
                                EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
                                Sched<[sched.Folded, ReadAfterLd]>;
  defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
                                 OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
                                 "$src2, ${src3}"##VTI.BroadcastStr,
                                 (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                         (VTI.VT (X86VBroadcast
                                                  (VTI.ScalarLdFrag addr:$src3))))>,
                                 EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
                                 T8PD, Sched<[sched.Folded, ReadAfterLd]>;
}
multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
                       X86SchedWriteWidths sched> {
  let Predicates = [HasVNNI] in
    defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info>, EVEX_V512;
  let Predicates = [HasVNNI, HasVLX] in {
    defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info>, EVEX_V256;
    defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPDP?
defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul>;
defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul>;
defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul>;
defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul>;
//===----------------------------------------------------------------------===//
// Bit Algorithms
//===----------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
                                   avx512vl_i8_info, HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
                                   avx512vl_i16_info, HasBITALG>, VEX_W;

defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.RC:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
                                Sched<[sched]>;
  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT (bitconvert (VTI.LdFrag addr:$src2))))>,
                                EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
                                Sched<[sched.Folded, ReadAfterLd]>;
}
multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasBITALG] in
    defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
  let Predicates = [HasBITALG, HasVLX] in {
    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
//===----------------------------------------------------------------------===//
// GFNI
//===----------------------------------------------------------------------===//

multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                   X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
    defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
             EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
                EVEX_V256;
    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
                EVEX_V128;
  }
}

defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
                                          SchedWriteVecALU>,
                                          EVEX_CD8<8, CD8VF>, T8PD;
multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                                      X86VectorVTInfo BcstVTI>
           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
  let ExeDomain = VTI.ExeDomain in
    defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
                OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
                "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
                (OpNode (VTI.VT VTI.RC:$src1),
                        (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
                        (i8 imm:$src3))>, EVEX_B,
                Sched<[sched.Folded, ReadAfterLd]>;
}

multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
    defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
                                        v64i8_info, v8i64_info>, EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
                                           v32i8x_info, v4i64x_info>, EVEX_V256;
    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
                                           v16i8x_info, v2i64x_info>, EVEX_V128;
  }
}

defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
                                   X86GF2P8affineinvqb, SchedWriteVecIMul>,
                                   EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
                                   X86GF2P8affineqb, SchedWriteVecIMul>,
                                   EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
//===----------------------------------------------------------------------===//
// AVX5124FMAPS
//===----------------------------------------------------------------------===//

let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
    Constraints = "$src1 = $dst" in {
defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
                    (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V128, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                    Sched<[SchedWriteFMA.Scl.Folded]>;

defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V128, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                     Sched<[SchedWriteFMA.Scl.Folded]>;
}
//===----------------------------------------------------------------------===//
// AVX5124VNNIW
//===----------------------------------------------------------------------===//

let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
    Constraints = "$src1 = $dst" in {
defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "vp4dpwssd", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;