//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// Group template arguments that can be derived from the vector type (EltNum x
// EltVT). These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types, in which case numelts is 1.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  RegisterClass RC = rc;
  ValueType EltVT = eltvt;
  int NumElts = numelts;

  // Corresponding mask register class.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Corresponding mask register pair class.
  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));

  // Corresponding write-mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // Corresponding mask vector type, e.g. v16i1 for v16i32.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // VTName is a string name for vector VT. For vector types it will be
  // v # NumElts # EltVT, so for a vector of 8 elements of i32 it will be
  // v8i32. It is a little more complex for scalar types, where NumElts = 1.
  // In this case we build v4f32 or v2f64.
  string VTName = "v" # !if (!eq (NumElts, 1),
                        !if (!eq (EltVT.Size, 32), 4,
                        !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;

  // The vector VT.
  ValueType VT = !cast<ValueType>(VTName);

  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits, e.g. 32 for v16i32.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of RC in bits, e.g. 512 for VR512.
  int Size = RC.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
  // FP scalar memory operand for intrinsics - ssmem/sdmem.
  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));

  // Load patterns.
  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);

  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);

  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);

  ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
                                          !cast<ComplexPattern>("sse_load_f32"),
                                    !if (!eq (EltTypeName, "f64"),
                                          !cast<ComplexPattern>("sse_load_f64"),
                                    ?));

  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format. This is only
  // defined for NumElts <= 8.
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);

  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                     SSEPackedInt));

  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);

  dag ImmAllZerosV = (VT immAllZerosV);

  string ZSuffix = !if (!eq (Size, 128), "Z128",
                   !if (!eq (Size, 256), "Z256", "Z"));
}
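// For example (illustrative, not an exhaustive list), the instantiation
// X86VectorVTInfo<16, i32, VR512, "d"> derives roughly:
//   VT = v16i32, KRC = VK16, KRCWM = VK16WM, KVT = v16i1,
//   EltSize = 32, Size = 512, MemOp = i512mem, LdFrag = loadv16i32,
//   BroadcastStr = "{1to16}", ZSuffix = "Z".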
def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;

// "x" in v32i8x_info means RC = VR256X
def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;

def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;

// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows us to use the same masking
// logic.
def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;

class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}

def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
                                             v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
                                             v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
                                             v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
                                             v2i64x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
                                             v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
                                             v2f64x_info>;

class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
                       ValueType _vt> {
  RegisterClass KRC = _krc;
  RegisterClass KRCWM = _krcwm;
  ValueType KVT = _vt;
}

def v1i1_info  : X86KVectorVTInfo<VK1,  VK1WM,  v1i1>;
def v2i1_info  : X86KVectorVTInfo<VK2,  VK2WM,  v2i1>;
def v4i1_info  : X86KVectorVTInfo<VK4,  VK4WM,  v4i1>;
def v8i1_info  : X86KVectorVTInfo<VK8,  VK8WM,  v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> {
  let isCommutable = IsCommutable in
    def NAME: AVX512<O, F, Outs, Ins,
                     OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                   "$dst, "#IntelSrcAsm#"}",
                     Pattern>;

  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>,
                EVEX_K {
      // In case of the 3src subclass this is overridden with a let.
      string Constraints = MaskingConstraint;
    }

  // Zero mask does not add any restrictions to commute operands transformation.
  // So, it is Ok to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                        OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                      "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                        ZeroMaskingPattern>,
                 EVEX_KZ;
}
// Common base class of AVX512_maskable and AVX512_maskable_3src.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDNode Select = vselect,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable, IsKZCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                                 dag Outs, dag Ins, string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskRHS,
                                 bit IsCommutable = 0, bit IsKCommutable = 0,
                                 SDNode Select = vselect> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                         "$src0 = $dst", IsCommutable, IsKCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDNode Select = vselect> :
  AVX512_maskable_common<O, F, _, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                         (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                         Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                         IsKZCommutable>;
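// Conceptually (a sketch of the generated patterns, not literal code), the
// three variants compute:
//   $dst = RHS                              // unmasked
//   $dst = vselect $mask, RHS, $src0        // merge-masking
//   $dst = vselect $mask, RHS, all-zeros    // zero-masking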
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins, string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS> :
  AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                  RHS, 0, 0, 0, X86selects>;

// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDNode Select = vselect,
                                bit MaskOnly = 0> :
  AVX512_maskable_common<O, F, _, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         !if(MaskOnly, (null_frag), RHS),
                         (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                         Select, "", IsCommutable, IsKCommutable>;

// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
  AVX512_maskable_common<O, F, OutVT, Outs,
                         !con((ins InVT.RC:$src1), NonTiedIns),
                         !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                         !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                         (vselect InVT.KRCWM:$mask, RHS,
                                  (bitconvert InVT.RC:$src1)),
                         vselect, "", IsCommutable>;

multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns, string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       dag RHS,
                                       bit IsCommutable = 0,
                                       bit IsKCommutable = 0,
                                       bit MaskOnly = 0> :
  AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                       IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                       X86selects, MaskOnly>;

multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                         "$src0 = $dst">;

multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
  AVX512_maskable_custom<O, F, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                         "$src1 = $dst">;

// Instruction with mask that puts result in mask register,
// like "compare" and "vptest".
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      list<dag> Pattern,
                                      list<dag> MaskingPattern,
                                      bit IsCommutable = 0> {
  let isCommutable = IsCommutable in {
    def NAME: AVX512<O, F, Outs, Ins,
                     OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                   "$dst, "#IntelSrcAsm#"}",
                     Pattern>;

    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>, EVEX_K;
  }
}

multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      dag RHS, dag MaskingRHS,
                                      bit IsCommutable = 0> :
  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                             AttSrcAsm, IntelSrcAsm,
                             [(set _.KRC:$dst, RHS)],
                             [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;

multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag Ins, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag RHS_su, bit IsCommutable = 0> :
  AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                             !con((ins _.KRCWM:$mask), Ins),
                             OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                             (and _.KRCWM:$mask, RHS_su), IsCommutable>;

// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                        [(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                              [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}
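// For example (illustrative): a zeroed zmm is typically materialized with a
// 128-bit xor such as "vpxor %xmm0, %xmm0, %xmm0", relying on the implicit
// zeroing of the upper bits of the full 512-bit register.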
// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all ones and all zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                      (v16i32 immAllOnesV),
                                                      (v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                           [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                                      (v8i64 immAllOnesV),
                                                      (v8i64 immAllZerosV)))]>;
}
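// Illustrative expansion: VPTERNLOG with immediate 0xff returns all-ones for
// any inputs, so with zero-masking the pseudo becomes roughly
//   vpternlogd $0xff, %zmm0, %zmm0, %zmm0 {%k1} {z}
// which sign-extends the mask into 32/64-bit vector elements.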
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                        [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
                        [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}

// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fpimm0)]>;
}
//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//===----------------------------------------------------------------------===//

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;

    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT (From.LdFrag addr:$src2)),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT (From.LdFrag addr:$src2)),
                                           (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;

multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                       X86VectorVTInfo To, PatFrag vinsert_insert,
                       SDNodeXForm INSERT_get_vinsert_imm, list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vinsert_insert:$ins
                   (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                     To.RC:$src1, From.RC:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;

    def : Pat<(vinsert_insert:$ins
                   (To.VT To.RC:$src1),
                   (From.VT (From.LdFrag addr:$src2)),
                   (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                     To.RC:$src1, addr:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   null_frag, vinsert128_insert, sched>,
                                   VEX_W1X, EVEX_V256;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 VEX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 null_frag, vinsert256_insert, sched>,
                                 EVEX_V512;
  }
}

// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
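// For example, VINSERTF32x4Z from the instantiation above corresponds to
// (illustrative AT&T syntax):
//   vinsertf32x4 $1, %xmm1, %zmm2, %zmm0 {%k1}   # insert into 128-bit lane 1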
// Codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen patterns with the alternative types: insert VEC128 into VEC256
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen patterns with the alternative types: insert VEC128 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: insert VEC256 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT
                                             (bitconvert
                                              (From.LdFrag addr:$src2))),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT (From.LdFrag addr:$src2)),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}
defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
// vinsertps - insert f32 to XMM
let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
def VINSERTPSZrm : AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                imm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
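// As a reminder of the insertps immediate layout (illustrative): bits [7:6]
// select the source element, bits [5:4] the destination position, and bits
// [3:0] are a zero mask, e.g.
//   vinsertps $0x10, %xmm1, %xmm2, %xmm0   # xmm0[1] = xmm1[0]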
//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//===----------------------------------------------------------------------===//

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                             addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                         (ins To.MemOp:$dst, To.KRCWM:$mask,
                              From.RC:$src1, u8imm:$idx),
                         "vextract" # To.EltTypeName # "x" # To.NumElts #
                              "\t{$idx, $src1, $dst {${mask}}|"
                              "$dst {${mask}}, $src1, $idx}", []>,
                         EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
  }
}
// Passes the same pattern operator for masked and unmasked ops.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;

// Codegen patterns for the alternative types
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                X86VectorVTInfo To, PatFrag vextract_extract,
                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                          From.RC:$src1,
                          (EXTRACT_get_vextract_imm To.RC:$ext)))>;
    def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                              (iPTR imm))), addr:$dst),
              (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
                (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   vextract128_extract, SchedRR, SchedMR>,
                                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                                   X86VectorVTInfo< 8, EltVT64, VR512>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   vextract256_extract, SchedRR, SchedMR>,
                                       VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}

// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
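// For example, VEXTRACTI32x4Z from the instantiation above corresponds to
// (illustrative AT&T syntax):
//   vextracti32x4 $3, %zmm1, %xmm0 {%k1} {z}   # extract 128-bit lane 3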
// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC256
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: extract VEC256 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [NoVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}
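// Illustrative effect of the patterns above: element index 2 of a v8i64
// addresses bits [255:128], so the extract is rewritten to operate on the
// ymm subregister with lane immediate 1, roughly
//   vextracti32x4 $1, %ymm0, %xmm1   # instead of a 512-bit extract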
// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
                              v8f32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
                              v4f64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
// vextractps - extract 32 bits from XMM
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
                      (ins VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
                      EVEX, VEX_WIG, Sched<[WriteVecExtract]>;

def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
                      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                              addr:$dst)]>,
                      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//===---------------------------------------------------------------------===//

// Broadcast with a scalar argument.
multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
                                   string Name,
                                   X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#r)
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rk)
             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rkz)
             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}
// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     string Name,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast> {
  let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in {
    defm r : AVX512_maskable_split<opc, MRMSrcReg, MaskInfo,
                                   (outs MaskInfo.RC:$dst),
                                   (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
                                   (MaskInfo.VT
                                    (bitconvert
                                     (DestInfo.VT
                                      (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))),
                                   (MaskInfo.VT
                                    (bitconvert
                                     (DestInfo.VT
                                      (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))))>,
                                   T8PD, EVEX, Sched<[SchedRR]>;

    defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo,
                                   (outs MaskInfo.RC:$dst),
                                   (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                                   (MaskInfo.VT
                                    (bitconvert
                                     (DestInfo.VT (UnmaskedOp
                                                   (SrcInfo.ScalarLdFrag addr:$src))))),
                                   (MaskInfo.VT
                                    (bitconvert
                                     (DestInfo.VT (X86VBroadcast
                                                   (SrcInfo.ScalarLdFrag addr:$src)))))>,
                                   T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>,
                                   Sched<[SchedRM]>;
  }
}
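// For example (illustrative): an instantiation with a dword source yields
//   vpbroadcastd %xmm1, %zmm0 {%k1}         # register form
//   vpbroadcastd (%rax), %zmm0 {%k1} {z}    # memory form; EVEX_CD8 scales
//                                           # the compressed disp8 by 4 bytes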
// Helper class to force mask and broadcast result to same type.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo> :
  avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo>;

multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128>,
             avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                     _.info128>,
                                     EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                        _.info128>,
                                        EVEX_V256;
  }
}

multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128>,
             avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                     _.info128>,
                                     EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                        _.info128>,
                                        EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info128, _.info128>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
                                        _.info128>,
                                        EVEX_V128;
  }
}
defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                           avx512vl_f32_info>;
defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                           avx512vl_f64_info>, VEX_W1X;
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  let ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins SrcRC:$src),
                           "vpbroadcast"##_.Suffix, "$src", "$src",
                           (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX,
                           Sched<[SchedRR]>;
}

multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                                      X86VectorVTInfo _, SDPatternOperator OpNode,
                                      RegisterClass SrcRC, SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable_custom<opc, MRMSrcReg,
                                  (outs _.RC:$dst), (ins GR32:$src),
                                  !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                                  !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                                  "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
                                  "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;

  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#r)
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}
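// The INSERT_SUBREG dance above reflects that the byte/word forms still read
// a 32-bit GPR: a GR8/GR16 value is first placed into the low subregister of
// an undef GR32, so broadcasting %al ends up roughly as (illustrative)
//   vpbroadcastb %eax, %zmm0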
multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                                         AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                                         RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
                                        OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
                                           _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
                                           _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}

multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}

defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                      AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd] in {
    defm Z :   avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                   WriteShuffle256Ld, _.info512, _.info128>,
                                   EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128>,
                                    EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
                                    WriteShuffleXLd, _.info128, _.info128>,
                                    EVEX_V128;
  }
}

defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                                avx512vl_i8_info, HasBWI>;
defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                                avx512vl_i16_info, HasBWI>;
defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                                avx512vl_i32_info, HasAVX512>;
defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                                avx512vl_i64_info, HasAVX512>, VEX_W1X;

multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                           X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (_Dst.VT (X86SubVBroadcast
                             (_Src.VT (_Src.LdFrag addr:$src))))>,
                           Sched<[SchedWriteShuffle.YMM.Folded]>,
                           AVX5128IBase, EVEX;
}

// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns so that we only use the DQ instructions when masking
// is available.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                           X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (null_frag),
                           (_Dst.VT (X86SubVBroadcast
                             (_Src.VT (_Src.LdFrag addr:$src))))>,
                           Sched<[SchedWriteShuffle.YMM.Folded]>,
                           AVX5128IBase, EVEX;
}
let Predicates = [HasAVX512] in {
  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
  def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
            (VPBROADCASTQZm addr:$src)>;
}

let Predicates = [HasVLX] in {
  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
            (VPBROADCASTQZ128m addr:$src)>;
  def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
            (VPBROADCASTQZ256m addr:$src)>;
}
let Predicates = [HasVLX, HasBWI] in {
  // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
  // This means we'll encounter truncated i32 loads; match that here.
  def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
  def : Pat<(v8i16 (X86VBroadcast
              (i16 (trunc (i32 (extloadi16 addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v8i16 (X86VBroadcast
              (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast
              (i16 (trunc (i32 (extloadi16 addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast
              (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
}
let Predicates = [HasBWI] in {
  // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
  // This means we'll encounter truncated i32 loads; match that here.
  def : Pat<(v32i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZm addr:$src)>;
  def : Pat<(v32i16 (X86VBroadcast
              (i16 (trunc (i32 (extloadi16 addr:$src)))))),
            (VPBROADCASTWZm addr:$src)>;
  def : Pat<(v32i16 (X86VBroadcast
              (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZm addr:$src)>;
}
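// Illustrative DAG shape matched above: because i16 is not a desirable load
// type, a broadcast of a loaded i16 typically reaches isel as
//   (v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr))))))
// rather than as a direct i16 load, hence the trunc-of-i32-load patterns.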
1359 //===----------------------------------------------------------------------===//
1360 // AVX-512 BROADCAST SUBVECTORS
1363 defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1364 v16i32_info, v4i32x_info>,
1365 EVEX_V512, EVEX_CD8<32, CD8VT4>;
1366 defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1367 v16f32_info, v4f32x_info>,
1368 EVEX_V512, EVEX_CD8<32, CD8VT4>;
1369 defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1370 v8i64_info, v4i64x_info>, VEX_W,
1371 EVEX_V512, EVEX_CD8<64, CD8VT4>;
1372 defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1373 v8f64_info, v4f64x_info>, VEX_W,
1374 EVEX_V512, EVEX_CD8<64, CD8VT4>;

let Predicates = [HasAVX512] in {
  def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
            (VBROADCASTF64X4rm addr:$src)>;
  def : Pat<(v16i32 (X86SubVBroadcast (loadv8i32 addr:$src))),
            (VBROADCASTI64X4rm addr:$src)>;
  def : Pat<(v32i16 (X86SubVBroadcast (loadv16i16 addr:$src))),
            (VBROADCASTI64X4rm addr:$src)>;
  def : Pat<(v64i8 (X86SubVBroadcast (loadv32i8 addr:$src))),
            (VBROADCASTI64X4rm addr:$src)>;

  // Provide fallback in case the load node that is used in the patterns above
  // is used by additional users, which prevents the pattern selection.
  def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
            (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
             (v4f64 VR256X:$src), 1)>;
  def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
            (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
             (v8f32 VR256X:$src), 1)>;
  def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
            (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
             (v4i64 VR256X:$src), 1)>;
  def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
            (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
             (v8i32 VR256X:$src), 1)>;
  def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
            (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
             (v16i16 VR256X:$src), 1)>;
  def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
            (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
             (v32i8 VR256X:$src), 1)>;
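
  // The register fallback above materializes the splat by hand: the 256-bit
  // source is placed in the low half of an undef 512-bit register via
  // INSERT_SUBREG, then inserted again at position 1 (the high half) with
  // VINSERT*64x4Zrr.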

  def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
            (VBROADCASTF32X4rm addr:$src)>;
  def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
            (VBROADCASTI32X4rm addr:$src)>;
  def : Pat<(v32i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
            (VBROADCASTI32X4rm addr:$src)>;
  def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
            (VBROADCASTI32X4rm addr:$src)>;

  // Patterns for selects of bitcasted operations.
  def : Pat<(vselect VK16WM:$mask,
                     (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                     (v16f32 immAllZerosV)),
            (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(vselect VK16WM:$mask,
                     (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                     VR512:$src0),
            (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(vselect VK16WM:$mask,
                     (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                     (v16i32 immAllZerosV)),
            (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(vselect VK16WM:$mask,
                     (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                     VR512:$src0),
            (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

  def : Pat<(vselect VK8WM:$mask,
                     (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
                     (v8f64 immAllZerosV)),
            (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
                     (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
                     VR512:$src0),
            (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
                     (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
                     (v8i64 immAllZerosV)),
            (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
                     (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
                     VR512:$src0),
            (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
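
// Note that the vselect-of-bitcast patterns above let a masked use of the
// broadcast still fold to one instruction, e.g. the rmkz form corresponds to
// (illustrative) vbroadcastf32x4 (%rdi), %zmm0 {%k1} {z}.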

let Predicates = [HasVLX] in {
  defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                                                        v8i32x_info, v4i32x_info>,
                             EVEX_V256, EVEX_CD8<32, CD8VT4>;
  defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                                                        v8f32x_info, v4f32x_info>,
                             EVEX_V256, EVEX_CD8<32, CD8VT4>;

  def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
            (VBROADCASTF32X4Z256rm addr:$src)>;
  def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
            (VBROADCASTI32X4Z256rm addr:$src)>;
  def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
            (VBROADCASTI32X4Z256rm addr:$src)>;
  def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
            (VBROADCASTI32X4Z256rm addr:$src)>;

  // Patterns for selects of bitcasted operations.
  def : Pat<(vselect VK8WM:$mask,
                     (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                     (v8f32 immAllZerosV)),
            (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
                     (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                     VR256X:$src0),
            (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
                     (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                     (v8i32 immAllZerosV)),
            (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
                     (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                     VR256X:$src0),
            (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;

  // Provide fallback in case the load node that is used in the patterns above
  // is used by additional users, which prevents the pattern selection.
  def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
            (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             (v2f64 VR128X:$src), 1)>;
  def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
            (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             (v4f32 VR128X:$src), 1)>;
  def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
            (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             (v2i64 VR128X:$src), 1)>;
  def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
            (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             (v4i32 VR128X:$src), 1)>;
  def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
            (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             (v8i16 VR128X:$src), 1)>;
  def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
            (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             (v16i8 VR128X:$src), 1)>;
}

let Predicates = [HasVLX, HasDQI] in {
  defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                                                           v4i64x_info, v2i64x_info>, VEX_W1X,
                             EVEX_V256, EVEX_CD8<64, CD8VT2>;
  defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                                                           v4f64x_info, v2f64x_info>, VEX_W1X,
                             EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Patterns for selects of bitcasted operations.
  def : Pat<(vselect VK4WM:$mask,
                     (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                     (v4f64 immAllZerosV)),
            (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
  def : Pat<(vselect VK4WM:$mask,
                     (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                     VR256X:$src0),
            (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(vselect VK4WM:$mask,
                     (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                     (v4i64 immAllZerosV)),
            (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
  def : Pat<(vselect VK4WM:$mask,
                     (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                     VR256X:$src0),
            (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}

let Predicates = [HasDQI] in {
  defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                                                       v8i64_info, v2i64x_info>, VEX_W,
                         EVEX_V512, EVEX_CD8<64, CD8VT2>;
  defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                                                       v16i32_info, v8i32x_info>,
                         EVEX_V512, EVEX_CD8<32, CD8VT8>;
  defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                                                       v8f64_info, v2f64x_info>, VEX_W,
                         EVEX_V512, EVEX_CD8<64, CD8VT2>;
  defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                                                       v16f32_info, v8f32x_info>,
                         EVEX_V512, EVEX_CD8<32, CD8VT8>;

  // Patterns for selects of bitcasted operations.
  def : Pat<(vselect VK16WM:$mask,
                     (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                     (v16f32 immAllZerosV)),
            (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(vselect VK16WM:$mask,
                     (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                     VR512:$src0),
            (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(vselect VK16WM:$mask,
                     (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                     (v16i32 immAllZerosV)),
            (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(vselect VK16WM:$mask,
                     (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                     VR512:$src0),
            (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

  def : Pat<(vselect VK8WM:$mask,
                     (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                     (v8f64 immAllZerosV)),
            (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
                     (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                     VR512:$src0),
            (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
                     (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                     (v8i64 immAllZerosV)),
            (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
                     (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                     VR512:$src0),
            (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
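
// The multiclasses below build the 32x2 subvector broadcasts (a 64-bit pair
// of f32/i32 elements splatted across the register). Only the integer form
// has a 128-bit variant, so avx512_common_broadcast_i32x2 derives from the
// common 256/512-bit multiclass and adds Z128 on top.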

multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
    defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                       WriteShuffle256Ld, _Dst.info512,
                                       _Src.info512, _Src.info128, null_frag>,
                                       EVEX_V512;
  let Predicates = [HasDQI, HasVLX] in
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info256,
                                          _Src.info256, _Src.info128, null_frag>,
                                          EVEX_V256;
}

multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
  avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {

  let Predicates = [HasDQI, HasVLX] in
    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
                                          WriteShuffleXLd, _Dst.info128,
                                          _Src.info128, _Src.info128, null_frag>,
                                          EVEX_V128;
}

defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                                                     avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                                                    avx512vl_f32_info, avx512vl_f64_info>;

//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST MASK TO VECTOR REGISTER
//---
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, RegisterClass KRC> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
                      EVEX, Sched<[WriteShuffle]>;
}

multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
  let Predicates = [HasCDI] in
    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
  let Predicates = [HasCDI, HasVLX] in {
    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
  }
}

defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                             avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                             avx512vl_i64_info, VK8>, VEX_W;
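
// Illustrative semantics: vpbroadcastmw2d %k1, %zmm0 copies the 16-bit value
// of k1, zero-extended to 32 bits, into every dword element of zmm0, and
// vpbroadcastmb2q does the same from a byte mask into qword elements.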

//===----------------------------------------------------------------------===//
// -- VPERMI2 - 3 source operands form --
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0 in {
  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched]>;

  let mayLoad = 1 in
  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
           (ins _.RC:$src2, _.MemOp:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
                  (_.VT (_.LdFrag addr:$src3)))), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, mayLoad = 1 in
  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr),
            (_.VT (X86VPermt2 _.RC:$src2,
                   IdxVT.RC:$src1, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
            AVX5128IBase, EVEX_4V, EVEX_B,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
             avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                                 ShuffleMask.info128>,
                   avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                    ShuffleMask.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                                 ShuffleMask.info256>,
                   avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                    ShuffleMask.info256>, EVEX_V256;
  }
}

multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  let Predicates = [Prd] in
    defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                             Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
    defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                                 Idx.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                                 Idx.info256>, EVEX_V256;
  }
}

defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
                   avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                   VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                   EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
                   avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
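
// Naming note: vpermi2* and vpermt2* perform the same two-table shuffle and
// differ only in which tied operand is overwritten: vpermi2* destroys the
// index vector ($src1 above), while the vpermt2* forms defined below destroy
// the first data table. E.g. (illustrative) vpermi2d %zmm2, %zmm1, %zmm0
// replaces the indices in zmm0 with elements gathered from zmm1 and zmm2.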

// Extra patterns to deal with extra bitcasts due to passthru and index being
// different types on the fp versions.
multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
                                  X86VectorVTInfo IdxVT,
                                  X86VectorVTInfo CastVT> {
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                           (X86VPermt2 (_.VT _.RC:$src2),
                                       (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
                           (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
                                                _.RC:$src2, _.RC:$src3)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                           (X86VPermt2 _.RC:$src2,
                                       (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
                                       (_.LdFrag addr:$src3)),
                           (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
                                                _.RC:$src2, addr:$src3)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                           (X86VPermt2 _.RC:$src2,
                                       (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
                                       (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
                           (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
                                                 _.RC:$src2, addr:$src3)>;
}

// TODO: Should we add more casts? The vXi64 case is common due to ABI.
defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;

// VPERMT2
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins IdxVT.RC:$src2, _.RC:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched]>;

  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins IdxVT.RC:$src2, _.MemOp:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
                  (_.LdFrag addr:$src3))), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr),
            (_.VT (X86VPermt2 _.RC:$src1,
                   IdxVT.RC:$src2, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
            AVX5128IBase, EVEX_4V, EVEX_B,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
             avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                                 ShuffleMask.info128>,
                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                    ShuffleMask.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                                 ShuffleMask.info256>,
                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                    ShuffleMask.info256>, EVEX_V256;
  }
}

multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
  let Predicates = [Prd] in
    defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                             Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
    defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                                 Idx.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                                 Idx.info256>, EVEX_V256;
  }
}

defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
                   avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                   VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                   EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
                   avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;

//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
//

multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
             EVEX_4V, Sched<[sched]>;
  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
  let mayLoad = 1 in {
  def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
  }
  }
}

multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let mayLoad = 1, hasSideEffects = 0 in {
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr,
              "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
              "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
              EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;

  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr,
              "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
              "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
              EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;

  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
              (ins _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr,
              "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
              "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
              EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in
    defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
             EVEX_V512;

  let Predicates = [HasBWI, HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
                              avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
                              avx512vl_f64_info>, VEX_W;
defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
                              avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
                              avx512vl_i64_info>, VEX_W;
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
                              avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
                              avx512vl_i16_info>, VEX_W;
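
// Illustrative semantics (AT&T syntax): vpblendmd %zmm2, %zmm1, %zmm0 {%k1}
// writes zmm2[i] to element i of zmm0 where k1[i] is 1 and zmm1[i] where it
// is 0; the {z} forms zero the unselected elements instead of taking $src1.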

//===----------------------------------------------------------------------===//
// Compare Instructions
//===----------------------------------------------------------------------===//

// avx512_cmp_scalar - AVX512 CMPSS and CMPSD

multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
                             X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                    (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                    "vcmp"#_.Suffix,
                    "$cc, $src2, $src1", "$src1, $src2, $cc",
                    (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
                    (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                               imm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>;

  defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                    (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
                    "vcmp"#_.Suffix,
                    "$cc, $src2, $src1", "$src1, $src2, $cc",
                    (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
                            imm:$cc),
                    (OpNode_su (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
                               imm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                     (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                     "vcmp"#_.Suffix,
                     "$cc, {sae}, $src2, $src1", "$src1, $src2, {sae}, $cc",
                     (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                imm:$cc),
                     (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                   imm:$cc)>,
                     EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;

  let isCodeGenOnly = 1 in {
    let isCommutable = 1 in
    def rr : AVX512Ii8<0xC2, MRMSrcReg,
                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
                !strconcat("vcmp", _.Suffix,
                           "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                          _.FRC:$src2,
                                          imm:$cc))]>,
                EVEX_4V, VEX_LIG, Sched<[sched]>;
    def rm : AVX512Ii8<0xC2, MRMSrcMem,
                (outs _.KRC:$dst),
                (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                !strconcat("vcmp", _.Suffix,
                           "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                          (_.ScalarLdFrag addr:$src2),
                                          imm:$cc))]>,
                EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (X86cmpms node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;
def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                             (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;

let Predicates = [HasAVX512] in {
  let ExeDomain = SSEPackedSingle in
  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
  let ExeDomain = SSEPackedDouble in
  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
}
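
// Predicate reminder (illustrative, AT&T syntax): the $cc immediate selects
// the comparison, e.g.
//   vcmpss $2, %xmm2, %xmm1, %k0   # k0[0] = (xmm1[0] <= xmm2[0]), LE_OS
// and assemblers also accept aliased mnemonics such as vcmpless.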

multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                              PatFrag OpNode_su, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, bit IsCommutable> {
  let isCommutable = IsCommutable in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))]>,
             EVEX_4V, Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                                       (_.VT (_.LdFrag addr:$src2))))]>,
             EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = IsCommutable in
  def rrk : AVX512BI<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2))))]>,
              EVEX_4V, EVEX_K, Sched<[sched]>;
  def rmk : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (OpNode_su (_.VT _.RC:$src1),
                                                (_.VT (_.LdFrag addr:$src2)))))]>,
              EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                                  PatFrag OpNode_su,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  bit IsCommutable> :
           avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched, _, IsCommutable> {
  def rmb : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
                         "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                                        (X86VBroadcast (_.ScalarLdFrag addr:$src2))))]>,
              EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmbk : AVX512BI<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2),
               !strconcat(OpcodeStr,
                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                      (OpNode_su (_.VT _.RC:$src1),
                                                 (X86VBroadcast
                                                  (_.ScalarLdFrag addr:$src2)))))]>,
               EVEX_4V, EVEX_K, EVEX_B,
               Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                                 PatFrag OpNode_su, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
                                 bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.ZMM,
                                VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.YMM,
                                   VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.XMM,
                                   VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     PatFrag OpNode, PatFrag OpNode_su,
                                     X86SchedWriteWidths sched,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.ZMM,
                                    VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.YMM,
                                       VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.XMM,
                                       VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}

// This fragment treats X86cmpm as commutable to help match loads in both
// operands for PCMPEQ.
def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2),
                           (X86setcc_commute node:$src1, node:$src2, SETEQ)>;
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
                         (setcc node:$src1, node:$src2, SETGT)>;

def X86pcmpeqm_c_su : PatFrag<(ops node:$src1, node:$src2),
                              (X86pcmpeqm_c node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;
def X86pcmpgtm_su : PatFrag<(ops node:$src1, node:$src2),
                            (X86pcmpgtm node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
// increase the pattern complexity the way an immediate would.
let AddedComplexity = 2 in {
// FIXME: Is there a better scheduler class for VPCMP?
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c, X86pcmpeqm_c_su,
                                      SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c, X86pcmpeqm_c_su,
                                      SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c, X86pcmpeqm_c_su,
                                          SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c, X86pcmpeqm_c_su,
                                          SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm, X86pcmpgtm_su,
                                      SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm, X86pcmpgtm_su,
                                      SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm, X86pcmpgtm_su,
                                          SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm, X86pcmpgtm_su,
                                          SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
                          PatFrag Frag_su, PatFrag CommFrag, PatFrag CommFrag_su,
                          X86FoldableSchedWrite sched,
                          X86VectorVTInfo _, string Name> {
  let isCommutable = 1 in
  def rri : AVX512AIi8<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
                                                 (_.VT _.RC:$src2),
                                                 cond)))]>,
              EVEX_4V, Sched<[sched]>;
  def rmi : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (_.KVT
                                 (Frag:$cc
                                  (_.VT _.RC:$src1),
                                  (_.VT (_.LdFrag addr:$src2)),
                                  cond)))]>,
              EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = 1 in
  def rrik : AVX512AIi8<opc, MRMSrcReg,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                       u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2, $cc}"),
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                      (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
                                                          (_.VT _.RC:$src2),
                                                          cond))))]>,
               EVEX_4V, EVEX_K, Sched<[sched]>;
  def rmik : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                       u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2, $cc}"),
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                      (_.KVT
                                       (Frag_su:$cc
                                        (_.VT _.RC:$src1),
                                        (_.VT (_.LdFrag addr:$src2)),
                                        cond))))]>,
               EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;

  def : Pat<(_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
                                 (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag_su:$cc (_.LdFrag addr:$src2),
                                         (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (CommFrag.OperandTransform $cc))>;
}

multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
                              PatFrag Frag_su, PatFrag CommFrag,
                              PatFrag CommFrag_su, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name> :
           avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                          sched, _, Name> {
  def rmib : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                       u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
                          "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
               [(set _.KRC:$dst, (_.KVT (Frag:$cc
                                         (_.VT _.RC:$src1),
                                         (X86VBroadcast
                                          (_.ScalarLdFrag addr:$src2)),
                                         cond)))]>,
               EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmibk : AVX512AIi8<opc, MRMSrcMem,
                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                        _.ScalarMemOp:$src2, u8imm:$cc),
                !strconcat("vpcmp", Suffix,
                           "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                           "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
                [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                       (_.KVT (Frag_su:$cc
                                               (_.VT _.RC:$src1),
                                               (X86VBroadcast
                                                (_.ScalarLdFrag addr:$src2)),
                                               cond))))]>,
                EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;

  def : Pat<(_.KVT (CommFrag:$cc (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                                 (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag_su:$cc (X86VBroadcast
                                          (_.ScalarLdFrag addr:$src2)),
                                         (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (CommFrag.OperandTransform $cc))>;
}

multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
                             PatFrag Frag_su, PatFrag CommFrag,
                             PatFrag CommFrag_su, X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                            sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                               sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                               sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
  }
}

multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
                                 PatFrag Frag_su, PatFrag CommFrag,
                                 PatFrag CommFrag_su, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                                sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                                   sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                                   sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
  }
}

def X86pcmpm_imm : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;

// Swapped operand version of the above.
def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  SSECC = X86::getSwappedVPCMPImm(SSECC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;

def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                       (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Same as above, but commutes immediate. Use for load folding.
def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                               (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

def X86pcmpm_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                  (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                        (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                           (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Same as above, but commutes immediate. Use for load folding.
def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

def X86pcmpum_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                   (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;
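
// Mapping note (illustrative): (setcc a, b, SETULT) matches X86pcmpum, and
// getVPCMPImmForCond turns SETULT into immediate 1, selecting e.g.
//   vpcmpud $1, %zmm1, %zmm0, %k0
// while the signed SETLT goes through X86pcmpm to vpcmpd with the same
// immediate. The *_commute variants swap the operands and apply
// getSwappedVPCMPImm so a load in the first operand can still be folded.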

// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
                                X86pcmpm_commute, X86pcmpm_commute_su,
                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
              EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
                                 X86pcmpum_commute, X86pcmpum_commute_su,
                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
               EVEX_CD8<8, CD8VF>;

defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
                                X86pcmpm_commute, X86pcmpm_commute_su,
                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
              VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
                                 X86pcmpum_commute, X86pcmpum_commute_su,
                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
               VEX_W, EVEX_CD8<16, CD8VF>;

defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
                                    X86pcmpm_commute, X86pcmpm_commute_su,
                                    SchedWriteVecALU, avx512vl_i32_info,
                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
                                     X86pcmpum_commute, X86pcmpum_commute_su,
                                     SchedWriteVecALU, avx512vl_i32_info,
                                     HasAVX512>, EVEX_CD8<32, CD8VF>;

defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
                                    X86pcmpm_commute, X86pcmpm_commute_su,
                                    SchedWriteVecALU, avx512vl_i64_info,
                                    HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
                                     X86pcmpum_commute, X86pcmpum_commute_su,
                                     SchedWriteVecALU, avx512vl_i64_info,
                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;

def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                         (X86cmpm node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;
def X86cmpmSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                            (X86cmpmSAE node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;

multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
                              string Name> {
  defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                 "vcmp"#_.Suffix,
                 "$cc, $src2, $src1", "$src1, $src2, $cc",
                 (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
                 (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
                 1>, Sched<[sched]>;

  defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
                 "vcmp"#_.Suffix,
                 "$cc, $src2, $src1", "$src1, $src2, $cc",
                 (X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                          imm:$cc),
                 (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                             imm:$cc), 1>,
                 Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                  (outs _.KRC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                  "vcmp"#_.Suffix,
                  "$cc, ${src2}"#_.BroadcastStr#", $src1",
                  "$src1, ${src2}"#_.BroadcastStr#", $cc",
                  (X86cmpm (_.VT _.RC:$src1),
                           (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                           imm:$cc),
                  (X86cmpm_su (_.VT _.RC:$src1),
                              (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                              imm:$cc), 1>,
                  EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Patterns for selecting with loads in other operand.
  def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
                     CommutableCMPCC:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
                                                      imm:$cc)>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
                                            (_.VT _.RC:$src1),
                                            CommutableCMPCC:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
                                                       _.RC:$src1, addr:$src2,
                                                       imm:$cc)>;

  def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                     (_.VT _.RC:$src1), CommutableCMPCC:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
                                                       imm:$cc)>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (X86VBroadcast
                                             (_.ScalarLdFrag addr:$src2)),
                                            (_.VT _.RC:$src1),
                                            CommutableCMPCC:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
                                                        _.RC:$src1, addr:$src2,
                                                        imm:$cc)>;
}

multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // Comparison code form (VCMP[EQ/LT/LE/...]).
  defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                  (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                  "vcmp"#_.Suffix,
                  "$cc, {sae}, $src2, $src1",
                  "$src1, $src2, {sae}, $cc",
                  (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
                  (X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                 imm:$cc)>,
                  EVEX_B, Sched<[sched]>;
}

multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
             avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
  }
}

defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
              AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
              AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

// Patterns to select fp compares with load as first operand.
let Predicates = [HasAVX512] in {
  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
                            CommutableCMPCC:$cc)),
            (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>;

  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
                            CommutableCMPCC:$cc)),
            (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>;
}

// ----------------------------------------------------------------
// FPClass

def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
                              (X86Vfpclasss node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
                             (X86Vfpclass node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

// Handle the fpclass instruction: mask = fpclass(reg_scalar, imm)
//                                 mask = fpclass(mem_scalar, imm)
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 Predicate prd> {
  let Predicates = [prd], ExeDomain = _.ExeDomain in {
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst, (X86Vfpclasss (_.VT _.RC:$src1),
                                                    (i32 imm:$src2)))]>,
                    Sched<[sched]>;
    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                            (X86Vfpclasss_su (_.VT _.RC:$src1),
                                                             (i32 imm:$src2))))]>,
                     EVEX_K, Sched<[sched]>;
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                          (X86Vfpclasss _.ScalarIntMemCPat:$src1,
                                        (i32 imm:$src2)))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                            (X86Vfpclasss_su _.ScalarIntMemCPat:$src1,
                                                             (i32 imm:$src2))))]>,
                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle the fpclass instruction: mask = fpclass(reg_vec, imm)
//                                 mask = fpclass(mem_vec, imm)
//                                 mask = fpclass(broadcast(eltVt), imm)
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 string mem> {
  let ExeDomain = _.ExeDomain in {
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst, (X86Vfpclass (_.VT _.RC:$src1),
                                                   (i32 imm:$src2)))]>,
                    Sched<[sched]>;
    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                            (X86Vfpclass_su (_.VT _.RC:$src1),
                                                            (i32 imm:$src2))))]>,
                     EVEX_K, Sched<[sched]>;
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"{"#mem#"}"#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst, (X86Vfpclass
                                       (_.VT (_.LdFrag addr:$src1)),
                                       (i32 imm:$src2)))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#"{"#mem#"}"#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
                                            (_.VT (_.LdFrag addr:$src1)),
                                            (i32 imm:$src2))))]>,
                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
                     _.BroadcastStr#", $dst|$dst, ${src1}"#
                     _.BroadcastStr#", $src2}",
                     [(set _.KRC:$dst, (X86Vfpclass
                                        (_.VT (X86VBroadcast
                                               (_.ScalarLdFrag addr:$src1))),
                                        (i32 imm:$src2)))]>,
                     EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
                      _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
                      _.BroadcastStr#", $src2}",
                      [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
                                             (_.VT (X86VBroadcast
                                                    (_.ScalarLdFrag addr:$src1))),
                                             (i32 imm:$src2))))]>,
                      EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
  // the memory form.
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rr")
                   _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rrk")
                   _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
                  _.BroadcastStr#", $src2}",
                  (!cast<Instruction>(NAME#"rmb")
                   _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
                  "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
                  (!cast<Instruction>(NAME#"rmbk")
                   _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
}

multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
                                     bits<8> opc, X86SchedWriteWidths sched,
                                     Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
                                   _.info512, "z">, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
                                      _.info128, "x">, EVEX_V128;
    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
                                      _.info256, "y">, EVEX_V256;
  }
}

multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
                                 bits<8> opcScalar, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
                                      sched, prd>,
            EVEX_CD8<32, CD8VF>;
  defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
                                      sched, prd>,
            EVEX_CD8<64, CD8VF>, VEX_W;
  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f32x_info, prd>, VEX_LIG,
             EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f64x_info, prd>, VEX_LIG,
             EVEX_CD8<64, CD8VT1>, VEX_W;
}

defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp,
                                      HasDQI>, AVX512AIi8Base, EVEX;
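
// The immediate is a bitmask of FP classes to test, e.g. (illustrative):
//   vfpclassps $0x81, %zmm0, %k0
// sets k0[i] when lane i is any NaN (0x01 tests QNaN, 0x80 tests SNaN).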

//-----------------------------------------------------------------
// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
// - copy from GPR to mask register and vice versa
//
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                           string OpcodeStr, RegisterClass KRC,
                           ValueType vvt, X86MemOperand x86memop> {
  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
  def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
             Sched<[WriteMove]>;
  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(set KRC:$dst, (vvt (load addr:$src)))]>,
             Sched<[WriteLoad]>;
  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(store KRC:$src, addr:$dst)]>,
             Sched<[WriteStore]>;
}

multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                               string OpcodeStr,
                               RegisterClass KRC, RegisterClass GRC> {
  let hasSideEffects = 0 in {
    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
               Sched<[WriteMove]>;
    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
               Sched<[WriteMove]>;
  }
}

let Predicates = [HasDQI] in
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, PD;

let Predicates = [HasAVX512] in
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, PS;

let Predicates = [HasBWI] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32mem>,
               VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               XD;
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               XD, VEX_W;
}
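
// Feature note: KMOVW is the baseline AVX512F form; the byte variant needs
// DQI and the dword/qword variants need BWI, matching the predicates above.
// E.g. kmovw %eax, %k1 copies the low 16 bits of eax into mask register k1.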

// GR from/to mask register
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;

def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;

def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (KMOVWrk VK16:$src)>;
def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (COPY_TO_REGCLASS VK16:$src, GR32)>;
def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;

def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (COPY_TO_REGCLASS VK8:$src, GR32)>;
def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;

def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
          (COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
          (COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
          (COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
          (COPY_TO_REGCLASS VK64:$src, GR64)>;
2808 let Predicates = [HasDQI] in {
2809 def : Pat<(store VK1:$src, addr:$dst),
2810 (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
2812 def : Pat<(v1i1 (load addr:$src)),
2813 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2814 def : Pat<(v2i1 (load addr:$src)),
2815 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2816 def : Pat<(v4i1 (load addr:$src)),
2817 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2818 }
2820 let Predicates = [HasAVX512] in {
2821 def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2822 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2823 def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2824 (KMOVWkm addr:$src)>;
2825 }
2827 def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2828 SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2829 SDTCVecEltisVT<1, i1>,
2830 SDTCisPtrTy<2>]>>;
2832 let Predicates = [HasAVX512] in {
2833 multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2834 def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2835 (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2837 def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2838 (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2840 def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2841 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2843 def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2844 (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
2845 }
2847 defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
2848 defm : operation_gpr_mask_copy_lowering<VK2, v2i1>;
2849 defm : operation_gpr_mask_copy_lowering<VK4, v4i1>;
2850 defm : operation_gpr_mask_copy_lowering<VK8, v8i1>;
2851 defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
2852 defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
2853 defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;
2855 def : Pat<(insert_subvector (v16i1 immAllZerosV),
2856 (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2857 (COPY_TO_REGCLASS
2858 (KMOVWkr (AND32ri8
2859 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2860 (i32 1))), VK16)>;
2861 }
2863 // Mask unary operation
2864 // - KNOT
2865 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2866 RegisterClass KRC, SDPatternOperator OpNode,
2867 X86FoldableSchedWrite sched, Predicate prd> {
2868 let Predicates = [prd] in
2869 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2870 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2871 [(set KRC:$dst, (OpNode KRC:$src))]>,
2872 Sched<[sched]>;
2873 }
2875 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2876 SDPatternOperator OpNode,
2877 X86FoldableSchedWrite sched> {
2878 defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2879 sched, HasDQI>, VEX, PD;
2880 defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2881 sched, HasAVX512>, VEX, PS;
2882 defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2883 sched, HasBWI>, VEX, PD, VEX_W;
2884 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2885 sched, HasBWI>, VEX, PS, VEX_W;
2886 }
2888 // TODO - do we need an X86SchedWriteWidths::KMASK type?
2889 defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
2891 // KNL does not support KMOVB, so an 8-bit mask is promoted to a 16-bit mask.
2892 let Predicates = [HasAVX512, NoDQI] in
2893 def : Pat<(vnot VK8:$src),
2894 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
2896 def : Pat<(vnot VK4:$src),
2897 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2898 def : Pat<(vnot VK2:$src),
2899 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
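// A sketch of the lowering the patterns above describe for a v8i1 'vnot' on a
// KNL-class target (SSA names illustrative):
//   %t = COPY_TO_REGCLASS %src, VK16   // promote v8i1 to the 16-bit mask RC
//   %n = KNOTWrr %t                    // 16-bit knot; the upper bits are ignored
//   %r = COPY_TO_REGCLASS %n, VK8      // narrow back; only low 8 bits are used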
2901 // Mask binary operation
2902 // - KAND, KANDN, KOR, KXNOR, KXOR
2903 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2904 RegisterClass KRC, SDPatternOperator OpNode,
2905 X86FoldableSchedWrite sched, Predicate prd,
2906 bit IsCommutable = 0> {
2907 let Predicates = [prd], isCommutable = IsCommutable in
2908 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2909 !strconcat(OpcodeStr,
2910 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2911 [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
2912 Sched<[sched]>;
2913 }
2915 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2916 SDPatternOperator OpNode,
2917 X86FoldableSchedWrite sched, bit IsCommutable,
2918 Predicate prdW = HasAVX512> {
2919 defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2920 sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
2921 defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2922 sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
2923 defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2924 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
2925 defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2926 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
2927 }
2929 def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
2930 def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
2931 // These nodes use 'vnot' instead of 'not' to support vectors.
2932 def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
2933 def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
2935 // TODO - do we need an X86SchedWriteWidths::KMASK type?
2936 defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>;
2937 defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>;
2938 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>;
2939 defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>;
2940 defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>;
2941 defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
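// For reference, each defm above expands via avx512_mask_binop_all into four
// instructions, one per mask width; e.g. KAND yields (predicate in parens):
//   KANDBrr (HasDQI), KANDWrr (HasAVX512), KANDDrr (HasBWI), KANDQrr (HasBWI)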
2943 multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
2944 Instruction Inst> {
2945 // With AVX512F, an 8-bit mask is promoted to a 16-bit mask; for the DQI
2946 // set, this type is legal and the KxxxB instructions are used directly.
2947 let Predicates = [NoDQI] in
2948 def : Pat<(VOpNode VK8:$src1, VK8:$src2),
2949 (COPY_TO_REGCLASS
2950 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
2951 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
2953 // All types smaller than 8 bits require conversion anyway
2954 def : Pat<(OpNode VK1:$src1, VK1:$src2),
2955 (COPY_TO_REGCLASS (Inst
2956 (COPY_TO_REGCLASS VK1:$src1, VK16),
2957 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
2958 def : Pat<(VOpNode VK2:$src1, VK2:$src2),
2959 (COPY_TO_REGCLASS (Inst
2960 (COPY_TO_REGCLASS VK2:$src1, VK16),
2961 (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
2962 def : Pat<(VOpNode VK4:$src1, VK4:$src2),
2963 (COPY_TO_REGCLASS (Inst
2964 (COPY_TO_REGCLASS VK4:$src1, VK16),
2965 (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
2966 }
2968 defm : avx512_binop_pat<and, and, KANDWrr>;
2969 defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
2970 defm : avx512_binop_pat<or, or, KORWrr>;
2971 defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
2972 defm : avx512_binop_pat<xor, xor, KXORWrr>;
2975 multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
2976 X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
2977 Predicate prd> {
2978 let Predicates = [prd] in {
2979 let hasSideEffects = 0 in
2980 def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
2981 (ins Src.KRC:$src1, Src.KRC:$src2),
2982 "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
2983 VEX_4V, VEX_L, Sched<[sched]>;
2985 def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
2986 (!cast<Instruction>(NAME##rr) Src.KRC:$src2, Src.KRC:$src1)>;
2987 }
2988 }
2990 defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info, WriteShuffle, HasAVX512>, PD;
2991 defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
2992 defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;
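// Note on the concat_vectors pattern above: KUNPCK takes the half that forms
// the LOW bits of the result as its second source, while concat_vectors lists
// the low half first, hence the swapped operands. Illustrative mapping:
//   (v16i1 (concat_vectors VK8:$lo, VK8:$hi))  ==>  (KUNPCKBWrr $hi, $lo)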
2995 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2996 SDNode OpNode, X86FoldableSchedWrite sched,
2997 Predicate prd> {
2998 let Predicates = [prd], Defs = [EFLAGS] in
2999 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
3000 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
3001 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
3002 Sched<[sched]>;
3003 }
3005 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
3006 X86FoldableSchedWrite sched,
3007 Predicate prdW = HasAVX512> {
3008 defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
3009 VEX, PD;
3010 defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
3011 VEX, PS;
3012 defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
3013 VEX, PS, VEX_W;
3014 defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
3015 VEX, PD, VEX_W;
3016 }
3018 // TODO - do we need an X86SchedWriteWidths::KMASK type?
3019 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
3020 defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
3023 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3024 SDNode OpNode, X86FoldableSchedWrite sched> {
3025 let Predicates = [HasAVX512] in
3026 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
3027 !strconcat(OpcodeStr,
3028 "\t{$imm, $src, $dst|$dst, $src, $imm}"),
3029 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>,
3030 Sched<[sched]>;
3031 }
3033 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
3034 SDNode OpNode, X86FoldableSchedWrite sched> {
3035 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3036 sched>, VEX, TAPD, VEX_W;
3037 let Predicates = [HasDQI] in
3038 defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3039 sched>, VEX, TAPD;
3040 let Predicates = [HasBWI] in {
3041 defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3042 sched>, VEX, TAPD, VEX_W;
3043 defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3044 sched>, VEX, TAPD;
3045 }
3046 }
3048 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
3049 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
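// A typical use of these shifts (illustrative): moving the high half of a
// v16i1 mask into the low bits before narrowing:
//   kshiftrw $8, %k1, %k2    // k2[7:0] = k1[15:8]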
3051 // Patterns for comparing 128/256-bit integer vectors using a 512-bit instruction.
3052 multiclass avx512_icmp_packed_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3053 string InstStr,
3054 X86VectorVTInfo Narrow,
3055 X86VectorVTInfo Wide> {
3056 def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
3057 (Narrow.VT Narrow.RC:$src2))),
3058 (COPY_TO_REGCLASS
3059 (!cast<Instruction>(InstStr#"Zrr")
3060 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3061 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
3062 Narrow.KRC)>;
3064 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3065 (Frag_su (Narrow.VT Narrow.RC:$src1),
3066 (Narrow.VT Narrow.RC:$src2)))),
3067 (COPY_TO_REGCLASS
3068 (!cast<Instruction>(InstStr#"Zrrk")
3069 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3070 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3071 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
3072 Narrow.KRC)>;
3073 }
3075 // Patterns for comparing 128/256-bit integer vectors using a 512-bit instruction.
3076 multiclass avx512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3077 string InstStr,
3078 X86VectorVTInfo Narrow,
3079 X86VectorVTInfo Wide> {
3080 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3081 (Narrow.VT Narrow.RC:$src2), cond)),
3082 (COPY_TO_REGCLASS
3083 (!cast<Instruction>(InstStr##Zrri)
3084 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3085 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3086 (Frag.OperandTransform $cc)), Narrow.KRC)>;
3088 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3089 (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3090 (Narrow.VT Narrow.RC:$src2),
3091 cond)))),
3092 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
3093 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3094 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3095 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3096 (Frag.OperandTransform $cc)), Narrow.KRC)>;
3097 }
3099 // Same as above, but for floating-point types, which don't use PatFrags.
3100 multiclass avx512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, PatFrag OpNode_su,
3101 string InstStr,
3102 X86VectorVTInfo Narrow,
3103 X86VectorVTInfo Wide> {
3104 def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
3105 (Narrow.VT Narrow.RC:$src2), imm:$cc)),
3106 (COPY_TO_REGCLASS
3107 (!cast<Instruction>(InstStr##Zrri)
3108 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3109 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3110 imm:$cc), Narrow.KRC)>;
3112 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3113 (OpNode_su (Narrow.VT Narrow.RC:$src1),
3114 (Narrow.VT Narrow.RC:$src2), imm:$cc))),
3115 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
3116 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3117 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3118 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3119 imm:$cc), Narrow.KRC)>;
3120 }
3122 let Predicates = [HasAVX512, NoVLX] in {
3123 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
3124 // increase the pattern complexity the way an immediate would.
3125 let AddedComplexity = 2 in {
3126 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v8i32x_info, v16i32_info>;
3127 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v8i32x_info, v16i32_info>;
3129 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v4i32x_info, v16i32_info>;
3130 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v4i32x_info, v16i32_info>;
3132 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v4i64x_info, v8i64_info>;
3133 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v4i64x_info, v8i64_info>;
3135 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v2i64x_info, v8i64_info>;
3136 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v2i64x_info, v8i64_info>;
3137 }
3139 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3140 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3142 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3143 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3145 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3146 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3148 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3149 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3151 defm : avx512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPS", v8f32x_info, v16f32_info>;
3152 defm : avx512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPS", v4f32x_info, v16f32_info>;
3153 defm : avx512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPD", v4f64x_info, v8f64_info>;
3154 defm : avx512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPD", v2f64x_info, v8f64_info>;
3155 }
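// A sketch of what the NoVLX lowering above produces for, e.g., a v8i32
// compare (virtual register names are illustrative):
//   %w1 = INSERT_SUBREG IMPLICIT_DEF, %ymm_src1, sub_ymm
//   %w2 = INSERT_SUBREG IMPLICIT_DEF, %ymm_src2, sub_ymm
//   %k  = VPCMPGTDZrr %w1, %w2          // full 512-bit compare
//   %m  = COPY_TO_REGCLASS %k, VK8      // only the low 8 bits are meaningful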
3157 let Predicates = [HasBWI, NoVLX] in {
3158 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
3159 // increase the pattern complexity the way an immediate would.
3160 let AddedComplexity = 2 in {
3161 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTB", v32i8x_info, v64i8_info>;
3162 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQB", v32i8x_info, v64i8_info>;
3164 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTB", v16i8x_info, v64i8_info>;
3165 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQB", v16i8x_info, v64i8_info>;
3167 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTW", v16i16x_info, v32i16_info>;
3168 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQW", v16i16x_info, v32i16_info>;
3170 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTW", v8i16x_info, v32i16_info>;
3171 defm : avx512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQW", v8i16x_info, v32i16_info>;
3172 }
3174 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3175 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3177 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3178 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3180 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3181 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3183 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3184 defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
3185 }
3187 // Mask setting all 0s or 1s
3188 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
3189 let Predicates = [HasAVX512] in
3190 let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3191 SchedRW = [WriteZero] in
3192 def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3193 [(set KRC:$dst, (VT Val))]>;
3194 }
3196 multiclass avx512_mask_setop_w<PatFrag Val> {
3197 defm W : avx512_mask_setop<VK16, v16i1, Val>;
3198 defm D : avx512_mask_setop<VK32, v32i1, Val>;
3199 defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3200 }
3202 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3203 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3205 // With AVX-512 only, an 8-bit mask is promoted to a 16-bit mask.
3206 let Predicates = [HasAVX512] in {
3207 def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3208 def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3209 def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3210 def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3211 def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
3212 def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
3213 def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
3214 def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
3215 }
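// KSET0*/KSET1* are pseudos expanded after register allocation (see
// expandPostRAPseudos); an all-zeros/all-ones mask is typically materialized
// as, e.g.:
//   kxorw  %k0, %k0, %k0     // KSET0W
//   kxnorw %k0, %k0, %k0     // KSET1W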
3217 // Patterns for kmask insert_subvector/extract_subvector to/from index=0
3218 multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3219 RegisterClass RC, ValueType VT> {
3220 def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3221 (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3223 def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3224 (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
3225 }
3226 defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>;
3227 defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>;
3228 defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>;
3229 defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
3230 defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
3231 defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;
3233 defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>;
3234 defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>;
3235 defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>;
3236 defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>;
3237 defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>;
3239 defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>;
3240 defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>;
3241 defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>;
3242 defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>;
3244 defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>;
3245 defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>;
3246 defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>;
3248 defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3249 defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3251 defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
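// Because every kmask insert/extract handled here is at index 0, each pattern
// reduces to a plain register-class copy; e.g. the VK8-from-VK16 case means:
//   (v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 0)))
//     ==> (COPY_TO_REGCLASS VK16:$src, VK8)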
3253 //===----------------------------------------------------------------------===//
3254 // AVX-512 - Aligned and unaligned load and store
3255 //===----------------------------------------------------------------------===//
3257 multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3258 X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3259 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3260 bit NoRMPattern = 0,
3261 SDPatternOperator SelectOprr = vselect> {
3262 let hasSideEffects = 0 in {
3263 let isMoveReg = 1 in
3264 def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3265 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3266 _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
3267 EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
3268 def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3269 (ins _.KRCWM:$mask, _.RC:$src),
3270 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3271 "${dst} {${mask}} {z}, $src}"),
3272 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3273 (_.VT _.RC:$src),
3274 _.ImmAllZerosV)))], _.ExeDomain>,
3275 EVEX, EVEX_KZ, Sched<[Sched.RR]>;
3277 let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3278 def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3279 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3280 !if(NoRMPattern, [],
3281 [(set _.RC:$dst,
3282 (_.VT (ld_frag addr:$src)))]),
3283 _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
3284 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
3286 let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3287 def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3288 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3289 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3290 "${dst} {${mask}}, $src1}"),
3291 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3292 (_.VT _.RC:$src1),
3293 (_.VT _.RC:$src0))))], _.ExeDomain>,
3294 EVEX, EVEX_K, Sched<[Sched.RR]>;
3295 def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3296 (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3297 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3298 "${dst} {${mask}}, $src1}"),
3299 [(set _.RC:$dst, (_.VT
3300 (vselect _.KRCWM:$mask,
3301 (_.VT (ld_frag addr:$src1)),
3302 (_.VT _.RC:$src0))))], _.ExeDomain>,
3303 EVEX, EVEX_K, Sched<[Sched.RM]>;
3304 }
3305 def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3306 (ins _.KRCWM:$mask, _.MemOp:$src),
3307 OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3308 "${dst} {${mask}} {z}, $src}",
3309 [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
3310 (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3311 _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
3312 }
3313 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3314 (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3316 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3317 (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3319 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3320 (!cast<Instruction>(Name#_.ZSuffix##rmk) _.RC:$src0,
3321 _.KRCWM:$mask, addr:$ptr)>;
3322 }
3324 multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3325 AVX512VLVectorVTInfo _, Predicate prd,
3326 X86SchedWriteMoveLSWidths Sched,
3327 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
3328 let Predicates = [prd] in
3329 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3330 _.info512.AlignedLdFrag, masked_load_aligned,
3331 Sched.ZMM, "", NoRMPattern>, EVEX_V512;
3333 let Predicates = [prd, HasVLX] in {
3334 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3335 _.info256.AlignedLdFrag, masked_load_aligned,
3336 Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
3337 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3338 _.info128.AlignedLdFrag, masked_load_aligned,
3339 Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
3340 }
3341 }
3343 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3344 AVX512VLVectorVTInfo _, Predicate prd,
3345 X86SchedWriteMoveLSWidths Sched,
3346 string EVEX2VEXOvrd, bit NoRMPattern = 0,
3347 SDPatternOperator SelectOprr = vselect> {
3348 let Predicates = [prd] in
3349 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3350 masked_load, Sched.ZMM, "",
3351 NoRMPattern, SelectOprr>, EVEX_V512;
3353 let Predicates = [prd, HasVLX] in {
3354 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3355 masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
3356 NoRMPattern, SelectOprr>, EVEX_V256;
3357 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3358 masked_load, Sched.XMM, EVEX2VEXOvrd,
3359 NoRMPattern, SelectOprr>, EVEX_V128;
3360 }
3361 }
3363 multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3364 X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3365 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3366 bit NoMRPattern = 0> {
3367 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3368 let isMoveReg = 1 in
3369 def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3370 OpcodeStr # "\t{$src, $dst|$dst, $src}",
3371 [], _.ExeDomain>, EVEX,
3372 FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
3373 EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
3374 def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3375 (ins _.KRCWM:$mask, _.RC:$src),
3376 OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3377 "${dst} {${mask}}, $src}",
3378 [], _.ExeDomain>, EVEX, EVEX_K,
3379 FoldGenData<BaseName#_.ZSuffix#rrk>,
3380 Sched<[Sched.RR]>;
3381 def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3382 (ins _.KRCWM:$mask, _.RC:$src),
3383 OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3384 "${dst} {${mask}} {z}, $src}",
3385 [], _.ExeDomain>, EVEX, EVEX_KZ,
3386 FoldGenData<BaseName#_.ZSuffix#rrkz>,
3387 Sched<[Sched.RR]>;
3388 }
3390 let hasSideEffects = 0, mayStore = 1 in
3391 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3392 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3393 !if(NoMRPattern, [],
3394 [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3395 _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
3396 EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
3397 def mrk : AVX512PI<opc, MRMDestMem, (outs),
3398 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3399 OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3400 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
3401 NotMemoryFoldable;
3403 def : Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3404 (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3405 _.KRCWM:$mask, _.RC:$src)>;
3407 def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3408 (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3409 _.RC:$dst, _.RC:$src), 0>;
3410 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3411 (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3412 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3413 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3414 (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3415 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3416 }
3418 multiclass avx512_store_vl<bits<8> opc, string OpcodeStr,
3419 AVX512VLVectorVTInfo _, Predicate prd,
3420 X86SchedWriteMoveLSWidths Sched,
3421 string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3422 let Predicates = [prd] in
3423 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3424 masked_store, Sched.ZMM, "",
3425 NoMRPattern>, EVEX_V512;
3426 let Predicates = [prd, HasVLX] in {
3427 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3428 masked_store, Sched.YMM,
3429 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3430 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3431 masked_store, Sched.XMM, EVEX2VEXOvrd,
3432 NoMRPattern>, EVEX_V128;
3433 }
3434 }
3436 multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3437 AVX512VLVectorVTInfo _, Predicate prd,
3438 X86SchedWriteMoveLSWidths Sched,
3439 string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3440 let Predicates = [prd] in
3441 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3442 masked_store_aligned, Sched.ZMM, "",
3443 NoMRPattern>, EVEX_V512;
3445 let Predicates = [prd, HasVLX] in {
3446 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3447 masked_store_aligned, Sched.YMM,
3448 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3449 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3450 masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
3451 NoMRPattern>, EVEX_V128;
3452 }
3453 }
3455 defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3456 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3457 avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3458 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3459 PS, EVEX_CD8<32, CD8VF>;
3461 defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3462 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3463 avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3464 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3465 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3467 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3468 SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
3469 avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3470 SchedWriteFMoveLS, "VMOVUPS">,
3471 PS, EVEX_CD8<32, CD8VF>;
3473 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3474 SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
3475 avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3476 SchedWriteFMoveLS, "VMOVUPD">,
3477 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3479 defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3480 HasAVX512, SchedWriteVecMoveLS,
3481 "VMOVDQA", 1>,
3482 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3483 HasAVX512, SchedWriteVecMoveLS,
3484 "VMOVDQA", 1>,
3485 PD, EVEX_CD8<32, CD8VF>;
3487 defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3488 HasAVX512, SchedWriteVecMoveLS,
3489 "VMOVDQA">,
3490 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3491 HasAVX512, SchedWriteVecMoveLS,
3492 "VMOVDQA">,
3493 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3495 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3496 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3497 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3498 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3499 XD, EVEX_CD8<8, CD8VF>;
3501 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3502 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3503 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3504 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3505 XD, VEX_W, EVEX_CD8<16, CD8VF>;
3507 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3508 SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
3509 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3510 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3511 XS, EVEX_CD8<32, CD8VF>;
3513 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3514 SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
3515 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3516 SchedWriteVecMoveLS, "VMOVDQU">,
3517 XS, VEX_W, EVEX_CD8<64, CD8VF>;
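// For reference, the masked forms defined above assemble as, e.g. (AT&T
// syntax, registers illustrative):
//   vmovdqu32 (%rax), %zmm0 {%k1}        // rmk:  merge-masked load
//   vmovdqu32 (%rax), %zmm0 {%k1} {z}    // rmkz: zero-masked load
//   vmovdqu32 %zmm0, (%rax) {%k1}        // mrk:  masked store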
3519 // Special instructions to help with spilling when we don't have VLX. We need
3520 // to load or store from a ZMM register instead. These are converted in
3521 // expandPostRAPseudos.
3522 let isReMaterializable = 1, canFoldAsLoad = 1,
3523 isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3524 def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3525 "", []>, Sched<[WriteFLoadX]>;
3526 def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3527 "", []>, Sched<[WriteFLoadY]>;
3528 def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3529 "", []>, Sched<[WriteFLoadX]>;
3530 def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3531 "", []>, Sched<[WriteFLoadY]>;
3532 }
3534 let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3535 def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3536 "", []>, Sched<[WriteFStoreX]>;
3537 def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3538 "", []>, Sched<[WriteFStoreY]>;
3539 def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3540 "", []>, Sched<[WriteFStoreX]>;
3541 def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3542 "", []>, Sched<[WriteFStoreY]>;
3543 }
3545 def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3546 (v8i64 VR512:$src))),
3547 (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3548 VK8), VR512:$src)>;
3550 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3551 (v16i32 VR512:$src))),
3552 (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3554 // These patterns exist to prevent the above patterns from introducing a second
3555 // mask inversion when one already exists.
3556 def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
3557 (v8i64 immAllZerosV),
3558 (v8i64 VR512:$src))),
3559 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3560 def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
3561 (v16i32 immAllZerosV),
3562 (v16i32 VR512:$src))),
3563 (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3565 multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3566 X86VectorVTInfo Wide> {
3567 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3568 Narrow.RC:$src1, Narrow.RC:$src0)),
3569 (EXTRACT_SUBREG
3570 (Wide.VT
3571 (!cast<Instruction>(InstrStr#"rrk")
3572 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3573 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3574 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3575 Narrow.SubRegIdx)>;
3577 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3578 Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3579 (EXTRACT_SUBREG
3580 (Wide.VT
3581 (!cast<Instruction>(InstrStr#"rrkz")
3582 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3583 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3584 Narrow.SubRegIdx)>;
3585 }
3587 // Patterns for handling selects of 128/256-bit vectors when VLX isn't
3588 // available: use a 512-bit operation and extract the result.
3589 let Predicates = [HasAVX512, NoVLX] in {
3590 defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3591 defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3592 defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3593 defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3595 defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3596 defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3597 defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3598 defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
3599 }
3601 let Predicates = [HasBWI, NoVLX] in {
3602 defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3603 defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3605 defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3606 defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
3607 }
3609 let Predicates = [HasAVX512] in {
3611 def : Pat<(alignedloadv16i32 addr:$src),
3612 (VMOVDQA64Zrm addr:$src)>;
3613 def : Pat<(alignedloadv32i16 addr:$src),
3614 (VMOVDQA64Zrm addr:$src)>;
3615 def : Pat<(alignedloadv64i8 addr:$src),
3616 (VMOVDQA64Zrm addr:$src)>;
3617 def : Pat<(loadv16i32 addr:$src),
3618 (VMOVDQU64Zrm addr:$src)>;
3619 def : Pat<(loadv32i16 addr:$src),
3620 (VMOVDQU64Zrm addr:$src)>;
3621 def : Pat<(loadv64i8 addr:$src),
3622 (VMOVDQU64Zrm addr:$src)>;
3625 def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3626 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3627 def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3628 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3629 def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3630 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3631 def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3632 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3633 def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3634 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3635 def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3636 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3637 }
3639 let Predicates = [HasVLX] in {
3641 def : Pat<(alignedloadv4i32 addr:$src),
3642 (VMOVDQA64Z128rm addr:$src)>;
3643 def : Pat<(alignedloadv8i16 addr:$src),
3644 (VMOVDQA64Z128rm addr:$src)>;
3645 def : Pat<(alignedloadv16i8 addr:$src),
3646 (VMOVDQA64Z128rm addr:$src)>;
3647 def : Pat<(loadv4i32 addr:$src),
3648 (VMOVDQU64Z128rm addr:$src)>;
3649 def : Pat<(loadv8i16 addr:$src),
3650 (VMOVDQU64Z128rm addr:$src)>;
3651 def : Pat<(loadv16i8 addr:$src),
3652 (VMOVDQU64Z128rm addr:$src)>;
3655 def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3656 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3657 def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3658 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3659 def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3660 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3661 def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3662 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3663 def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3664 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3665 def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3666 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3669 def : Pat<(alignedloadv8i32 addr:$src),
3670 (VMOVDQA64Z256rm addr:$src)>;
3671 def : Pat<(alignedloadv16i16 addr:$src),
3672 (VMOVDQA64Z256rm addr:$src)>;
3673 def : Pat<(alignedloadv32i8 addr:$src),
3674 (VMOVDQA64Z256rm addr:$src)>;
3675 def : Pat<(loadv8i32 addr:$src),
3676 (VMOVDQU64Z256rm addr:$src)>;
3677 def : Pat<(loadv16i16 addr:$src),
3678 (VMOVDQU64Z256rm addr:$src)>;
3679 def : Pat<(loadv32i8 addr:$src),
3680 (VMOVDQU64Z256rm addr:$src)>;
3683 def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3684 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3685 def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3686 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3687 def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3688 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3689 def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3690 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3691 def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3692 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3693 def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3694 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3695 }
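// As the patterns above show, unmasked integer vector loads/stores all funnel
// into the 64-bit-element opcodes; e.g. an unmasked v16i8 load selects as:
//   (loadv16i8 addr)  ==>  (VMOVDQU64Z128rm addr)
// The element size only becomes observable once a write-mask is applied.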
3697 // Move Int Doubleword to Packed Double Int
3699 let ExeDomain = SSEPackedInt in {
3700 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3701 "vmovd\t{$src, $dst|$dst, $src}",
3702 [(set VR128X:$dst,
3703 (v4i32 (scalar_to_vector GR32:$src)))]>,
3704 EVEX, Sched<[WriteVecMoveFromGpr]>;
3705 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3706 "vmovd\t{$src, $dst|$dst, $src}",
3707 [(set VR128X:$dst,
3708 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3709 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
3710 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3711 "vmovq\t{$src, $dst|$dst, $src}",
3712 [(set VR128X:$dst,
3713 (v2i64 (scalar_to_vector GR64:$src)))]>,
3714 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3715 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3716 def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3717 (ins i64mem:$src),
3718 "vmovq\t{$src, $dst|$dst, $src}", []>,
3719 EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
3720 let isCodeGenOnly = 1 in {
3721 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3722 "vmovq\t{$src, $dst|$dst, $src}",
3723 [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3724 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3725 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3726 "vmovq\t{$src, $dst|$dst, $src}",
3727 [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3728 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3729 }
3730 } // ExeDomain = SSEPackedInt
3732 // Move Int Doubleword to Single Scalar
3734 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3735 def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3736 "vmovd\t{$src, $dst|$dst, $src}",
3737 [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3738 EVEX, Sched<[WriteVecMoveFromGpr]>;
3739 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3741 // Move doubleword from xmm register to r/m32
3743 let ExeDomain = SSEPackedInt in {
3744 def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3745 "vmovd\t{$src, $dst|$dst, $src}",
3746 [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3747 (iPTR 0)))]>,
3748 EVEX, Sched<[WriteVecMoveToGpr]>;
3749 def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
3750 (ins i32mem:$dst, VR128X:$src),
3751 "vmovd\t{$src, $dst|$dst, $src}",
3752 [(store (i32 (extractelt (v4i32 VR128X:$src),
3753 (iPTR 0))), addr:$dst)]>,
3754 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
3755 } // ExeDomain = SSEPackedInt
3757 // Move quadword from xmm1 register to r/m64
3759 let ExeDomain = SSEPackedInt in {
3760 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3761 "vmovq\t{$src, $dst|$dst, $src}",
3762 [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3763 (iPTR 0)))]>,
3764 PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
3765 Requires<[HasAVX512]>;
3767 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
3768 def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3769 "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
3770 EVEX, VEX_W, Sched<[WriteVecStore]>,
3771 Requires<[HasAVX512, In64BitMode]>;
3773 def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3774 (ins i64mem:$dst, VR128X:$src),
3775 "vmovq\t{$src, $dst|$dst, $src}",
3776 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3777 addr:$dst)]>,
3778 EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
3779 Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
3781 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
3782 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3783 (ins VR128X:$src),
3784 "vmovq\t{$src, $dst|$dst, $src}", []>,
3785 EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
3786 } // ExeDomain = SSEPackedInt
3788 def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
3789 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
3791 let Predicates = [HasAVX512] in {
3792 def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
3793 (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
3794 }
3796 // Move Scalar Single to Double Int
3798 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3799 def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3800 (ins FR32X:$src),
3801 "vmovd\t{$src, $dst|$dst, $src}",
3802 [(set GR32:$dst, (bitconvert FR32X:$src))]>,
3803 EVEX, Sched<[WriteVecMoveToGpr]>;
3804 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3806 // Move Quadword Int to Packed Quadword Int
3808 let ExeDomain = SSEPackedInt in {
3809 def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3810 (ins i64mem:$src),
3811 "vmovq\t{$src, $dst|$dst, $src}",
3812 [(set VR128X:$dst,
3813 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3814 EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
3815 } // ExeDomain = SSEPackedInt
3817 // Allow "vmovd" but print "vmovq".
3818 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3819 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
3820 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3821 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3823 //===----------------------------------------------------------------------===//
3824 // AVX-512 MOVSS, MOVSD
3825 //===----------------------------------------------------------------------===//
3827 multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
3828 X86VectorVTInfo _> {
3829 let Predicates = [HasAVX512, OptForSize] in
3830 def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3831 (ins _.RC:$src1, _.RC:$src2),
3832 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3833 [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3834 _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
3835 def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3836 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3837 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3838 "$dst {${mask}} {z}, $src1, $src2}"),
3839 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3840 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3841 _.ImmAllZerosV)))],
3842 _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
3843 let Constraints = "$src0 = $dst" in
3844 def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3845 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3846 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3847 "$dst {${mask}}, $src1, $src2}"),
3848 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3849 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3850 (_.VT _.RC:$src0))))],
3851 _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
3852 let canFoldAsLoad = 1, isReMaterializable = 1 in {
3853 def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
3854 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3855 [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
3856 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3857 // _alt version uses FR32/FR64 register class.
3858 let isCodeGenOnly = 1 in
3859 def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3860 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3861 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3862 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3863 }
3864 let mayLoad = 1, hasSideEffects = 0 in {
3865 let Constraints = "$src0 = $dst" in
3866 def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3867 (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3868 !strconcat(asm, "\t{$src, $dst {${mask}}|",
3869 "$dst {${mask}}, $src}"),
3870 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
3871 def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3872 (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3873 !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3874 "$dst {${mask}} {z}, $src}"),
3875 [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
3876 }
3877 def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
3878 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3879 [(store _.FRC:$src, addr:$dst)], _.ExeDomain>,
3880 EVEX, Sched<[WriteFStore]>;
3881 let mayStore = 1, hasSideEffects = 0 in
3882 def mrk: AVX512PI<0x11, MRMDestMem, (outs),
3883 (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
3884 !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
3885 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
3886 NotMemoryFoldable;
3887 }
3889 defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
3890 VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
3892 defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
3893 VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
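// The masked scalar moves defined above assemble as, e.g. (AT&T syntax,
// registers illustrative):
//   vmovss %xmm2, %xmm1, %xmm0 {%k1}      // rrk:  merge with $src0
//   vmovss %xmm2, %xmm1, %xmm0 {%k1} {z}  // rrkz: zero-masking
//   vmovss (%rax), %xmm0 {%k1} {z}        // rmkz: masked scalar load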
3896 multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
3897 PatLeaf ZeroFP, X86VectorVTInfo _> {
3899 def : Pat<(_.VT (OpNode _.RC:$src0,
3900 (_.VT (scalar_to_vector
3901 (_.EltVT (X86selects VK1WM:$mask,
3902 (_.EltVT _.FRC:$src1),
3903 (_.EltVT _.FRC:$src2))))))),
3904 (!cast<Instruction>(InstrStr#rrk)
3905 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
3906 VK1WM:$mask,
3907 (_.VT _.RC:$src0),
3908 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
3910 def : Pat<(_.VT (OpNode _.RC:$src0,
3911 (_.VT (scalar_to_vector
3912 (_.EltVT (X86selects VK1WM:$mask,
3913 (_.EltVT _.FRC:$src1),
3914 (_.EltVT ZeroFP))))))),
3915 (!cast<Instruction>(InstrStr#rrkz)
3916 VK1WM:$mask,
3917 (_.VT _.RC:$src0),
3918 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
3919 }
3921 multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
3922 dag Mask, RegisterClass MaskRC> {
3924 def : Pat<(masked_store
3925 (_.info512.VT (insert_subvector undef,
3926 (_.info128.VT _.info128.RC:$src),
3927 (iPTR 0))), addr:$dst, Mask),
3928 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
3929 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
3930 _.info128.RC:$src)>;
3931 }
3934 multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
3935 AVX512VLVectorVTInfo _,
3936 dag Mask, RegisterClass MaskRC,
3937 SubRegIndex subreg> {
3939 def : Pat<(masked_store
3940 (_.info512.VT (insert_subvector undef,
3941 (_.info128.VT _.info128.RC:$src),
3942 (iPTR 0))), addr:$dst, Mask),
3943 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
3944 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
3945 _.info128.RC:$src)>;
3946 }
3949 // This matches the more recent codegen from clang that avoids emitting a
3950 // 512-bit masked store directly. Codegen will widen a 128-bit masked store
3951 // to 512 bits on AVX512F-only targets.
3952 multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
3953 AVX512VLVectorVTInfo _,
3954 dag Mask512, dag Mask128,
3955 RegisterClass MaskRC,
3956 SubRegIndex subreg> {
3958 // AVX512F pattern.
3959 def : Pat<(masked_store
3960 (_.info512.VT (insert_subvector undef,
3961 (_.info128.VT _.info128.RC:$src),
3962 (iPTR 0))), addr:$dst, Mask512),
3963 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
3964 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
3965 _.info128.RC:$src)>;
3967 // AVX512VL pattern.
3968 def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
3969 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
3970 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
3971 _.info128.RC:$src)>;
3972 }
3974 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
3975 dag Mask, RegisterClass MaskRC> {
3977 def : Pat<(_.info128.VT (extract_subvector
3978 (_.info512.VT (masked_load addr:$srcAddr, Mask,
3979 _.info512.ImmAllZerosV)),
3980 (iPTR 0))),
3981 (!cast<Instruction>(InstrStr#rmkz)
3982 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
3983 addr:$srcAddr)>;
3985 def : Pat<(_.info128.VT (extract_subvector
3986 (_.info512.VT (masked_load addr:$srcAddr, Mask,
3987 (_.info512.VT (insert_subvector undef,
3988 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
3989 (iPTR 0))))),
3990 (iPTR 0))),
3991 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
3992 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
3993 addr:$srcAddr)>;
3995 }
3997 multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
3998 AVX512VLVectorVTInfo _,
3999 dag Mask, RegisterClass MaskRC,
4000 SubRegIndex subreg> {
4002 def : Pat<(_.info128.VT (extract_subvector
4003 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4004 _.info512.ImmAllZerosV)),
4005 (iPTR 0))),
4006 (!cast<Instruction>(InstrStr#rmkz)
4007 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4008 addr:$srcAddr)>;
4010 def : Pat<(_.info128.VT (extract_subvector
4011 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4012 (_.info512.VT (insert_subvector undef,
4013 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4014 (iPTR 0))))),
4015 (iPTR 0))),
4016 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4017 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4018 addr:$srcAddr)>;
4020 }
4022 // This matches the more recent codegen from clang that avoids emitting a
4023 // 512-bit masked load directly. Codegen will widen a 128-bit masked load
4024 // to 512 bits on AVX512F-only targets.
4025 multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4026 AVX512VLVectorVTInfo _,
4027 dag Mask512, dag Mask128,
4028 RegisterClass MaskRC,
4029 SubRegIndex subreg> {
4030 // AVX512F patterns.
4031 def : Pat<(_.info128.VT (extract_subvector
4032 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4033 _.info512.ImmAllZerosV)),
4034 (iPTR 0))),
4035 (!cast<Instruction>(InstrStr#rmkz)
4036 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4037 addr:$srcAddr)>;
4039 def : Pat<(_.info128.VT (extract_subvector
4040 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4041 (_.info512.VT (insert_subvector undef,
4042 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4043 (iPTR 0))))),
4044 (iPTR 0))),
4045 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4046 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4047 addr:$srcAddr)>;
4049 // AVX512VL patterns.
4050 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4051 _.info128.ImmAllZerosV)),
4052 (!cast<Instruction>(InstrStr#rmkz)
4053 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4054 addr:$srcAddr)>;
4056 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4057 (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4058 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4059 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4060 addr:$srcAddr)>;
4061 }
4063 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4064 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4066 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4067 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4068 defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4069 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4070 defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4071 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4073 defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4074 (v16i1 (insert_subvector
4075 (v16i1 immAllZerosV),
4076 (v4i1 (extract_subvector
4077 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4080 (v4i1 (extract_subvector
4081 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4082 (iPTR 0))), GR8, sub_8bit>;
4083 defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4088 (v16i1 immAllZerosV),
4089 (v2i1 (extract_subvector
4090 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4094 (v2i1 (extract_subvector
4095 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4096 (iPTR 0))), GR8, sub_8bit>;
4098 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4099 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4100 defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4101 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4102 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4103 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4105 defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4106 (v16i1 (insert_subvector
4107 (v16i1 immAllZerosV),
4108 (v4i1 (extract_subvector
4109 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4112 (v4i1 (extract_subvector
4113 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4114 (iPTR 0))), GR8, sub_8bit>;
4115 defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4120 (v16i1 immAllZerosV),
4121 (v2i1 (extract_subvector
4122 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4126 (v2i1 (extract_subvector
4127 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4128 (iPTR 0))), GR8, sub_8bit>;
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
           (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
           VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
           (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
           VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fpimm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
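// These catch a one-bit masked select of bare f32/f64 values (e.g. the kind
// of scalar select that intrinsics such as _mm_mask_move_ss can produce --
// an assumed example, not the only source). The FR32X/FR64X operands are
// re-wrapped in 128-bit vectors so the k-masked VMOVSS/VMOVSD forms apply,
// then the low element is copied back out.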
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                           (ins VR128X:$src1, VR128X:$src2),
                           "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           []>, XS, EVEX_4V, VEX_LIG,
                           FoldGenData<"VMOVSSZrr">,
                           Sched<[SchedWriteFShuffle.XMM]>;

let Constraints = "$src0 = $dst" in
def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                           (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
                                VR128X:$src1, VR128X:$src2),
                           "vmovss\t{$src2, $src1, $dst {${mask}}|"#
                                    "$dst {${mask}}, $src1, $src2}",
                           []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
                           FoldGenData<"VMOVSSZrrk">,
                           Sched<[SchedWriteFShuffle.XMM]>;

def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                            (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                            "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
                                     "$dst {${mask}} {z}, $src1, $src2}",
                            []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
                            FoldGenData<"VMOVSSZrrkz">,
                            Sched<[SchedWriteFShuffle.XMM]>;

def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                           (ins VR128X:$src1, VR128X:$src2),
                           "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           []>, XD, EVEX_4V, VEX_LIG, VEX_W,
                           FoldGenData<"VMOVSDZrr">,
                           Sched<[SchedWriteFShuffle.XMM]>;

let Constraints = "$src0 = $dst" in
def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                           (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
                                VR128X:$src1, VR128X:$src2),
                           "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
                                    "$dst {${mask}}, $src1, $src2}",
                           []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
                           VEX_W, FoldGenData<"VMOVSDZrrk">,
                           Sched<[SchedWriteFShuffle.XMM]>;

def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                            (ins f64x_info.KRCWM:$mask, VR128X:$src1,
                                 VR128X:$src2),
                            "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
                                     "$dst {${mask}} {z}, $src1, $src2}",
                            []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
                            VEX_W, FoldGenData<"VMOVSDZrrkz">,
                            Sched<[SchedWriteFShuffle.XMM]>;
}
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
                "$dst {${mask}}, $src1, $src2}",
                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
                "$dst {${mask}}, $src1, $src2}",
                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;
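// The ".s" mnemonic suffix lets assembly writers force the store-form
// (MRMDestReg) encoding of the register-register move; the trailing 0 marks
// these aliases as valid for parsing but never preferred when printing.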
let Predicates = [HasAVX512, OptForSize] in {
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
}
// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
// VMOVSS/SD. Unfortunately, this loses the ability to use XMM16-31.
let Predicates = [HasAVX512, OptForSpeed] in {
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
                    (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
                    (i8 1))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                    (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
                    (i8 3))), sub_xmm)>;
}
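// The blend immediates select only the low element: imm 1 takes one dword
// lane from the second source for VBLENDPS, and imm 3 takes the two low words
// (i.e. the low dword) for VPBLENDW. With V_SET0 as the first source, every
// other lane comes out zero, which is exactly X86vzmovl's semantics.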
let Predicates = [HasAVX512] in {
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (VMOVSSZrm addr:$src)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (VMOVSDZrm addr:$src)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types.
  def : Pat<(v8f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // Represent the same patterns above but in the form they appear for
  // 512-bit types.
  def : Pat<(v16f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                 (ins VR128X:$src),
                                 "vmovq\t{$src, $dst|$dst, $src}",
                                 [(set VR128X:$dst, (v2i64 (X86vzmovl
                                                    (v2i64 VR128X:$src))))]>,
                                 EVEX, VEX_W;
}
let Predicates = [HasAVX512] in {
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIZrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIZrr GR64:$src)>;

  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
            (VMOVZPQILo2PQIZrr VR128X:$src)>;
  def : Pat<(v2i64 (X86vzload64 addr:$src)),
            (VMOVQI2PQIZrm addr:$src)>;
  def : Pat<(v4i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
  def : Pat<(v16i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v8i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;

  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
}
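// SUBREG_TO_REG with index 0 encodes the "upper bits are already zero"
// guarantee used throughout this block: the 128-bit EVEX-encoded move zeroes
// bits 511:128 of the destination ZMM register in hardware, so no explicit
// zeroing of the wide register needs to be emitted.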
//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//

def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
                            (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
                            [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
                            EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;

let Predicates = [HasVLX] in {
  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
                                 (ins i256mem:$src),
                                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                                 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                                 EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
                                 (ins i128mem:$src),
                                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                                 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
                                 EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        X86SchedWriteMoveLS Sched,
                        PatFrag st_frag = alignednontemporalstore> {
  let SchedRW = [Sched.MR], AddedComplexity = 400 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}
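// AddedComplexity = 400 biases instruction selection toward this
// non-temporal store whenever the aligned+nontemporal PatFrag matches, so it
// wins over the ordinary aligned-store patterns for the same value types.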
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo VTInfo,
                           X86SchedWriteMoveLSWidths Sched> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
  }
}

defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
                                SchedWriteVecMoveLSNT>, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
                                SchedWriteFMoveLSNT>, PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
                                SchedWriteFMoveLSNT>, PS;
let Predicates = [HasAVX512], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;

  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
}
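// There is no FP-typed non-temporal load instruction, so the f32/f64 vector
// loads above reuse VMOVNTDQA; the integer/FP domain crossing is harmless
// here because the load just transfers a bit pattern.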
let Predicates = [HasVLX], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
}
//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//===----------------------------------------------------------------------===//

multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                    IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
                    Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
                  AVX512BIBase, EVEX_4V,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> :
           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                  "${src2}"##_.BroadcastStr##", $src1",
                  "$src1, ${src2}"##_.BroadcastStr,
                  (_.VT (OpNode _.RC:$src1,
                         (X86VBroadcast
                          (_.ScalarLdFrag addr:$src2))))>,
                  AVX512BIBase, EVEX_4V, EVEX_B,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}
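// The rmb form is what provides the EVEX embedded-broadcast syntax, e.g.
// (AT&T syntax, illustrative):
//   vpaddd (%rax){1to16}, %zmm1, %zmm0
// A single dword is loaded and splatted across all lanes before the binop,
// mirroring the (X86VBroadcast (_.ScalarLdFrag ...)) operand above.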
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo,
                              X86SchedWriteWidths sched, Predicate prd,
                              bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                             IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
                                sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
                                sched.XMM, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo,
                               X86SchedWriteWidths sched, Predicate prd,
                               bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                              IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
                                 sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
                                 sched.XMM, IsCommutable>, EVEX_V128;
  }
}
multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                                  sched, prd, IsCommutable>,
                                  VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}

multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
                                 VEX_WIG;
}

multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
                                 VEX_WIG;
}

multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
                                IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
                                IsCommutable>;
}

multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
                                IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
                                IsCommutable>;
}

multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    sched, HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                                    sched, HasBWI, IsCommutable>;
}
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            SDNode OpNode, X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                         (_Src.LdFrag addr:$src2)))>,
                        AVX512BIBase, EVEX_4V,
                        Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                    (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                    OpcodeStr,
                    "${src2}"##_Brdct.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_Brdct.BroadcastStr,
                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                 (_Brdct.VT (X86VBroadcast
                                    (_Brdct.ScalarLdFrag addr:$src2))))))>,
                    AVX512BIBase, EVEX_4V, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}
defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                    SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                    SchedWriteVecALU, 0>;
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
                                    SchedWriteVecALU, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
                                     SchedWriteVecALU, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
                                     SchedWriteVecALU, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SchedWriteVecIMul, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
                                    HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
                                     HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
                                      SchedWriteVecIMul, HasBWI, 1>, T8PD;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
                                   SchedWriteVecALU, HasBWI, 1>;
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
                                    SchedWriteVecIMul, HasAVX512, 1>, T8PD;
defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
                                     SchedWriteVecIMul, HasAVX512, 1>;
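// Note the feature asymmetry above: vpmulld/vpmullw need only AVX512F/BWI,
// but vpmullq is a new AVX512DQ instruction with no VEX-encoded counterpart,
// hence HasDQI plus NotEVEX2VEXConvertible.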
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched,
                            AVX512VLVectorVTInfo _SrcVTInfo,
                            AVX512VLVectorVTInfo _DstVTInfo,
                            SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                   _SrcVTInfo.info512, _DstVTInfo.info512,
                                   v8i64_info, IsCommutable>,
                                   EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      v4i64x_info, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      v2i64x_info, IsCommutable>,
                                      EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
  }
}

defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
                                       avx512vl_i8_info, avx512vl_i8_info,
                                       X86multishift, HasVBMI, 0>, T8PD;
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
                            X86FoldableSchedWrite sched> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                    (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                    OpcodeStr,
                    "${src2}"##_Src.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_Src.BroadcastStr,
                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                 (_Src.VT (X86VBroadcast
                                    (_Src.ScalarLdFrag addr:$src2))))))>,
                    EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, X86VectorVTInfo _Src,
                           X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            IsCommutable, IsCommutable>,
                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                         (_Src.LdFrag addr:$src2)))>,
                        EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
                        Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
                                v32i16_info, SchedWriteShuffle.ZMM>,
                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info, SchedWriteShuffle.YMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
                                      v16i16x_info, SchedWriteShuffle.YMM>,
                                      EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info, SchedWriteShuffle.XMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
                                      v8i16x_info, SchedWriteShuffle.XMM>,
                                      EVEX_V128;
  }
}

multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
                                SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
                                     v32i8x_info, SchedWriteShuffle.YMM>,
                                     EVEX_V256, VEX_WIG;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
                                     v16i8x_info, SchedWriteShuffle.XMM>,
                                     EVEX_V128, VEX_WIG;
  }
}
multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                         SDNode OpNode, AVX512VLVectorVTInfo _Src,
                         AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
                                _Dst.info512, SchedWriteVecIMul.ZMM,
                                IsCommutable>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
                                     _Dst.info256, SchedWriteVecIMul.YMM,
                                     IsCommutable>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
                                     _Dst.info128, SchedWriteVecIMul.XMM,
                                     IsCommutable>, EVEX_V128;
  }
}

defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;

defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                                avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                              avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
// PMULLQ: Use the 512-bit version to implement 128/256-bit when VLX is
// unavailable.
let Predicates = [HasDQI, NoVLX] in {
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
                sub_ymm)>;

  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
                sub_xmm)>;
}
multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
                (Instr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
                sub_ymm)>;

  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
                (Instr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
                sub_xmm)>;
}

let Predicates = [HasAVX512, NoVLX] in {
  defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
  defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
  defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;
  defm : avx512_min_max_lowering<VPMINSQZrr, smin>;
}
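// Example expansion of the widening trick above: a v2i64 umax with AVX512F
// but no VLX becomes VPMAXUQZrr on ZMM after INSERT_SUBREG-ing both XMM
// inputs into IMPLICIT_DEF ZMM values, then the low 128 bits are extracted
// back out. The undefined upper lanes are harmless because each result lane
// is computed independently.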
//===----------------------------------------------------------------------===//
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//

defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                  SchedWriteVecLogic, HasAVX512, 1>;
defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SchedWriteVecLogic, HasAVX512>;
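// Only dword/qword forms of the bitwise ops are defined above (the element
// size matters only for masking and embedded broadcasts); byte/word vectors
// are handled by the patterns below, which simply reuse the Q-sized
// instructions since bitwise logic is element-size agnostic.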
let Predicates = [HasVLX] in {
  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(and VR128X:$src1,
                 (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPANDDZ128rmb VR128X:$src1, addr:$src2)>;
  def : Pat<(or VR128X:$src1,
                (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPORDZ128rmb VR128X:$src1, addr:$src2)>;
  def : Pat<(xor VR128X:$src1,
                 (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPXORDZ128rmb VR128X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128X:$src1,
                      (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPANDNDZ128rmb VR128X:$src1, addr:$src2)>;

  def : Pat<(and VR128X:$src1,
                 (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPANDQZ128rmb VR128X:$src1, addr:$src2)>;
  def : Pat<(or VR128X:$src1,
                (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPORQZ128rmb VR128X:$src1, addr:$src2)>;
  def : Pat<(xor VR128X:$src1,
                 (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPXORQZ128rmb VR128X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128X:$src1,
                      (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPANDNQZ128rmb VR128X:$src1, addr:$src2)>;

  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(and VR256X:$src1,
                 (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPANDDZ256rmb VR256X:$src1, addr:$src2)>;
  def : Pat<(or VR256X:$src1,
                (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPORDZ256rmb VR256X:$src1, addr:$src2)>;
  def : Pat<(xor VR256X:$src1,
                 (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPXORDZ256rmb VR256X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256X:$src1,
                      (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPANDNDZ256rmb VR256X:$src1, addr:$src2)>;

  def : Pat<(and VR256X:$src1,
                 (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPANDQZ256rmb VR256X:$src1, addr:$src2)>;
  def : Pat<(or VR256X:$src1,
                (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPORQZ256rmb VR256X:$src1, addr:$src2)>;
  def : Pat<(xor VR256X:$src1,
                 (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPXORQZ256rmb VR256X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256X:$src1,
                      (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPANDNQZ256rmb VR256X:$src1, addr:$src2)>;
}
let Predicates = [HasAVX512] in {
  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(and VR512:$src1,
                 (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPANDDZrmb VR512:$src1, addr:$src2)>;
  def : Pat<(or VR512:$src1,
                (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPORDZrmb VR512:$src1, addr:$src2)>;
  def : Pat<(xor VR512:$src1,
                 (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPXORDZrmb VR512:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR512:$src1,
                      (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPANDNDZrmb VR512:$src1, addr:$src2)>;

  def : Pat<(and VR512:$src1,
                 (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPANDQZrmb VR512:$src1, addr:$src2)>;
  def : Pat<(or VR512:$src1,
                (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPORQZrmb VR512:$src1, addr:$src2)>;
  def : Pat<(xor VR512:$src1,
                 (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPXORQZrmb VR512:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR512:$src1,
                      (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPANDNQZrmb VR512:$src1, addr:$src2)>;
}
// Patterns to catch vselect with a different type than the logic op.
multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
                                   X86VectorVTInfo _,
                                   X86VectorVTInfo IntInfo> {
  // Masked register-register logical operations.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, _.RC:$src2)>;

  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
             _.RC:$src2)>;

  // Masked register-memory logical operations.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
             addr:$src2)>;
}

multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
                                         X86VectorVTInfo _,
                                         X86VectorVTInfo IntInfo> {
  // Register-broadcast logical operations.
  def : Pat<(IntInfo.VT (OpNode _.RC:$src1,
                         (bitconvert (_.VT (X86VBroadcast
                                            (_.ScalarLdFrag addr:$src2)))))),
            (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (bitconvert (_.VT
                                              (X86VBroadcast
                                               (_.ScalarLdFrag addr:$src2))))))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (bitconvert (_.VT
                                              (X86VBroadcast
                                               (_.ScalarLdFrag addr:$src2))))))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
}
multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
                                         AVX512VLVectorVTInfo SelectInfo,
                                         AVX512VLVectorVTInfo IntInfo> {
  let Predicates = [HasVLX] in {
    defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
                                   IntInfo.info128>;
    defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
                                   IntInfo.info256>;
  }
  let Predicates = [HasAVX512] in {
    defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
                                   IntInfo.info512>;
  }
}

multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
                                               AVX512VLVectorVTInfo SelectInfo,
                                               AVX512VLVectorVTInfo IntInfo> {
  let Predicates = [HasVLX] in {
    defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
                                         SelectInfo.info128, IntInfo.info128>;
    defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
                                         SelectInfo.info256, IntInfo.info256>;
  }
  let Predicates = [HasAVX512] in {
    defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
                                         SelectInfo.info512, IntInfo.info512>;
  }
}
multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
  // i64 vselect with i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i8_info>;

  // i32 vselect with i64/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i8_info>;

  // f32 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i8_info>;

  // f64 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i8_info>;

  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
                                             avx512vl_f32_info,
                                             avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
                                             avx512vl_f64_info,
                                             avx512vl_i64_info>;
}

defm : avx512_logical_lowering_types<"VPAND", and>;
defm : avx512_logical_lowering_types<"VPOR", or>;
defm : avx512_logical_lowering_types<"VPXOR", xor>;
defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
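// Note: a vselect's element type is fixed by its mask width, while the logic
// op keeps whatever type its inputs had, so the instantiations above
// enumerate each (select-type, logic-type) pairing that can reach isel and
// route it to the instruction whose masking granularity matches the select.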
//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
//===----------------------------------------------------------------------===//

multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                            SDNode OpNode, SDNode VecNode,
                            X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (VecNode _.RC:$src1,
                                        _.ScalarIntMemCPat:$src2))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
              Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                 (_.ScalarLdFrag addr:$src2)))]>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }
}
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                  SDNode VecNode, X86FoldableSchedWrite sched,
                                  bit IsCommutable = 0> {
  let ExeDomain = _.ExeDomain in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                          "$rc, $src2, $src1", "$src1, $src2, $rc",
                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                   (i32 timm:$rc))>,
                          EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
                                X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (VecNode _.RC:$src1,
                                        _.ScalarIntMemCPat:$src2))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;

  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
              Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                 (_.ScalarLdFrag addr:$src2)))]>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                            EVEX_B, Sched<[sched]>;
  }
}
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                SDNode VecNode, SDNode RndNode,
                                X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
                              sched.PS.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
                                    sched.PS.Scl, IsCommutable>,
                                    XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
                              sched.PD.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
                                    sched.PD.Scl, IsCommutable>,
                                    XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}

multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode VecNode, SDNode SaeNode,
                              X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
                                  VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
                                  XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
                                  VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
                                  XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86fadds, X86faddRnds,
                                 SchedWriteFAddSizes, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmuls, X86fmulRnds,
                                 SchedWriteFMulSizes, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubs, X86fsubRnds,
                                 SchedWriteFAddSizes, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivs, X86fdivRnds,
                                 SchedWriteFDivSizes, 0>;
defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
                               SchedWriteFCmpSizes, 0>;
defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
                               SchedWriteFCmpSizes, 0>;
// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax.
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
                                    X86VectorVTInfo _, SDNode OpNode,
                                    X86FoldableSchedWrite sched> {
  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
              Sched<[sched]> {
    let isCommutable = 1;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                 (_.ScalarLdFrag addr:$src2)))]>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                         SchedWriteFCmp.Scl>, XS, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                         SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<64, CD8VT1>;

defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl>, XS, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<64, CD8VT1>;
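// A minimal sketch of when the commutable forms apply: for
//   float m = a < b ? a : b;
// compiled with nnan/nsz fast-math flags, the DAG can use X86fminc, which is
// freely commutable to help register allocation. Strict X86fmin must keep
// operand order, because the hardware returns the second operand when either
// input is NaN, so min(x, NaN) and min(NaN, x) differ.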
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0,
                            bit IsKCommutable = IsCommutable> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
                  IsKCommutable, IsKCommutable>,
                  EVEX_4V, Sched<[sched]>;
  let mayLoad = 1 in {
    defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                    EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
                     "${src2}"##_.BroadcastStr##", $src1",
                     "$src1, ${src2}"##_.BroadcastStr,
                     (OpNode _.RC:$src1, (_.VT (X86VBroadcast
                                                (_.ScalarLdFrag addr:$src2))))>,
                     EVEX_4V, EVEX_B,
                     Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }
}
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNodeRnd,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
                  "$rc, $src2, $src1", "$src1, $src2, $rc",
                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>,
                  EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}

multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNodeSAE,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                  (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
                  EVEX_4V, EVEX_B, Sched<[sched]>;
}
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                             Predicate prd, X86SchedWriteSizes sched,
                             bit IsCommutable = 0,
                             bit IsPD128Commutable = IsCommutable> {
  let Predicates = [prd] in {
  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
                              sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
                              EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
                              sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
                              EVEX_CD8<64, CD8VF>;
  }

  // Define only if AVX512VL feature is present.
  let Predicates = [prd, HasVLX] in {
    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
                                   sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
                                   EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
                                   sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
                                   EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
                                   sched.PD.XMM, IsPD128Commutable,
                                   IsCommutable>, EVEX_V128, PD, VEX_W,
                                   EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
                                   sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
                                   EVEX_CD8<64, CD8VF>;
  }
}
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                   X86SchedWriteSizes sched> {
  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                    v16f32_info>,
                                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                    v8f64_info>,
                                    EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                 X86SchedWriteSizes sched> {
  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                  v16f32_info>,
                                  EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                  v8f64_info>,
                                  EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
}
defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
                              SchedWriteFAddSizes, 1>,
            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
                              SchedWriteFMulSizes, 1>,
            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
                              SchedWriteFAddSizes>,
            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
                              SchedWriteFDivSizes>,
            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
let isCodeGenOnly = 1 in {
  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>;
  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>;
}
defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
                              SchedWriteFLogicSizes, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 0>;
defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
                             SchedWriteFLogicSizes, 1>;
defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
                              SchedWriteFLogicSizes, 1>;
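// null_frag leaves the FP-logic definitions above without a DAG pattern of
// their own, so these defs mainly provide encodings and assembly syntax;
// actual selection of vandps & co. is expected to come from separate
// bitcast-based patterns elsewhere in this file.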
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                  EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (OpNode _.RC:$src1, (_.VT (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src2))))>,
                   EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                  Sched<[sched]>;
  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2)>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
                                X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
             EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
             EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info,
                                    X86scalefsRnd, sched.Scl>,
             EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info,
                                    X86scalefsRnd, sched.Scl>,
             EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
                  EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
                  EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
                  EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
                  EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}

defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
                                    SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;

//===----------------------------------------------------------------------===//
// AVX-512 VPTESTM instructions
//===----------------------------------------------------------------------===//
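
// VPTESTM sets mask bit k[i] when (src1[i] AND src2[i]) is nonzero;
// VPTESTNM sets k[i] when the AND result is zero.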
multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
                         string Name> {
  // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
  // There are just too many permutations due to commutability and bitcasts.
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (null_frag), (null_frag), 1>,
                   EVEX_4V, Sched<[sched]>;

  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (null_frag), (null_frag)>,
                   EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                    "${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr,
                    (null_frag), (null_frag)>,
                    EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512, NAME>,
           avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256, NAME>,
              avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
  defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128, NAME>,
              avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
  }
}

multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
                                  avx512vl_i32_info>;
  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
                                  avx512vl_i64_info>, VEX_W;
}

multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in {
  defm WZ: avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
                         v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
  defm BZ: avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
                         v64i8_info, NAME#"B">, EVEX_V512;
  }

  let Predicates = [HasVLX, HasBWI] in {
  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
                            v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
                            v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
                            v32i8x_info, NAME#"B">, EVEX_V256;
  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
                            v16i8x_info, NAME#"B">, EVEX_V128;
  }
}

multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
                                   X86SchedWriteWidths sched> :
  avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
  avx512_vptest_dq<opc_dq, OpcodeStr, sched>;

defm VPTESTM  : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
                                        SchedWriteVecLogic>, T8PD;
defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
                                        SchedWriteVecLogic>, T8XS;
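
// VPTESTM and VPTESTNM share opcodes 0x26/0x27; the mandatory prefix
// (66, via T8PD, vs. F3, via T8XS) selects between the two families.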

//===----------------------------------------------------------------------===//
// AVX-512 Shift instructions
//===----------------------------------------------------------------------===//
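
// Three shift flavors are defined below: shift by an immediate (ri/mi),
// shift all elements by the scalar count in the low quadword of an XMM
// register (rr/rm), and, further down, per-element variable shifts.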
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                            string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (i8 imm:$src2)))>,
                   Sched<[sched]>;
  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
                          (i8 imm:$src2)))>,
                   Sched<[sched.Folded]>;
  }
}

multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
                             string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
                   "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
                   (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2)))>,
                   EVEX_B, Sched<[sched.Folded]>;
}

multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, ValueType SrcVT,
                            X86VectorVTInfo _> {
  // src2 is always 128-bit
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
                   AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
                   AVX512BIBase,
                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched, ValueType SrcVT,
                              AVX512VLVectorVTInfo VTInfo,
                              Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
                            VTInfo.info512>, EVEX_V512,
                            EVEX_CD8<VTInfo.info512.EltSize, CD8VQ>;
  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
                               VTInfo.info256>, EVEX_V256,
                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
                               VTInfo.info128>, EVEX_V128,
                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
  }
}

multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              bit NotEVEX2VEXConvertibleQ = 0> {
  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
                              avx512vl_i32_info, HasAVX512>;
  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
                              avx512vl_i64_info, HasAVX512>, VEX_W;
  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
                              avx512vl_i16_info, HasBWI>;
}

multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                           sched.ZMM, VTInfo.info512>,
          avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
                            VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.YMM, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
                               VTInfo.info256>, EVEX_V256;
  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.XMM, VTInfo.info128>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
                               VTInfo.info128>, EVEX_V128;
  }
}

multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                            sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
  let Predicates = [HasVLX, HasBWI] in {
  defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
  defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
  }
}

multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
                               Format ImmFormR, Format ImmFormM,
                               string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched,
                               bit NotEVEX2VEXConvertibleQ = 0> {
  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
                                 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
}

defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
                                 SchedWriteVecShiftImm, 1>,
             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
                                SchedWriteVecShift>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
                                SchedWriteVecShift, 1>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
                                SchedWriteVecShift>;
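
// When VLX is unavailable, EVEX-only operations on 128/256-bit vectors are
// implemented by widening: insert the operands into a ZMM register
// (INSERT_SUBREG of an IMPLICIT_DEF), run the 512-bit instruction, and
// extract the original subregister from the result.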

// Use the 512-bit VPSRA/VPSRAI versions to implement v2i64/v4i64 when VLX is
// unavailable.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 VR128X:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 VR128X:$src2)), sub_xmm)>;

  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                imm:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                imm:$src2)), sub_xmm)>;
}

//===-------------------------------------------------------------------===//
// Variable Bit Shifts
//===-------------------------------------------------------------------===//
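
// VPSLLV/VPSRLV/VPSRAV shift each element of src1 by the count held in the
// corresponding element of src2, unlike the uniform-count shifts above.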
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
                   AVX5128IBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1,
                   (_.VT (_.LdFrag addr:$src2))))>,
                   AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src2)))))>,
                   AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
  defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
  }
}

multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, X86SchedWriteWidths sched> {
  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
                                  avx512vl_i32_info>;
  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
                                  avx512vl_i64_info>, VEX_W;
}

// Use the 512-bit version to implement the 128/256-bit variants when VLX is
// unavailable.
multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
                                     SDNode OpNode, list<Predicate> p> {
  let Predicates = p in {
  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
                                  (_.info256.VT _.info256.RC:$src2))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
                sub_ymm)>;

  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
                                  (_.info128.VT _.info128.RC:$src2))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
                sub_xmm)>;
  }
}

multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
           EVEX_V512, VEX_W;

  let Predicates = [HasVLX, HasBWI] in {
  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
              EVEX_V256, VEX_W;
  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
              EVEX_V128, VEX_W;
  }
}

defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;

defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;

defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;

defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;

defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;

// Use the 512-bit VPROLV/VPROL versions to implement v2i64/v4i64 and
// v4i32/v8i32 rotates when VLX is unavailable.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                imm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                imm:$src2)), sub_ymm)>;

  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                imm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                imm:$src2)), sub_ymm)>;
}

// Use the 512-bit VPRORV/VPROR versions to implement v2i64/v4i64 and
// v4i32/v8i32 rotates when VLX is unavailable.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                imm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                imm:$src2)), sub_ymm)>;

  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                imm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                imm:$src2)), sub_ymm)>;
}

//===-------------------------------------------------------------------===//
// 1-src variable permutation VPERMW/D/Q
//===-------------------------------------------------------------------===//
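
// These are full-width single-source permutes: each destination element is
// selected from anywhere in the source vector by an index in the control
// operand. The variable-shift multiclasses above are reused here for their
// rr/rm/rmb operand shapes.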
multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
}

multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                           sched, VTInfo.info512>,
          avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                            sched, VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info256>, EVEX_V256;
}

multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
                           Predicate prd, SDNode OpNode,
                           X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates = [prd] in
  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
             EVEX_V512;
  let Predicates = [HasVLX, prd] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
             EVEX_V256;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
             EVEX_V128;
  }
}

defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
                               WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
                               WriteVarShuffle256, avx512vl_i8_info>;

defm VPERMD  : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
                                     WriteVarShuffle256, avx512vl_i32_info>;
defm VPERMQ  : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
                                     WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;

defm VPERMQ  : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
                                      X86VPermi, WriteShuffle256, avx512vl_i64_info>,
                                      EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
                                      X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
                                      EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;

//===----------------------------------------------------------------------===//
// AVX-512 - VPERMIL
//===----------------------------------------------------------------------===//
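
// Unlike the full-width VPERM* permutes above, VPERMILPS/VPERMILPD only
// rearrange elements within each 128-bit lane.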
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
                             X86VectorVTInfo Ctrl> {
  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1,
                               (Ctrl.VT Ctrl.RC:$src2)))>,
                  T8PD, EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode
                           _.RC:$src1,
                           (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
                  T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (_.VT (OpNode
                            _.RC:$src1,
                            (Ctrl.VT (X86VBroadcast
                                       (Ctrl.ScalarLdFrag addr:$src2)))))>,
                   T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
                                    X86SchedWriteWidths sched,
                                    AVX512VLVectorVTInfo _,
                                    AVX512VLVectorVTInfo Ctrl> {
  let Predicates = [HasAVX512] in {
  defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
                                _.info512, Ctrl.info512>, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
  defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
                                _.info128, Ctrl.info128>, EVEX_V128;
  defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
                                _.info256, Ctrl.info256>, EVEX_V256;
  }
}

multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl> {
  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
                                      _, Ctrl>;
  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                    X86VPermilpi, SchedWriteFShuffle, _>,
             EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}

let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
                               avx512vl_i32_info>;
let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
                               avx512vl_i64_info>, VEX_W1X;

//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
//===----------------------------------------------------------------------===//

defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
                                      X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
                                      EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
                                  X86PShufhw, SchedWriteShuffle>,
                                  EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
                                  X86PShuflw, SchedWriteShuffle>,
                                  EVEX, AVX512XDIi8Base;

//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFB
//===----------------------------------------------------------------------===//

multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
             EVEX_V512;

  let Predicates = [HasVLX, HasBWI] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
             EVEX_V256;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
             EVEX_V128;
  }
}

defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
                                  SchedWriteVarShuffle>, VEX_WIG;

//===----------------------------------------------------------------------===//
// Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//

def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
                            Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
let isCommutable = 1 in
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
                            Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
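
// vmovlhps copies the low 64 bits of $src2 into the high half of the result;
// vmovhlps copies the high 64 bits of $src2 into the low half.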

//===----------------------------------------------------------------------===//
// VMOVHPS/PD VMOVLPS Instructions
// All patterns were taken from the SSE implementation.
//===----------------------------------------------------------------------===//

multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNode,
                                  X86VectorVTInfo _> {
  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
                  (ins _.RC:$src1, f64mem:$src2),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.RC:$dst,
                        (OpNode _.RC:$src1,
                                (_.VT (bitconvert
                                       (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
                  Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
}

// No patterns for MOVLPS/MOVHPS, as the Movlhps node should only be created
// in SSE1, and the MOVLPS pattern is even more complex.
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
                                          v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
                                          v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
                                          v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
                                          v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;

let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
                    (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;

  // VMOVLPD patterns
  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
}

let SchedRW = [WriteFStore] in {
let mayStore = 1, hasSideEffects = 0 in
def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovhps\t{$src, $dst|$dst, $src}",
                              []>, EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovhpd\t{$src, $dst|$dst, $src}",
                              [(store (f64 (extractelt
                                            (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
                                            (iPTR 0))), addr:$dst)]>,
                              EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
let mayStore = 1, hasSideEffects = 0 in
def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovlps\t{$src, $dst|$dst, $src}",
                              []>, EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovlpd\t{$src, $dst|$dst, $src}",
                              [(store (f64 (extractelt (v2f64 VR128X:$src),
                                            (iPTR 0))), addr:$dst)]>,
                              EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
} // SchedRW

let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(store (f64 (extractelt
                         (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
                         (iPTR 0))), addr:$dst),
            (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
}

//===----------------------------------------------------------------------===//
// FMA - Fused Multiply Operations
//===----------------------------------------------------------------------===//
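
// The digits in an FMA mnemonic give the operand order of the computation:
//   vfmadd132: dst = dst  * src3 + src2
//   vfmadd213: dst = src2 * dst  + src3
//   vfmadd231: dst = src2 * src3 + dst
// (dst doubles as operand 1 because of the "$src1 = $dst" constraint below.)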
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
           !strconcat("$src2, ${src3}", _.BroadcastStr),
           (OpNode _.RC:$src2,
            _.RC:$src1, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>,
           AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}

multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                                 _.info512, Suff>,
             avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                   _.info512, Suff>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                    _.info256, Suff>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                    _.info128, Suff>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd> {
  defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f32_info, "PS">;
  defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f64_info, "PD">,
                                    VEX_W;
}

defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;

multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1,
          vselect, 1>, AVX512FMA3Base, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
           "$src2, ${src3}"##_.BroadcastStr,
           (_.VT (OpNode _.RC:$src2,
                         (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
                         _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
           1, 1, vselect, 1>,
           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}

multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                                 _.info512, Suff>,
             avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                   _.info512, Suff>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                    _.info256, Suff>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                    _.info128, Suff>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd> {
  defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f32_info, "PS">;
  defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f64_info, "PD">,
                                    VEX_W;
}

defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;

multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1, vselect, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  // Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
           "$src2, ${src3}"##_.BroadcastStr,
           (_.VT (OpNode (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
                         _.RC:$src1, _.RC:$src2)), 1, 0>,
           AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
           1, 1, vselect, 1>,
           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}

multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                                 _.info512, Suff>,
             avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                   _.info512, Suff>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                    _.info256, Suff>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                    _.info128, Suff>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd> {
  defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f32_info, "PS">;
  defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f64_info, "PD">,
                                    VEX_W;
}

defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;

// Scalar FMA
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
  let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
              "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
              AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>;

  let mayLoad = 1 in
  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
              "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
              AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;

  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
               (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
               OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
               AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;

  let isCodeGenOnly = 1, isCommutable = 1 in {
    def r : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                        (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
                        !strconcat(OpcodeStr,
                                   "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                        !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>;
    def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
                        (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
                        !strconcat(OpcodeStr,
                                   "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                        [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;

    def rb : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
                         !strconcat(OpcodeStr,
                                    "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
                         !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
                         Sched<[SchedWriteFMA.Scl]>;
  } // isCodeGenOnly = 1
  } // Constraints = "$src1 = $dst"
}

multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                            string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
                            X86VectorVTInfo _, string SUFF> {
  let ExeDomain = _.ExeDomain in {
  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
                // Operands for the intrinsic are in 123 order to preserve
                // passthru semantics.
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                         _.FRC:$src3))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src3)))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
                         _.FRC:$src3, (i32 timm:$rc)))), 0>;

  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
                         _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
                         (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
                         _.FRC:$src1, (i32 timm:$rc)))), 1>;

  // One pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
                         _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
                         _.FRC:$src1, _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
                         _.FRC:$src2, (i32 timm:$rc)))), 1>;
  }
}

multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                        string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f32x_info, "SS">,
                                 EVEX_CD8<32, CD8VT1>, VEX_LIG;
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f64x_info, "SD">,
                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
  }
}

defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>;
defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
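
// The patterns below match scalar FP ops that only touch the low element of a
// vector (under the X86Movss/X86Movsd wrappers) onto the _Int forms of the
// scalar FMA instructions, covering the unmasked, masked (Intk), zero-masked
// (Intkz), and rounding (Zrb) variants.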
6672 multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
6673 string Suffix, SDNode Move,
6674 X86VectorVTInfo _, PatLeaf ZeroFP> {
6675 let Predicates = [HasAVX512] in {
6676 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6678 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6680 (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
6681 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6682 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6684 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6685 (Op _.FRC:$src2, _.FRC:$src3,
6686 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6687 (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
6688 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6689 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6691 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6693 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6694 (_.ScalarLdFrag addr:$src3)))))),
6695 (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
6696 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6699 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6700 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6701 (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
6702 (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
6703 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6706 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6707 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6708 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6709 (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
6710 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6713 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6714 (X86selects VK1WM:$mask,
6716 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6718 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6719 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
6720 VR128X:$src1, VK1WM:$mask,
6721 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6722 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6724 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6725 (X86selects VK1WM:$mask,
6727 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6728 (_.ScalarLdFrag addr:$src3)),
6729 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6730 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
6731 VR128X:$src1, VK1WM:$mask,
6732 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6734 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6735 (X86selects VK1WM:$mask,
6736 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6737 (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
6738 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6739 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
6740 VR128X:$src1, VK1WM:$mask,
6741 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6743 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6744 (X86selects VK1WM:$mask,
6745 (Op _.FRC:$src2, _.FRC:$src3,
6746 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6747 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6748 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
6749 VR128X:$src1, VK1WM:$mask,
6750 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6751 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6753 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6754 (X86selects VK1WM:$mask,
6755 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6756 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6757 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6758 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
6759 VR128X:$src1, VK1WM:$mask,
6760 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6762 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6763 (X86selects VK1WM:$mask,
6765 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6767 (_.EltVT ZeroFP)))))),
6768 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
6769 VR128X:$src1, VK1WM:$mask,
6770 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6771 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6773 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6774 (X86selects VK1WM:$mask,
6775 (Op _.FRC:$src2, _.FRC:$src3,
6776 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6777 (_.EltVT ZeroFP)))))),
6778 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
6779 VR128X:$src1, VK1WM:$mask,
6780 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6781 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6783 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6784 (X86selects VK1WM:$mask,
6786 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6787 (_.ScalarLdFrag addr:$src3)),
6788 (_.EltVT ZeroFP)))))),
6789 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
6790 VR128X:$src1, VK1WM:$mask,
6791 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6793 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6794 (X86selects VK1WM:$mask,
6795 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6796 _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
6797 (_.EltVT ZeroFP)))))),
6798 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
6799 VR128X:$src1, VK1WM:$mask,
6800 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6802 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6803 (X86selects VK1WM:$mask,
6804 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6805 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6806 (_.EltVT ZeroFP)))))),
6807 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
6808 VR128X:$src1, VK1WM:$mask,
6809 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6811 // Patterns with rounding mode.
6812 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6814 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6815 _.FRC:$src3, (i32 timm:$rc)))))),
6816 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
6817 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6818 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6820 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6821 (RndOp _.FRC:$src2, _.FRC:$src3,
6822 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6823 (i32 timm:$rc)))))),
6824 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
6825 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6826 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6828 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6829 (X86selects VK1WM:$mask,
6831 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6832 _.FRC:$src3, (i32 timm:$rc)),
6833 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6834 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
6835 VR128X:$src1, VK1WM:$mask,
6836 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6837 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6839 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6840 (X86selects VK1WM:$mask,
6841 (RndOp _.FRC:$src2, _.FRC:$src3,
6842 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6844 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6845 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
6846 VR128X:$src1, VK1WM:$mask,
6847 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6848 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6850 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6851 (X86selects VK1WM:$mask,
6853 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6854 _.FRC:$src3, (i32 timm:$rc)),
6855 (_.EltVT ZeroFP)))))),
6856 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
6857 VR128X:$src1, VK1WM:$mask,
6858 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6859 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6861 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6862 (X86selects VK1WM:$mask,
6863 (RndOp _.FRC:$src2, _.FRC:$src3,
6864 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6865 (i32 timm:$rc)),
6866 (_.EltVT ZeroFP)))))),
6867 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
6868 VR128X:$src1, VK1WM:$mask,
6869 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6870 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6871 }
6874 defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SS",
6875 X86Movss, v4f32x_info, fp32imm0>;
6876 defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SS",
6877 X86Movss, v4f32x_info, fp32imm0>;
6878 defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SS",
6879 X86Movss, v4f32x_info, fp32imm0>;
6880 defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS",
6881 X86Movss, v4f32x_info, fp32imm0>;
6883 defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SD",
6884 X86Movsd, v2f64x_info, fp64imm0>;
6885 defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SD",
6886 X86Movsd, v2f64x_info, fp64imm0>;
6887 defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SD",
6888 X86Movsd, v2f64x_info, fp64imm0>;
6889 defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD",
6890 X86Movsd, v2f64x_info, fp64imm0>;
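// Illustrative sketch (editorial, not part of the original source): the
// masked patterns instantiated above let a clang-level sequence such as
//   __m128 r = _mm_mask_fmadd_ss(a, k, b, c);   // AVX512F C intrinsic
// select a single k-masked vfmadd*ss instead of an FMA followed by a
// separate blend; the "213"/"132"/"231" forms differ only in which
// operand ends up tied to the destination register.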
6892 //===----------------------------------------------------------------------===//
6893 // AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52 Bits (IFMA)
6894 //===----------------------------------------------------------------------===//
6895 let Constraints = "$src1 = $dst" in {
6896 multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6897 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6898 // NOTE: The SDNode has the multiply operands first with the add last.
6899 // This enables commuted load patterns to be autogenerated by tablegen.
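// Sketch of what that buys us (illustration, not from the source): the rm
// pattern below is
//   (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)
// and because the two multiply operands commute, tablegen also derives
//   (OpNode (_.LdFrag addr:$src3), _.RC:$src2, _.RC:$src1)
// automatically, so no hand-written duplicate memory pattern is needed.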
6900 let ExeDomain = _.ExeDomain in {
6901 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6902 (ins _.RC:$src2, _.RC:$src3),
6903 OpcodeStr, "$src3, $src2", "$src2, $src3",
6904 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6905 AVX512FMA3Base, Sched<[sched]>;
6907 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6908 (ins _.RC:$src2, _.MemOp:$src3),
6909 OpcodeStr, "$src3, $src2", "$src2, $src3",
6910 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
6911 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6913 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6914 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6915 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
6916 !strconcat("$src2, ${src3}", _.BroadcastStr ),
6917 (OpNode _.RC:$src2,
6918 (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
6919 _.RC:$src1)>,
6920 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6921 }
6922 }
6923 } // Constraints = "$src1 = $dst"
6925 multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6926 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
6927 let Predicates = [HasIFMA] in {
6928 defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
6929 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6930 }
6931 let Predicates = [HasVLX, HasIFMA] in {
6932 defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
6933 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6934 defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
6935 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6936 }
6937 }
6939 defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
6940 SchedWriteVecIMul, avx512vl_i64_info>,
6941 VEX_W;
6942 defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
6943 SchedWriteVecIMul, avx512vl_i64_info>,
6944 VEX_W;
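// Example asm forms accepted for the defs above (AT&T syntax; editorial
// illustration only):
//   vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1}     # masked register form
//   vpmadd52luq (%rax){1to8}, %zmm1, %zmm0    # embedded-broadcast form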
6946 //===----------------------------------------------------------------------===//
6947 // AVX-512 Scalar convert from sign integer to float/double
6948 //===----------------------------------------------------------------------===//
6950 multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
6951 RegisterClass SrcRC, X86VectorVTInfo DstVT,
6952 X86MemOperand x86memop, PatFrag ld_frag, string asm,
6953 string mem> {
6954 let hasSideEffects = 0, isCodeGenOnly = 1 in {
6955 def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
6956 (ins DstVT.FRC:$src1, SrcRC:$src),
6957 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
6958 EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
6960 def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
6961 (ins DstVT.FRC:$src1, x86memop:$src),
6962 asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
6963 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6964 } // hasSideEffects = 0
6965 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
6966 (ins DstVT.RC:$src1, SrcRC:$src2),
6967 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6968 [(set DstVT.RC:$dst,
6969 (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
6970 EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
6972 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
6973 (ins DstVT.RC:$src1, x86memop:$src2),
6974 asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6975 [(set DstVT.RC:$dst,
6976 (OpNode (DstVT.VT DstVT.RC:$src1),
6977 (ld_frag addr:$src2)))]>,
6978 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6979 def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6980 (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
6981 DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
6982 }
6984 multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
6985 X86FoldableSchedWrite sched, RegisterClass SrcRC,
6986 X86VectorVTInfo DstVT, string asm,
6987 string mem> {
6988 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
6989 (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
6991 "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
6992 [(set DstVT.RC:$dst,
6993 (OpNode (DstVT.VT DstVT.RC:$src1),
6994 SrcRC:$src2,
6995 (i32 timm:$rc)))]>,
6996 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
6997 def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
6998 (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
6999 DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
7000 }
7002 multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
7003 X86FoldableSchedWrite sched,
7004 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7005 X86MemOperand x86memop, PatFrag ld_frag,
7006 string asm, string mem> {
7007 defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
7008 avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7009 ld_frag, asm, mem>, VEX_LIG;
7010 }
7012 let Predicates = [HasAVX512] in {
7013 defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7014 WriteCvtI2SS, GR32,
7015 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7016 XS, EVEX_CD8<32, CD8VT1>;
7017 defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7018 WriteCvtI2SS, GR64,
7019 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7020 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7021 defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7022 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l">,
7023 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7024 defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7025 WriteCvtI2SD, GR64,
7026 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7027 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7029 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7030 (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7031 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7032 (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7034 def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
7035 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7036 def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
7037 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7038 def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
7039 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7040 def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
7041 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7043 def : Pat<(f32 (sint_to_fp GR32:$src)),
7044 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7045 def : Pat<(f32 (sint_to_fp GR64:$src)),
7046 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7047 def : Pat<(f64 (sint_to_fp GR32:$src)),
7048 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7049 def : Pat<(f64 (sint_to_fp GR64:$src)),
7050 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
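// e.g. (editorial sketch, not from the source): for plain C code such as
//   double d = (double)some_i64;
// the GR64 pattern above selects
//   vcvtsi2sd %rdi, %xmm0, %xmm0
// with an IMPLICIT_DEF pass-through for the untouched upper element.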
7052 defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7053 WriteCvtI2SS, GR32,
7054 v4f32x_info, i32mem, loadi32,
7055 "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
7056 defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7057 WriteCvtI2SS, GR64,
7058 v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7059 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7060 defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7061 i32mem, loadi32, "cvtusi2sd", "l">,
7062 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7063 defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7064 WriteCvtI2SD, GR64,
7065 v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7066 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7068 def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7069 (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7070 def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7071 (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7073 def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
7074 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7075 def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
7076 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7077 def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
7078 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7079 def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
7080 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7082 def : Pat<(f32 (uint_to_fp GR32:$src)),
7083 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7084 def : Pat<(f32 (uint_to_fp GR64:$src)),
7085 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7086 def : Pat<(f64 (uint_to_fp GR32:$src)),
7087 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7088 def : Pat<(f64 (uint_to_fp GR64:$src)),
7089 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7090 }
7092 //===----------------------------------------------------------------------===//
7093 // AVX-512 Scalar convert from float/double to integer
7094 //===----------------------------------------------------------------------===//
7096 multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7097 X86VectorVTInfo DstVT, SDNode OpNode,
7098 SDNode OpNodeRnd,
7099 X86FoldableSchedWrite sched, string asm,
7100 string aliasStr> {
7101 let Predicates = [HasAVX512] in {
7102 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7103 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7104 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7105 EVEX, VEX_LIG, Sched<[sched]>;
7106 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7107 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7108 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
7109 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7110 Sched<[sched]>;
7111 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7112 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7113 [(set DstVT.RC:$dst, (OpNode
7114 (SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>,
7115 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7116 } // Predicates = [HasAVX512]
7118 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7119 (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7120 def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7121 (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7122 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7123 (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7124 SrcVT.IntScalarMemOp:$src), 0, "att">;
7125 }
7127 // Convert float/double to signed/unsigned int 32/64
7128 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7129 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7130 XS, EVEX_CD8<32, CD8VT1>;
7131 defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7132 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7133 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7134 defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7135 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7136 XS, EVEX_CD8<32, CD8VT1>;
7137 defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7138 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7139 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7140 defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7141 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7142 XD, EVEX_CD8<64, CD8VT1>;
7143 defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7144 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7145 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7146 defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7147 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7148 XD, EVEX_CD8<64, CD8VT1>;
7149 defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7150 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7151 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
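// Illustrative use of the rrb_Int forms above (editorial; AT&T syntax,
// matching the "$rc, $src, $dst" operand order in the asm string):
//   vcvtss2si {rz-sae}, %xmm0, %eax    # convert with static round-to-zero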
7153 // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
7154 // which produce unnecessary vmovs{s,d} instructions
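// e.g. (editorial sketch): _mm_cvtsi32_ss(a, i) is emitted by clang as a
// scalar conversion of i followed by a movss into a's low element; the
// patterns below match that whole
//   (X86Movss ..., (scalar_to_vector (sint_to_fp ...)))
// DAG so the conversion writes into $dst directly, with no extra vmovss.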
7155 let Predicates = [HasAVX512] in {
7156 def : Pat<(v4f32 (X86Movss
7157 (v4f32 VR128X:$dst),
7158 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
7159 (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7161 def : Pat<(v4f32 (X86Movss
7162 (v4f32 VR128X:$dst),
7163 (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
7164 (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7166 def : Pat<(v4f32 (X86Movss
7167 (v4f32 VR128X:$dst),
7168 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
7169 (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7171 def : Pat<(v4f32 (X86Movss
7172 (v4f32 VR128X:$dst),
7173 (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
7174 (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7176 def : Pat<(v2f64 (X86Movsd
7177 (v2f64 VR128X:$dst),
7178 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
7179 (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7181 def : Pat<(v2f64 (X86Movsd
7182 (v2f64 VR128X:$dst),
7183 (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
7184 (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7186 def : Pat<(v2f64 (X86Movsd
7187 (v2f64 VR128X:$dst),
7188 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
7189 (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7191 def : Pat<(v2f64 (X86Movsd
7192 (v2f64 VR128X:$dst),
7193 (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
7194 (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7196 def : Pat<(v4f32 (X86Movss
7197 (v4f32 VR128X:$dst),
7198 (v4f32 (scalar_to_vector (f32 (uint_to_fp GR64:$src)))))),
7199 (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7201 def : Pat<(v4f32 (X86Movss
7202 (v4f32 VR128X:$dst),
7203 (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi64 addr:$src))))))),
7204 (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7206 def : Pat<(v4f32 (X86Movss
7207 (v4f32 VR128X:$dst),
7208 (v4f32 (scalar_to_vector (f32 (uint_to_fp GR32:$src)))))),
7209 (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7211 def : Pat<(v4f32 (X86Movss
7212 (v4f32 VR128X:$dst),
7213 (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi32 addr:$src))))))),
7214 (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7216 def : Pat<(v2f64 (X86Movsd
7217 (v2f64 VR128X:$dst),
7218 (v2f64 (scalar_to_vector (f64 (uint_to_fp GR64:$src)))))),
7219 (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7221 def : Pat<(v2f64 (X86Movsd
7222 (v2f64 VR128X:$dst),
7223 (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi64 addr:$src))))))),
7224 (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7226 def : Pat<(v2f64 (X86Movsd
7227 (v2f64 VR128X:$dst),
7228 (v2f64 (scalar_to_vector (f64 (uint_to_fp GR32:$src)))))),
7229 (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7231 def : Pat<(v2f64 (X86Movsd
7232 (v2f64 VR128X:$dst),
7233 (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi32 addr:$src))))))),
7234 (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7235 } // Predicates = [HasAVX512]
7237 // Convert float/double to signed/unsigned int 32/64 with truncation
7238 multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7239 X86VectorVTInfo _DstRC, SDNode OpNode,
7240 SDNode OpNodeInt, SDNode OpNodeSAE,
7241 X86FoldableSchedWrite sched, string aliasStr>{
7242 let Predicates = [HasAVX512] in {
7243 let isCodeGenOnly = 1 in {
7244 def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7245 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7246 [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7247 EVEX, VEX_LIG, Sched<[sched]>;
7248 def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7249 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7250 [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7251 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7252 }
7254 def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7255 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7256 [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7257 EVEX, VEX_LIG, Sched<[sched]>;
7258 def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7259 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7260 [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7261 EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
7262 def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7263 (ins _SrcRC.IntScalarMemOp:$src),
7264 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7265 [(set _DstRC.RC:$dst,
7266 (OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>,
7267 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7268 }
7270 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7271 (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7272 def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7273 (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7274 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7275 (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7276 _SrcRC.IntScalarMemOp:$src), 0, "att">;
7277 }
7279 defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7280 fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7281 "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7282 defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7283 fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7284 "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7285 defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7286 fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7287 "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7288 defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7289 fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7290 "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
7292 defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7293 fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7294 "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7295 defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7296 fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7297 "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
7298 defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7299 fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7300 "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7301 defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7302 fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7303 "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7305 //===----------------------------------------------------------------------===//
7306 // AVX-512 Convert from float to double and back
7307 //===----------------------------------------------------------------------===//
7309 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7310 X86VectorVTInfo _Src, SDNode OpNode,
7311 X86FoldableSchedWrite sched> {
7312 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7313 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7314 "$src2, $src1", "$src1, $src2",
7315 (_.VT (OpNode (_.VT _.RC:$src1),
7316 (_Src.VT _Src.RC:$src2)))>,
7317 EVEX_4V, VEX_LIG, Sched<[sched]>;
7318 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7319 (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7320 "$src2, $src1", "$src1, $src2",
7321 (_.VT (OpNode (_.VT _.RC:$src1),
7322 (_Src.VT _Src.ScalarIntMemCPat:$src2)))>,
7323 EVEX_4V, VEX_LIG,
7324 Sched<[sched.Folded, sched.ReadAfterFold]>;
7326 let isCodeGenOnly = 1, hasSideEffects = 0 in {
7327 def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7328 (ins _.FRC:$src1, _Src.FRC:$src2),
7329 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7330 EVEX_4V, VEX_LIG, Sched<[sched]>;
7332 def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7333 (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7334 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7335 EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7336 }
7337 }
7339 // Scalar Conversion with SAE - suppress all exceptions
7340 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7341 X86VectorVTInfo _Src, SDNode OpNodeSAE,
7342 X86FoldableSchedWrite sched> {
7343 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7344 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7345 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7346 (_.VT (OpNodeSAE (_.VT _.RC:$src1),
7347 (_Src.VT _Src.RC:$src2)))>,
7348 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
7349 }
7351 // Scalar Conversion with rounding control (RC)
7352 multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7353 X86VectorVTInfo _Src, SDNode OpNodeRnd,
7354 X86FoldableSchedWrite sched> {
7355 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7356 (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7357 "$rc, $src2, $src1", "$src1, $src2, $rc",
7358 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7359 (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
7360 EVEX_4V, VEX_LIG, Sched<[sched]>,
7361 EVEX_B, EVEX_RC;
7362 }
7363 multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
7364 SDNode OpNode, SDNode OpNodeRnd,
7365 X86FoldableSchedWrite sched,
7366 X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7367 let Predicates = [HasAVX512] in {
7368 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7369 avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7370 OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
7371 }
7372 }
7374 multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
7375 SDNode OpNode, SDNode OpNodeSAE,
7376 X86FoldableSchedWrite sched,
7377 X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7378 let Predicates = [HasAVX512] in {
7379 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7380 avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
7381 EVEX_CD8<32, CD8VT1>, XS;
7382 }
7383 }
7384 defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86frounds,
7385 X86froundsRnd, WriteCvtSD2SS, f64x_info,
7386 f32x_info>;
7387 defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts,
7388 X86fpextsSAE, WriteCvtSS2SD, f32x_info,
7389 f64x_info>;
7391 def : Pat<(f64 (fpextend FR32X:$src)),
7392 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7393 Requires<[HasAVX512]>;
7394 def : Pat<(f64 (fpextend (loadf32 addr:$src))),
7395 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7396 Requires<[HasAVX512, OptForSize]>;
7398 def : Pat<(f32 (fpround FR64X:$src)),
7399 (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7400 Requires<[HasAVX512]>;
7402 def : Pat<(v4f32 (X86Movss
7403 (v4f32 VR128X:$dst),
7404 (v4f32 (scalar_to_vector
7405 (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
7406 (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
7407 Requires<[HasAVX512]>;
7409 def : Pat<(v2f64 (X86Movsd
7410 (v2f64 VR128X:$dst),
7411 (v2f64 (scalar_to_vector
7412 (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
7413 (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
7414 Requires<[HasAVX512]>;
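// e.g. (editorial sketch, not from the source): the Movss pattern above
// lets
//   __m128 r = _mm_cvtsd_ss(a, b);     // SSE2 C intrinsic
// select vcvtsd2ss directly instead of a convert plus a separate vmovss.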
7416 //===----------------------------------------------------------------------===//
7417 // AVX-512 Vector convert from signed/unsigned integer to float/double
7418 // and from float/double to signed/unsigned integer
7419 //===----------------------------------------------------------------------===//
7421 multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7422 X86VectorVTInfo _Src, SDNode OpNode,
7423 X86FoldableSchedWrite sched,
7424 string Broadcast = _.BroadcastStr,
7425 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7426 RegisterClass MaskRC = _.KRCWM,
7427 dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
7429 defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
7430 (ins _Src.RC:$src),
7431 (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
7432 (ins MaskRC:$mask, _Src.RC:$src),
7433 OpcodeStr, "$src", "$src",
7434 (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7435 (vselect MaskRC:$mask,
7436 (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7438 vselect, "$src0 = $dst">,
7439 EVEX, Sched<[sched]>;
7441 defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
7442 (ins MemOp:$src),
7443 (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
7444 (ins MaskRC:$mask, MemOp:$src),
7445 OpcodeStr#Alias, "$src", "$src",
7446 LdDAG,
7447 (vselect MaskRC:$mask, LdDAG, _.RC:$src0),
7448 vselect, "$src0 = $dst">,
7449 EVEX, Sched<[sched.Folded]>;
7451 defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
7452 (ins _Src.ScalarMemOp:$src),
7453 (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
7454 (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
7455 OpcodeStr,
7456 "${src}"##Broadcast, "${src}"##Broadcast,
7457 (_.VT (OpNode (_Src.VT
7458 (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
7459 )),
7460 (vselect MaskRC:$mask,
7461 (_.VT
7462 (OpNode
7463 (_Src.VT
7464 (X86VBroadcast
7465 (_Src.ScalarLdFrag addr:$src))))),
7466 _.RC:$src0),
7467 vselect, "$src0 = $dst">,
7468 EVEX, EVEX_B, Sched<[sched.Folded]>;
7469 }
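// For a concrete instantiation such as VCVTDQ2PSZ (defined below), the
// three defm bodies above correspond to asm like (editorial illustration):
//   vcvtdq2ps %zmm1, %zmm0 {%k1} {z}    # rr, masked/zeroing form
//   vcvtdq2ps (%rax), %zmm0             # rm, full-width load
//   vcvtdq2ps (%rax){1to16}, %zmm0      # rmb, embedded broadcast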
7470 // Conversion with SAE - suppress all exceptions
7471 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7472 X86VectorVTInfo _Src, SDNode OpNodeSAE,
7473 X86FoldableSchedWrite sched> {
7474 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7475 (ins _Src.RC:$src), OpcodeStr,
7476 "{sae}, $src", "$src, {sae}",
7477 (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
7478 EVEX, EVEX_B, Sched<[sched]>;
7479 }
7481 // Conversion with rounding control (RC)
7482 multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7483 X86VectorVTInfo _Src, SDNode OpNodeRnd,
7484 X86FoldableSchedWrite sched> {
7485 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7486 (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
7487 "$rc, $src", "$src, $rc",
7488 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
7489 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
7490 }
7492 // Similar to avx512_vcvt_fp, but uses an extload for the memory form.
7493 multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7494 X86VectorVTInfo _Src, SDNode OpNode,
7495 X86FoldableSchedWrite sched,
7496 string Broadcast = _.BroadcastStr,
7497 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7498 RegisterClass MaskRC = _.KRCWM>
7499 : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, sched, Broadcast, Alias,
7500 MemOp, MaskRC,
7501 (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
7503 // Extend Float to Double
7504 multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
7505 X86SchedWriteWidths sched> {
7506 let Predicates = [HasAVX512] in {
7507 defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
7508 fpextend, sched.ZMM>,
7509 avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
7510 X86vfpextSAE, sched.ZMM>, EVEX_V512;
7511 }
7512 let Predicates = [HasVLX] in {
7513 defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
7514 X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
7515 defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
7516 sched.YMM>, EVEX_V256;
7517 }
7518 }
7520 // Truncate Double to Float
7521 multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
7522 let Predicates = [HasAVX512] in {
7523 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, X86vfpround, sched.ZMM>,
7524 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
7525 X86vfproundRnd, sched.ZMM>, EVEX_V512;
7527 let Predicates = [HasVLX] in {
7528 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
7529 null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>,
7530 EVEX_V128;
7531 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, X86vfpround,
7532 sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7533 }
7535 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7536 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7537 def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7538 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7539 VK2WM:$mask, VR128X:$src), 0, "att">;
7540 def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|"
7541 "$dst {${mask}} {z}, $src}",
7542 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7543 VK2WM:$mask, VR128X:$src), 0, "att">;
7544 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7545 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7546 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
7547 "$dst {${mask}}, ${src}{1to2}}",
7548 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7549 VK2WM:$mask, f64mem:$src), 0, "att">;
7550 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7551 "$dst {${mask}} {z}, ${src}{1to2}}",
7552 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7553 VK2WM:$mask, f64mem:$src), 0, "att">;
7555 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7556 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7557 def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7558 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7559 VK4WM:$mask, VR256X:$src), 0, "att">;
7560 def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
7561 "$dst {${mask}} {z}, $src}",
7562 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7563 VK4WM:$mask, VR256X:$src), 0, "att">;
7564 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7565 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7566 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
7567 "$dst {${mask}}, ${src}{1to4}}",
7568 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7569 VK4WM:$mask, f64mem:$src), 0, "att">;
7570 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7571 "$dst {${mask}} {z}, ${src}{1to4}}",
7572 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7573 VK4WM:$mask, f64mem:$src), 0, "att">;
7574 }
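// Illustrative (editorial, not from the source): the "x"/"y" aliases
// above resolve the otherwise ambiguous memory form, e.g.
//   vcvtpd2psx (%rax), %xmm0    # 128-bit (v2f64) source
//   vcvtpd2psy (%rax), %xmm0    # 256-bit (v4f64) source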
7576 defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
7577 VEX_W, PD, EVEX_CD8<64, CD8VF>;
7578 defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
7579 PS, EVEX_CD8<32, CD8VH>;
7581 let Predicates = [HasAVX512] in {
7582 def : Pat<(v8f32 (fpround (v8f64 VR512:$src))),
7583 (VCVTPD2PSZrr VR512:$src)>;
7584 def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
7585 (v8f32 VR256X:$src0)),
7586 (VCVTPD2PSZrrk VR256X:$src0, VK8WM:$mask, VR512:$src)>;
7587 def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
7588 v8f32x_info.ImmAllZerosV),
7589 (VCVTPD2PSZrrkz VK8WM:$mask, VR512:$src)>;
7591 def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
7592 (VCVTPD2PSZrm addr:$src)>;
7593 def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
7594 (v8f32 VR256X:$src0)),
7595 (VCVTPD2PSZrmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
7596 def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
7597 v8f32x_info.ImmAllZerosV),
7598 (VCVTPD2PSZrmkz VK8WM:$mask, addr:$src)>;
7600 def : Pat<(v8f32 (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src))))),
7601 (VCVTPD2PSZrmb addr:$src)>;
7602 def : Pat<(vselect VK8WM:$mask,
7603 (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))),
7604 (v8f32 VR256X:$src0)),
7605 (VCVTPD2PSZrmbk VR256X:$src0, VK8WM:$mask, addr:$src)>;
7606 def : Pat<(vselect VK8WM:$mask,
7607 (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))),
7608 v8f32x_info.ImmAllZerosV),
7609 (VCVTPD2PSZrmbkz VK8WM:$mask, addr:$src)>;
7610 }
7612 let Predicates = [HasVLX] in {
7613 def : Pat<(v4f32 (fpround (v4f64 VR256X:$src))),
7614 (VCVTPD2PSZ256rr VR256X:$src)>;
7615 def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
7616 (v4f32 VR128X:$src0)),
7617 (VCVTPD2PSZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
7618 def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
7619 v4f32x_info.ImmAllZerosV),
7620 (VCVTPD2PSZ256rrkz VK4WM:$mask, VR256X:$src)>;
7622 def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
7623 (VCVTPD2PSZ256rm addr:$src)>;
7624 def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
7625 (v4f32 VR128X:$src0)),
7626 (VCVTPD2PSZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
7627 def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
7628 v4f32x_info.ImmAllZerosV),
7629 (VCVTPD2PSZ256rmkz VK4WM:$mask, addr:$src)>;
7631 def : Pat<(v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
7632 (VCVTPD2PSZ256rmb addr:$src)>;
7633 def : Pat<(vselect VK4WM:$mask,
7634 (v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
7635 (v4f32 VR128X:$src0)),
7636 (VCVTPD2PSZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
7637 def : Pat<(vselect VK4WM:$mask,
7638 (v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
7639 v4f32x_info.ImmAllZerosV),
7640 (VCVTPD2PSZ256rmbkz VK4WM:$mask, addr:$src)>;
7642 // Special patterns to allow use of X86vmfpround for masking. Instruction
7643 // patterns have been disabled with null_frag.
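// Sketch of the reasoning (editorial assumption, not stated in the
// source): the v2f64->v4f32 form writes only the low two result elements,
// so its masking follows the two source elements and cannot be modeled by
// the generic four-element vselect used in avx512_vcvt_fp; X86vmfpround
// carries the mask explicitly instead.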
7644 def : Pat<(X86vfpround (v2f64 VR128X:$src)),
7645 (VCVTPD2PSZ128rr VR128X:$src)>;
7646 def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
7647 VK2WM:$mask),
7648 (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
7649 def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV,
7650 VK2WM:$mask),
7651 (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
7653 def : Pat<(X86vfpround (loadv2f64 addr:$src)),
7654 (VCVTPD2PSZ128rm addr:$src)>;
7655 def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
7656 VK2WM:$mask),
7657 (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
7658 def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV,
7659 VK2WM:$mask),
7660 (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;
7662 def : Pat<(X86vfpround (v2f64 (X86VBroadcast (loadf64 addr:$src)))),
7663 (VCVTPD2PSZ128rmb addr:$src)>;
7664 def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))),
7665 (v4f32 VR128X:$src0), VK2WM:$mask),
7666 (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
7667 def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))),
7668 v4f32x_info.ImmAllZerosV, VK2WM:$mask),
7669 (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
7670 }
7672 // Convert Signed/Unsigned Doubleword to Double
7673 multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
7674 SDNode OpNode128, X86SchedWriteWidths sched> {
7675 // No rounding in this op
7676 let Predicates = [HasAVX512] in
7677 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
7678 sched.ZMM>, EVEX_V512;
7680 let Predicates = [HasVLX] in {
7681 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
7682 OpNode128, sched.XMM, "{1to2}", "", i64mem, VK2WM,
7683 (v2f64 (OpNode128 (bc_v4i32
7684 (v2i64
7685 (scalar_to_vector (loadi64 addr:$src))))))>,
7686 EVEX_V128;
7687 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
7688 sched.YMM>, EVEX_V256;
7689 }
7690 }
7692 // Convert Signed/Unsigned Doubleword to Float
7693 multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
7694 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7695 let Predicates = [HasAVX512] in
7696 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
7697 sched.ZMM>,
7698 avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
7699 OpNodeRnd, sched.ZMM>, EVEX_V512;
7701 let Predicates = [HasVLX] in {
7702 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
7703 sched.XMM>, EVEX_V128;
7704 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
7705 sched.YMM>, EVEX_V256;
7706 }
7707 }
7709 // Convert Float to Signed/Unsigned Doubleword with truncation
7710 multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7711 SDNode OpNodeSAE, X86SchedWriteWidths sched> {
7712 let Predicates = [HasAVX512] in {
7713 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7714 sched.ZMM>,
7715 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
7716 OpNodeSAE, sched.ZMM>, EVEX_V512;
7717 }
7718 let Predicates = [HasVLX] in {
7719 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7720 sched.XMM>, EVEX_V128;
7721 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7722 sched.YMM>, EVEX_V256;
7723 }
7724 }
7726 // Convert Float to Signed/Unsigned Doubleword
7727 multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7728 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7729 let Predicates = [HasAVX512] in {
7730 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7731 sched.ZMM>,
7732 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
7733 OpNodeRnd, sched.ZMM>, EVEX_V512;
7734 }
7735 let Predicates = [HasVLX] in {
7736 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7737 sched.XMM>, EVEX_V128;
7738 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7739 sched.YMM>, EVEX_V256;
7740 }
7741 }
7743 // Convert Double to Signed/Unsigned Doubleword with truncation
7744 multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7745 SDNode OpNodeSAE, X86SchedWriteWidths sched> {
7746 let Predicates = [HasAVX512] in {
7747 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7748 sched.ZMM>,
7749 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
7750 OpNodeSAE, sched.ZMM>, EVEX_V512;
7751 }
7752 let Predicates = [HasVLX] in {
7753 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7754 // memory forms of these instructions in Asm Parser. They have the same
7755 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7756 // due to the same reason.
7757 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
7758 null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
7759 VK2WM>, EVEX_V128;
7760 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7761 sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7762 }
7764 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7765 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
7766 VR128X:$src), 0, "att">;
7767 def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7768 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7769 VK2WM:$mask, VR128X:$src), 0, "att">;
7770 def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7771 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7772 VK2WM:$mask, VR128X:$src), 0, "att">;
7773 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7774 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
7775 f64mem:$src), 0, "att">;
7776 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
7777 "$dst {${mask}}, ${src}{1to2}}",
7778 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7779 VK2WM:$mask, f64mem:$src), 0, "att">;
7780 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7781 "$dst {${mask}} {z}, ${src}{1to2}}",
7782 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7783 VK2WM:$mask, f64mem:$src), 0, "att">;
7785 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7786 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
7787 VR256X:$src), 0, "att">;
7788 def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7789 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7790 VK4WM:$mask, VR256X:$src), 0, "att">;
7791 def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7792 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7793 VK4WM:$mask, VR256X:$src), 0, "att">;
7794 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7795 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
7796 f64mem:$src), 0, "att">;
7797 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
7798 "$dst {${mask}}, ${src}{1to4}}",
7799 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7800 VK4WM:$mask, f64mem:$src), 0, "att">;
7801 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7802 "$dst {${mask}} {z}, ${src}{1to4}}",
7803 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7804 VK4WM:$mask, f64mem:$src), 0, "att">;
7805 }
7807 // Convert Double to Signed/Unsigned Doubleword
7808 multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7809 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7810 let Predicates = [HasAVX512] in {
7811 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7812 sched.ZMM>,
7813 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
7814 OpNodeRnd, sched.ZMM>, EVEX_V512;
7815 }
7816 let Predicates = [HasVLX] in {
7817 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7818 // memory forms of these instructions in Asm Parcer. They have the same
7819 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7820 // due to the same reason.
7821 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
7822 null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
7823 VK2WM>, EVEX_V128;
7824 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7825 sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7826 }
7828 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7829 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7830 def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7831 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7832 VK2WM:$mask, VR128X:$src), 0, "att">;
7833 def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7834 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7835 VK2WM:$mask, VR128X:$src), 0, "att">;
7836 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7837 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
7838 f64mem:$src), 0, "att">;
7839 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
7840 "$dst {${mask}}, ${src}{1to2}}",
7841 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7842 VK2WM:$mask, f64mem:$src), 0, "att">;
7843 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7844 "$dst {${mask}} {z}, ${src}{1to2}}",
7845 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7846 VK2WM:$mask, f64mem:$src), 0, "att">;
7848 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7849 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7850 def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7851 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7852 VK4WM:$mask, VR256X:$src), 0, "att">;
7853 def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7854 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7855 VK4WM:$mask, VR256X:$src), 0, "att">;
7856 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7857 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
7858 f64mem:$src), 0, "att">;
7859 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
7860 "$dst {${mask}}, ${src}{1to4}}",
7861 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7862 VK4WM:$mask, f64mem:$src), 0, "att">;
7863 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7864 "$dst {${mask}} {z}, ${src}{1to4}}",
7865 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7866 VK4WM:$mask, f64mem:$src), 0, "att">;
7867 }
7869 // Convert Double to Signed/Unsigned Quadword
7870 multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7871 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7872 let Predicates = [HasDQI] in {
7873 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7874 sched.ZMM>,
7875 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
7876 OpNodeRnd, sched.ZMM>, EVEX_V512;
7877 }
7878 let Predicates = [HasDQI, HasVLX] in {
7879 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7880 sched.XMM>, EVEX_V128;
7881 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7882 sched.YMM>, EVEX_V256;
7883 }
7884 }
7886 // Convert Double to Signed/Unsigned Quadword with truncation
7887 multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7888 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7889 let Predicates = [HasDQI] in {
7890 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7891 sched.ZMM>,
7892 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
7893 OpNodeRnd, sched.ZMM>, EVEX_V512;
7894 }
7895 let Predicates = [HasDQI, HasVLX] in {
7896 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7897 sched.XMM>, EVEX_V128;
7898 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7899 sched.YMM>, EVEX_V256;
7900 }
7901 }
7903 // Convert Signed/Unsigned Quadword to Double
7904 multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
7905 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7906 let Predicates = [HasDQI] in {
7907 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
7908 sched.ZMM>,
7909 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
7910 OpNodeRnd, sched.ZMM>, EVEX_V512;
7911 }
7912 let Predicates = [HasDQI, HasVLX] in {
7913 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
7914 sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
7915 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
7916 sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
7917 }
7918 }
7920 // Convert Float to Signed/Unsigned Quadword
7921 multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7922 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7923 let Predicates = [HasDQI] in {
7924 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
7925 sched.ZMM>,
7926 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
7927 OpNodeRnd, sched.ZMM>, EVEX_V512;
7928 }
7929 let Predicates = [HasDQI, HasVLX] in {
7930 // Explicitly specified broadcast string, since we take only 2 elements
7931 // from v4f32x_info source
7932 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
7933 sched.XMM, "{1to2}", "", f64mem, VK2WM,
7934 (v2i64 (OpNode (bc_v4f32
7935 (v2f64
7936 (scalar_to_vector (loadf64 addr:$src))))))>,
7937 EVEX_V128;
7938 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
7939 sched.YMM>, EVEX_V256;
7940 }
7941 }
7943 // Convert Float to Signed/Unsigned Quadword with truncation
7944 multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7945 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7946 let Predicates = [HasDQI] in {
7947 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, sched.ZMM>,
7948 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
7949 OpNodeRnd, sched.ZMM>, EVEX_V512;
7950 }
7951 let Predicates = [HasDQI, HasVLX] in {
7952 // Explicitly specified broadcast string, since we take only 2 elements
7953 // from v4f32x_info source
7954 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
7955 sched.XMM, "{1to2}", "", f64mem, VK2WM,
7956 (v2i64 (OpNode (bc_v4f32
7957 (v2f64
7958 (scalar_to_vector (loadf64 addr:$src))))))>,
7959 EVEX_V128;
7960 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
7961 sched.YMM>, EVEX_V256;
7962 }
7963 }
7965 // Convert Signed/Unsigned Quadword to Float
7966 multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
7967 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7968 let Predicates = [HasDQI] in {
7969 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
7970 sched.ZMM>,
7971 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
7972 OpNodeRnd, sched.ZMM>, EVEX_V512;
7973 }
7974 let Predicates = [HasDQI, HasVLX] in {
7975 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7976 // memory forms of these instructions in Asm Parcer. They have the same
7977 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7978 // due to the same reason.
7979 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag,
7980 sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
7981 EVEX_V128, NotEVEX2VEXConvertible;
7982 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
7983 sched.YMM, "{1to4}", "{y}">, EVEX_V256,
7984 NotEVEX2VEXConvertible;
7985 }
7987 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7988 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
7989 VR128X:$src), 0, "att">;
7990 def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7991 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7992 VK2WM:$mask, VR128X:$src), 0, "att">;
7993 def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7994 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7995 VK2WM:$mask, VR128X:$src), 0, "att">;
7996 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7997 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
7998 i64mem:$src), 0, "att">;
7999 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
8000 "$dst {${mask}}, ${src}{1to2}}",
8001 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8002 VK2WM:$mask, i64mem:$src), 0, "att">;
8003 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8004 "$dst {${mask}} {z}, ${src}{1to2}}",
8005 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8006 VK2WM:$mask, i64mem:$src), 0, "att">;
8008 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
8009 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8010 VR256X:$src), 0, "att">;
8011 def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|"
8012 "$dst {${mask}}, $src}",
8013 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8014 VK4WM:$mask, VR256X:$src), 0, "att">;
8015 def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
8016 "$dst {${mask}} {z}, $src}",
8017 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8018 VK4WM:$mask, VR256X:$src), 0, "att">;
8019 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8020 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8021 i64mem:$src), 0, "att">;
8022 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
8023 "$dst {${mask}}, ${src}{1to4}}",
8024 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8025 VK4WM:$mask, i64mem:$src), 0, "att">;
8026 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8027 "$dst {${mask}} {z}, ${src}{1to4}}",
8028 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8029 VK4WM:$mask, i64mem:$src), 0, "att">;
8030 }
8032 defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
8033 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8035 defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
8036 X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8037 PS, EVEX_CD8<32, CD8VF>;
8039 defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86cvttp2si,
8040 X86cvttp2siSAE, SchedWriteCvtPS2DQ>,
8041 XS, EVEX_CD8<32, CD8VF>;
8043 defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86cvttp2si,
8044 X86cvttp2siSAE, SchedWriteCvtPD2DQ>,
8045 PD, VEX_W, EVEX_CD8<64, CD8VF>;
8047 defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86cvttp2ui,
8048 X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PS,
8049 EVEX_CD8<32, CD8VF>;
8051 defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui,
8052 X86cvttp2uiSAE, SchedWriteCvtPD2DQ>,
8053 PS, VEX_W, EVEX_CD8<64, CD8VF>;
8055 defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
8056 X86VUintToFP, SchedWriteCvtDQ2PD>, XS,
8057 EVEX_CD8<32, CD8VH>;
8059 defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
8060 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD,
8061 EVEX_CD8<32, CD8VF>;
8063 defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
8064 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8065 EVEX_CD8<32, CD8VF>;
8067 defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
8068 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
8069 VEX_W, EVEX_CD8<64, CD8VF>;
8071 defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
8072 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8073 PS, EVEX_CD8<32, CD8VF>;
8075 defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
8076 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8077 PS, EVEX_CD8<64, CD8VF>;
8079 defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
8080 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8081 PD, EVEX_CD8<64, CD8VF>;
8083 defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
8084 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8085 EVEX_CD8<32, CD8VH>;
8087 defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
8088 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8089 PD, EVEX_CD8<64, CD8VF>;
8091 defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
8092 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
8093 EVEX_CD8<32, CD8VH>;
8095 defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86cvttp2si,
8096 X86cvttp2siSAE, SchedWriteCvtPD2DQ>, VEX_W,
8097 PD, EVEX_CD8<64, CD8VF>;
8099 defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86cvttp2si,
8100 X86cvttp2siSAE, SchedWriteCvtPS2DQ>, PD,
8101 EVEX_CD8<32, CD8VH>;
8103 defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86cvttp2ui,
8104 X86cvttp2uiSAE, SchedWriteCvtPD2DQ>, VEX_W,
8105 PD, EVEX_CD8<64, CD8VF>;
8107 defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui,
8108 X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PD,
8109 EVEX_CD8<32, CD8VH>;
8111 defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
8112 X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
8113 EVEX_CD8<64, CD8VF>;
8115 defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
8116 X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
8117 EVEX_CD8<64, CD8VF>;
8119 defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp,
8120 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS,
8121 EVEX_CD8<64, CD8VF>;
8123 defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp,
8124 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD,
8125 EVEX_CD8<64, CD8VF>;
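// e.g. (editorial sketch, not from the source): with AVX512DQ a vector
// cast such as
//   __m512i q = _mm512_cvttpd_epi64(d);   // C intrinsic
// selects the VCVTTPD2QQZ form defined above; the *qq/*uqq conversions
// are DQ-only, which is why their multiclasses are gated on HasDQI.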
8127 let Predicates = [HasVLX] in {
8128 // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8129 // patterns have been disabled with null_frag.
8130 def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8131 (VCVTPD2DQZ128rr VR128X:$src)>;
8132 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8133 VK2WM:$mask),
8134 (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8135 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8136 VK2WM:$mask),
8137 (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8139 def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8140 (VCVTPD2DQZ128rm addr:$src)>;
8141 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8142 VK2WM:$mask),
8143 (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8144 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8145 VK2WM:$mask),
8146 (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8148 def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
8149 (VCVTPD2DQZ128rmb addr:$src)>;
8150 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8151 (v4i32 VR128X:$src0), VK2WM:$mask),
8152 (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8153 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8154 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8155 (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8157 // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8158 // patterns have been disabled with null_frag.
8159 def : Pat<(v4i32 (X86cvttp2si (v2f64 VR128X:$src))),
8160 (VCVTTPD2DQZ128rr VR128X:$src)>;
8161 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8163 (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8164 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8166 (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8168 def : Pat<(v4i32 (X86cvttp2si (loadv2f64 addr:$src))),
8169 (VCVTTPD2DQZ128rm addr:$src)>;
8170 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8172 (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8173 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8175 (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8177 def : Pat<(v4i32 (X86cvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
8178 (VCVTTPD2DQZ128rmb addr:$src)>;
8179 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8180 (v4i32 VR128X:$src0), VK2WM:$mask),
8181 (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8182 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8183 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8184 (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8186 // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8187 // patterns have been disabled with null_frag.
8188 def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8189 (VCVTPD2UDQZ128rr VR128X:$src)>;
8190 def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8192 (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8193 def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8195 (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8197 def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8198 (VCVTPD2UDQZ128rm addr:$src)>;
8199 def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8201 (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8202 def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8204 (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8206 def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
8207 (VCVTPD2UDQZ128rmb addr:$src)>;
8208 def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8209 (v4i32 VR128X:$src0), VK2WM:$mask),
8210 (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8211 def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8212 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8213 (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86cvttp2ui (v2f64 VR128X:$src))),
            (VCVTTPD2UDQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86cvttp2ui (loadv2f64 addr:$src))),
            (VCVTTPD2UDQZ128rm addr:$src)>;
  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86cvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
            (VCVTTPD2UDQZ128rmb addr:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
}

let Predicates = [HasDQI, HasVLX] in {
  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTPS2QQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            VR128X:$src0)),
            (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            v2i64x_info.ImmAllZerosV)),
            (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTPS2UQQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            VR128X:$src0)),
            (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            v2i64x_info.ImmAllZerosV)),
            (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTTPS2QQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            VR128X:$src0)),
            (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            v2i64x_info.ImmAllZerosV)),
            (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTTPS2UQQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            VR128X:$src0)),
            (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            v2i64x_info.ImmAllZerosV)),
            (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
}

let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v8i32 (X86cvttp2ui (v8f32 VR256X:$src1))),
            (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
              (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                     VR256X:$src1, sub_ymm)))), sub_ymm)>;

  def : Pat<(v4i32 (X86cvttp2ui (v4f32 VR128X:$src1))),
            (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
              (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                     VR128X:$src1, sub_xmm)))), sub_xmm)>;

  def : Pat<(v4i32 (X86cvttp2ui (v4f64 VR256X:$src1))),
            (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
              (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                    VR256X:$src1, sub_ymm)))), sub_xmm)>;

  def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
            (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
              (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                     VR256X:$src1, sub_ymm)))), sub_ymm)>;

  def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
            (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
              (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                     VR128X:$src1, sub_xmm)))), sub_xmm)>;

  def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
            (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
              (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                    VR128X:$src1, sub_xmm)))), sub_ymm)>;

  def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
            (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
              (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                    VR128X:$src1, sub_xmm)))), sub_xmm)>;
}

let Predicates = [HasVLX] in {
  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (vselect VK2WM:$mask,
                            (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                            VR128X:$src0)),
            (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2f64 (vselect VK2WM:$mask,
                            (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                            v2f64x_info.ImmAllZerosV)),
            (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (vselect VK2WM:$mask,
                            (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                            VR128X:$src0)),
            (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2f64 (vselect VK2WM:$mask,
                            (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                            v2f64x_info.ImmAllZerosV)),
            (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
}

let Predicates = [HasDQI, HasVLX] in {
  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4f32 (X86VSintToFP (v2i64 VR128X:$src))),
            (VCVTQQ2PSZ128rr VR128X:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4f32 (X86VSintToFP (loadv2i64 addr:$src))),
            (VCVTQQ2PSZ128rm addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4f32 (X86VSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))))),
            (VCVTQQ2PSZ128rmb addr:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
                           (v4f32 VR128X:$src0), VK2WM:$mask),
            (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4f32 (X86VUintToFP (v2i64 VR128X:$src))),
            (VCVTUQQ2PSZ128rr VR128X:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4f32 (X86VUintToFP (loadv2i64 addr:$src))),
            (VCVTUQQ2PSZ128rm addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4f32 (X86VUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))))),
            (VCVTUQQ2PSZ128rmb addr:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
                           (v4f32 VR128X:$src0), VK2WM:$mask),
            (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
}

let Predicates = [HasDQI, NoVLX] in {
  def : Pat<(v2i64 (X86cvttp2si (v2f64 VR128X:$src1))),
            (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
              (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                    VR128X:$src1, sub_xmm)))), sub_xmm)>;

  def : Pat<(v4i64 (X86cvttp2si (v4f32 VR128X:$src1))),
            (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
              (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                    VR128X:$src1, sub_xmm)))), sub_ymm)>;

  def : Pat<(v4i64 (X86cvttp2si (v4f64 VR256X:$src1))),
            (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
              (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                    VR256X:$src1, sub_ymm)))), sub_ymm)>;

  def : Pat<(v2i64 (X86cvttp2ui (v2f64 VR128X:$src1))),
            (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
              (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                    VR128X:$src1, sub_xmm)))), sub_xmm)>;

  def : Pat<(v4i64 (X86cvttp2ui (v4f32 VR128X:$src1))),
            (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
              (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                    VR128X:$src1, sub_xmm)))), sub_ymm)>;

  def : Pat<(v4i64 (X86cvttp2ui (v4f64 VR256X:$src1))),
            (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
              (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                    VR256X:$src1, sub_ymm)))), sub_ymm)>;

  def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
            (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
              (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                    VR256X:$src1, sub_ymm)))), sub_xmm)>;

  def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
            (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
              (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                    VR128X:$src1, sub_xmm)))), sub_xmm)>;

  def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
            (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
              (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                    VR256X:$src1, sub_ymm)))), sub_ymm)>;

  def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
            (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
              (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                    VR256X:$src1, sub_ymm)))), sub_xmm)>;

  def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
            (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
              (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                    VR128X:$src1, sub_xmm)))), sub_xmm)>;

  def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
            (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
              (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                    VR256X:$src1, sub_ymm)))), sub_ymm)>;
}

//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//

multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, PatFrag ld_frag,
                           X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                            (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
                            (X86cvtph2ps (_src.VT _src.RC:$src))>,
                            T8PD, Sched<[sched]>;
  defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
                            (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
                            (X86cvtph2ps (_src.VT
                                          (ld_frag addr:$src)))>,
                            T8PD, Sched<[sched.Folded]>;
}

multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               X86FoldableSchedWrite sched> {
  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                             (ins _src.RC:$src), "vcvtph2ps",
                             "{sae}, $src", "$src, {sae}",
                             (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
                             T8PD, EVEX_B, Sched<[sched]>;
}

let Predicates = [HasAVX512] in
  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, load,
                                    WriteCvtPH2PSZ>,
                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;

let Predicates = [HasVLX] in {
  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
                                       load, WriteCvtPH2PSY>, EVEX, EVEX_V256,
                                       EVEX_CD8<32, CD8VH>;
  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
                                       load, WriteCvtPH2PS>, EVEX, EVEX_V128,
                                       EVEX_CD8<32, CD8VH>;

  // Pattern match vcvtph2ps of a scalar i64 load.
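  // Added note (not in the original source): the two patterns below describe
  // the same 64-bit scalar load in the two shapes the DAG may take, either an
  // X86vzload64 or a scalar_to_vector of a plain loadi64, both reinterpreted
  // as the v8i16 input of vcvtph2ps.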
  def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTPH2PSZ128rm addr:$src)>;
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
              (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
            (VCVTPH2PSZ128rm addr:$src)>;
}

multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
  let ExeDomain = GenericDomain in {
    def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
                        (ins _src.RC:$src1, i32u8imm:$src2),
                        "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set _dest.RC:$dst,
                              (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2)))]>,
                        Sched<[RR]>;
    let Constraints = "$src0 = $dst" in
    def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
                         (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
                         "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                         [(set _dest.RC:$dst,
                               (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
                                             _dest.RC:$src0, _src.KRCWM:$mask))]>,
                         Sched<[RR]>, EVEX_K;
    def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
                          (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
                          "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
                          [(set _dest.RC:$dst,
                                (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
                                              _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
                          Sched<[RR]>, EVEX_KZ;
    let hasSideEffects = 0, mayStore = 1 in {
      def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
                          (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
                          "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                          Sched<[MR]>;
      def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
                           (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
                           "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
                           EVEX_K, Sched<[MR]>, NotMemoryFoldable;
    }
  }
}

multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               SchedWrite Sched> {
  let hasSideEffects = 0 in
  defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
                                    (outs _dest.RC:$dst),
                                    (ins _src.RC:$src1, i32u8imm:$src2),
                                    "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
                                    EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
}

let Predicates = [HasAVX512] in {
  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
  let Predicates = [HasVLX] in {
    defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
                                         WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
                         EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
    defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
                                         WriteCvtPS2PH, WriteCvtPS2PHSt>,
                         EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
  }

  def : Pat<(store (f64 (extractelt
                         (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
  def : Pat<(store (i64 (extractelt
                         (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
  def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
  def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),
            (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
}

// Patterns for matching conversions from float to half-float and vice versa.
let Predicates = [HasVLX] in {
  // Use MXCSR.RC for rounding instead of explicitly specifying the default
  // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
  // configurations we support (the default). However, falling back to MXCSR
  // is more consistent with other instructions, which are always controlled
  // by it. It's encoded as 0b100.
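  // Added note (explanatory, not in the original source): the immediate 4
  // (0b100) passed to VCVTPS2PHZ128rr below sets imm8 bit 2 of vcvtps2ph,
  // which selects "round using MXCSR.RC" instead of the static rounding mode
  // encoded in imm8 bits 1:0.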
  def : Pat<(fp_to_f16 FR32X:$src),
            (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (v8i16 (VCVTPS2PHZ128rr
              (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4))), sub_16bit))>;

  def : Pat<(f16_to_fp GR16:$src),
            (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
              (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)))), FR32X))>;

  def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
            (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
              (v8i16 (VCVTPS2PHZ128rr
                (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4)))), FR32X))>;
}

// Unordered/Ordered scalar fp compare with SAE and set EFLAGS
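// Added note (not in the original source): for the register-register
// comis/ucomis forms, EVEX.b selects the {sae} variant, e.g.
//   vucomiss {sae}, %xmm1, %xmm0
// which suppresses floating-point exception reporting for the compare.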
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
                              string OpcodeStr, X86FoldableSchedWrite sched> {
  let hasSideEffects = 0 in
  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
}

let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>,
                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>,
                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>,
                  AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>,
                  AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}

let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
                                 "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
                                 EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
                                 "ucomisd", WriteFCom>, PD, EVEX,
                                 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  let Pattern = []<dag> in {
    defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
                                  "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
                                  EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
                                  "comisd", WriteFCom>, PD, EVEX,
                                  VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
  let isCodeGenOnly = 1 in {
    defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
                                       sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
                                       EVEX_CD8<32, CD8VT1>;
    defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
                                       sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX,
                                       VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

    defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
                                      sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
                                      EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
                                      sse_load_f64, "comisd", WriteFCom>, PD, EVEX,
                                      VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}

/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
    defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                     (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                     "$src2, $src1", "$src1, $src2",
                                     (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                                     EVEX_4V, VEX_LIG, Sched<[sched]>;
    defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                     (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                                     "$src2, $src1", "$src1, $src2",
                                     (OpNode (_.VT _.RC:$src1),
                                             _.ScalarIntMemCPat:$src2)>, EVEX_4V, VEX_LIG,
                                     Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
                               f32x_info>, EVEX_CD8<32, CD8VT1>,
                               T8PD;
defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
                               f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
                               T8PD;
defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f32x_info>,
                                 EVEX_CD8<32, CD8VT1>, T8PD;
defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
                                 EVEX_CD8<64, CD8VT1>, T8PD;

/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src), OpcodeStr, "$src", "$src",
                            (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
                            Sched<[sched]>;
    defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                            (OpNode (_.VT
                              (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.ScalarMemOp:$src), OpcodeStr,
                             "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                             (OpNode (_.VT
                               (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
                             EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
                           v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM,
                           v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
                                OpNode, sched.XMM, v4f32x_info>,
                                EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
                                OpNode, sched.YMM, v8f32x_info>,
                                EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
                                OpNode, sched.XMM, v2f64x_info>,
                                EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
                                OpNode, sched.YMM, v4f64x_info>,
                                EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}

defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;

/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, SDNode OpNodeSAE,
                         X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain in {
    defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                    "$src2, $src1", "$src1, $src2",
                                    (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                                    Sched<[sched]>;

    defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                     (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                     "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                                     (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                                     EVEX_B, Sched<[sched]>;

    defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                    (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                                    "$src2, $src1", "$src1, $src2",
                                    (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2)>,
                                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
                           sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG;
  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
                           sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
}

let Predicates = [HasERI] in {
  defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
                             SchedWriteFRcp.Scl>, T8PD, EVEX_4V;
  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
                               SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
}

defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
                            SchedWriteFRnd.Scl>, T8PD, EVEX_4V;

/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd

multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain in {
    defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _.RC:$src), OpcodeStr, "$src", "$src",
                             (OpNode (_.VT _.RC:$src))>,
                             Sched<[sched]>;

    defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                             (OpNode (_.VT
                               (bitconvert (_.LdFrag addr:$src))))>,
                             Sched<[sched.Folded, sched.ReadAfterFold]>;

    defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.ScalarMemOp:$src), OpcodeStr,
                              "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                              (OpNode (_.VT
                                (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
                              EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                             SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain in
  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src), OpcodeStr,
                            "{sae}, $src", "$src, {sae}",
                            (OpNode (_.VT _.RC:$src))>,
                            EVEX_B, Sched<[sched]>;
}

multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
                      SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
             avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
             T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
             avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
             T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86SchedWriteWidths sched> {
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
                                sched.XMM>,
                                EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
                                sched.YMM>,
                                EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
                                sched.XMM>,
                                EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
                                sched.YMM>,
                                EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
  }
}

let Predicates = [HasERI] in {
  defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
                             SchedWriteFRsqrt>, EVEX;
  defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
                           SchedWriteFRcp>, EVEX;
  defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
                          SchedWriteFAdd>, EVEX;
}

defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
                          SchedWriteFRnd>,
               avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
                                        SchedWriteFRnd>, EVEX;

multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
                           (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
                           EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}

multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src), OpcodeStr, "$src", "$src",
                            (_.VT (fsqrt _.RC:$src))>, EVEX,
                            Sched<[sched]>;
    defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                            (fsqrt (_.VT
                              (bitconvert (_.LdFrag addr:$src))))>, EVEX,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.ScalarMemOp:$src), OpcodeStr,
                             "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                             (fsqrt (_.VT
                               (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
                             EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                sched.PS.ZMM, v16f32_info>,
                                EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                sched.PD.ZMM, v8f64_info>,
                                EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                     sched.PS.XMM, v4f32x_info>,
                                     EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                     sched.PS.YMM, v8f32x_info>,
                                     EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                     sched.PD.XMM, v2f64x_info>,
                                     EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                     sched.PD.YMM, v4f64x_info>,
                                     EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  }
}

multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
                                        X86SchedWriteSizes sched> {
  defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
                                      sched.PS.ZMM, v16f32_info>,
                                      EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
                                      sched.PD.ZMM, v8f64_info>,
                                      EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name> {
  let ExeDomain = _.ExeDomain in {
    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                        (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                        "$src2, $src1", "$src1, $src2",
                                        (X86fsqrts (_.VT _.RC:$src1),
                                                   (_.VT _.RC:$src2))>,
                                        Sched<[sched]>;
    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                        (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                                        "$src2, $src1", "$src1, $src2",
                                        (X86fsqrts (_.VT _.RC:$src1),
                                                   _.ScalarIntMemCPat:$src2)>,
                                        Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                         (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                                         "$rc, $src2, $src1", "$src1, $src2, $rc",
                                         (X86fsqrtRnds (_.VT _.RC:$src1),
                                                       (_.VT _.RC:$src2),
                                                       (i32 timm:$rc))>,
                                         EVEX_B, EVEX_RC, Sched<[sched]>;

    let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                Sched<[sched]>;
      let mayLoad = 1 in
      def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
    }
  }

  let Predicates = [HasAVX512] in {
    def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
              (!cast<Instruction>(Name#Zr)
                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
  }

  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(_.EltVT (fsqrt (load addr:$src))),
              (!cast<Instruction>(Name#Zm)
                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
  }
}

multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
             EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
             EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
}

defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;

defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;

multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                        (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                                        "$src3, $src2, $src1", "$src1, $src2, $src3",
                                        (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                                            (i32 imm:$src3)))>,
                                        Sched<[sched]>;

    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                                         "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
                                         (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                                                (i32 imm:$src3)))>, EVEX_B,
                                         Sched<[sched]>;

    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                        (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                                        OpcodeStr,
                                        "$src3, $src2, $src1", "$src1, $src2, $src3",
                                        (_.VT (X86RndScales _.RC:$src1,
                                               _.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>,
                                        Sched<[sched.Folded, sched.ReadAfterFold]>;

    let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                []>, Sched<[sched]>;

      let mayLoad = 1 in
      def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
    }
  }

  let Predicates = [HasAVX512] in {
    def : Pat<(X86VRndScale _.FRC:$src1, imm:$src2),
              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
               _.FRC:$src1, imm:$src2))>;
  }

  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), imm:$src2),
              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
               addr:$src1, imm:$src2))>;
  }
}

defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
                                           SchedWriteFRnd.Scl, f32x_info>,
                    AVX512AIi8Base, EVEX_4V, VEX_LIG,
                    EVEX_CD8<32, CD8VT1>;

defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
                                           SchedWriteFRnd.Scl, f64x_info>,
                    VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
                    EVEX_CD8<64, CD8VT1>;

multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
                                dag OutMask, Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
               (extractelt _.VT:$dst, (iPTR 0))))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
               _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;

    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
               ZeroFP))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
               OutMask, _.VT:$src2, _.VT:$src1)>;
  }
}

defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
                            fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
                            fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;

//===----------------------------------------------------------------------===//
// Integer truncate and extend operations
//===----------------------------------------------------------------------===//

// PatFrags that contain a select and a truncate op. They take operands in the
// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
// either to the multiclasses.
def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
                           (vselect node:$mask,
                                    (trunc node:$src), node:$src0)>;
def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
                            (vselect node:$mask,
                                     (X86vtruncs node:$src), node:$src0)>;
def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
                             (vselect node:$mask,
                                      (X86vtruncus node:$src), node:$src0)>;
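
// Illustrative sketch (added commentary, not in the original source): with
// these fragments a masked truncate written as a select, e.g.
//   (vselect VK8WM:$mask, (trunc (v8i64 VR512:$src)), (v8i32 VR256X:$src0))
// matches select_trunc with operands rebound to ($src, $src0, $mask), the
// operand order X86vmtrunc already uses, so the truncate multiclasses below
// accept either node through the same MaskNode parameter.
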
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDPatternOperator MaskNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
  let ExeDomain = DestInfo.ExeDomain in {
    def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
                        (ins SrcInfo.RC:$src),
                        OpcodeStr # "\t{$src, $dst|$dst, $src}",
                        [(set DestInfo.RC:$dst,
                              (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
                        EVEX, Sched<[sched]>;
    let Constraints = "$src0 = $dst" in
    def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
                         (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
                         OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                         [(set DestInfo.RC:$dst,
                               (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                                         (DestInfo.VT DestInfo.RC:$src0),
                                         SrcInfo.KRCWM:$mask))]>,
                         EVEX, EVEX_K, Sched<[sched]>;
    def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
                          (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
                          OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                          [(set DestInfo.RC:$dst,
                                (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                                              DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
                          EVEX, EVEX_KZ, Sched<[sched]>;
  }

  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
                        (ins x86memop:$dst, SrcInfo.RC:$src),
                        OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
                        EVEX, Sched<[sched.Folded]>;

    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
                         (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
                         OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
                         EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
  } // mayStore = 1, hasSideEffects = 0
}

multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
                                    X86VectorVTInfo DestInfo,
                                    PatFrag truncFrag, PatFrag mtruncFrag,
                                    string Name> {

  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix##mr)
             addr:$dst, SrcInfo.RC:$src)>;

  def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
                        SrcInfo.KRCWM:$mask),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk)
             addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}

multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
                        SDNode OpNode256, SDNode OpNode512,
                        SDPatternOperator MaskNode128,
                        SDPatternOperator MaskNode256,
                        SDPatternOperator MaskNode512,
                        X86FoldableSchedWrite sched,
                        AVX512VLVectorVTInfo VTSrcInfo,
                        X86VectorVTInfo DestInfoZ128,
                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
                        X86MemOperand x86memopZ, PatFrag truncFrag,
                        PatFrag mtruncFrag, Predicate prd = HasAVX512> {

  let Predicates = [HasVLX, prd] in {
    defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
                                   VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
               avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
                                        truncFrag, mtruncFrag, NAME>, EVEX_V128;

    defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
                                   VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
               avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
                                        truncFrag, mtruncFrag, NAME>, EVEX_V256;
  }
  let Predicates = [prd] in
    defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
                                VTSrcInfo.info512, DestInfoZ, x86memopZ>,
            avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
                                     truncFrag, mtruncFrag, NAME>, EVEX_V512;
}

multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
                          InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
}

multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
}

multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
}

multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}

multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}

multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i16_info, v16i8x_info, v16i8x_info,
                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}

defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, select_trunc,
                               WriteShuffle256, truncstorevi8,
                               masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, select_truncs,
                                WriteShuffle256, truncstore_s_vi8,
                                masked_truncstore_s_vi8, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
                                 select_truncus, WriteShuffle256,
                                 truncstore_us_vi8, masked_truncstore_us_vi8,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
                               WriteShuffle256, truncstorevi16,
                               masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs,
                                WriteShuffle256, truncstore_s_vi16,
                                masked_truncstore_s_vi16, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
                                 select_truncus, WriteShuffle256,
                                 truncstore_us_vi16, masked_truncstore_us_vi16,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
                               WriteShuffle256, truncstorevi32,
                               masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs,
                                WriteShuffle256, truncstore_s_vi32,
                                masked_truncstore_s_vi32, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
                                 select_truncus, WriteShuffle256,
                                 truncstore_us_vi32, masked_truncstore_us_vi32,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
                               WriteShuffle256, truncstorevi8,
                               masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
                                WriteShuffle256, truncstore_s_vi8,
                                masked_truncstore_s_vi8, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
                                 select_truncus, WriteShuffle256,
                                 truncstore_us_vi8, masked_truncstore_us_vi8,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
                               WriteShuffle256, truncstorevi16,
                               masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
                                WriteShuffle256, truncstore_s_vi16,
                                masked_truncstore_s_vi16, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
                                 select_truncus, WriteShuffle256,
                                 truncstore_us_vi16, masked_truncstore_us_vi16,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
                               WriteShuffle256, truncstorevi8,
                               masked_truncstorevi8, X86vtrunc,
                               X86vmtrunc>;
defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
                                WriteShuffle256, truncstore_s_vi8,
                                masked_truncstore_s_vi8, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
                                 select_truncus, WriteShuffle256,
                                 truncstore_us_vi8, masked_truncstore_us_vi8,
                                 X86vtruncus, X86vmtruncus>;

let Predicates = [HasAVX512, NoVLX] in {
  def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
           (v8i16 (EXTRACT_SUBREG
                   (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                                VR256X:$src, sub_ymm)))), sub_xmm))>;
  def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
           (v4i32 (EXTRACT_SUBREG
                   (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                              VR256X:$src, sub_ymm)))), sub_xmm))>;
}

let Predicates = [HasBWI, NoVLX] in {
  def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
           (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                                               VR256X:$src, sub_ymm))), sub_xmm))>;
}

// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
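// For example (added commentary, not in the original source): without BWI a
// v8i16 vselect is not legal, so a masked vpmovdw is built up front as
//   (X86vmtrunc (v8i32 VR256X:$src), (v8i16 VR128X:$src0), VK8WM:$mask)
// and the patterns instantiated below match that node directly.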
multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
                           X86VectorVTInfo DestInfo,
                           X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.RC:$src0,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
                                                 SrcInfo.KRCWM:$mask,
                                                 SrcInfo.RC:$src)>;

  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.ImmAllZerosV,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
                                                  SrcInfo.RC:$src)>;
}

let Predicates = [HasVLX] in {
  defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
  defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
  defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
}

let Predicates = [HasAVX512] in {
  defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
  defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
  defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;

  defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
  defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
  defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;

  defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
  defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
  defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
}

multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
                                  X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
                                  X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode> {
  let ExeDomain = DestInfo.ExeDomain in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                              (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
                              (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
                              EVEX, Sched<[sched]>;

    defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                              (ins x86memop:$src), OpcodeStr, "$src", "$src",
                              (DestInfo.VT (LdFrag addr:$src))>,
                              EVEX, Sched<[sched.Folded]>;
  }
}

multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, SDNode InVecNode, string ExtTy,
                              X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
                                      v16i8x_info, i64mem, LdFrag, InVecNode>,
                                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
                                      v16i8x_info, i128mem, LdFrag, OpNode>,
                                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasBWI] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
                                    v32i8x_info, i256mem, LdFrag, OpNode>,
                                    EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}

multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, SDNode InVecNode, string ExtTy,
                              X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
                                      v16i8x_info, i32mem, LdFrag, InVecNode>,
                                      EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
                                      v16i8x_info, i64mem, LdFrag, InVecNode>,
                                      EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
                                    v16i8x_info, i128mem, LdFrag, OpNode>,
                                    EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}

multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, SDNode InVecNode, string ExtTy,
                              X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                                      v16i8x_info, i16mem, LdFrag, InVecNode>,
                                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                                      v16i8x_info, i32mem, LdFrag, InVecNode>,
                                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                                    v16i8x_info, i64mem, LdFrag, InVecNode>,
                                    EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
  }
}

multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, SDNode InVecNode, string ExtTy,
                              X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
                                      v8i16x_info, i64mem, LdFrag, InVecNode>,
                                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
                                      v8i16x_info, i128mem, LdFrag, OpNode>,
                                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
                                    v16i16x_info, i256mem, LdFrag, OpNode>,
                                    EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}

multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, SDNode InVecNode, string ExtTy,
                              X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                                      v8i16x_info, i32mem, LdFrag, InVecNode>,
                                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                                      v8i16x_info, i64mem, LdFrag, InVecNode>,
                                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                                    v8i16x_info, i128mem, LdFrag, OpNode>,
                                    EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}

multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, SDNode InVecNode, string ExtTy,
                              X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                                      v4i32x_info, i64mem, LdFrag, InVecNode>,
                                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;

    defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                                      v4i32x_info, i128mem, LdFrag, OpNode>,
                                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
  }
  let Predicates = [HasAVX512] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                                    v8i32x_info, i256mem, LdFrag, OpNode>,
                                    EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
  }
}
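
// Note on the naming used by the WriteShuffle256_* multiclasses above: the
// two trailing letters give the source and destination element widths
// (B=8, W=16, D=32, Q=64), and each instantiation expands to Z128/Z256/Z
// (512-bit) variants. E.g. the Z record of VPMOVZXBD ("vpmovzxbd" with a ZMM
// destination) zero-extends sixteen bytes from an XMM source to sixteen
// dwords.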
defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;

defm VPMOVSXBW : WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBD : WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBQ : WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWD : WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWQ : WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXDQ : WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;
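
// Note that the *_invec nodes (sext_invec/zext_invec) extend only the low
// elements of the source vector and ignore the rest, while plain sext/zext
// consume every source element; that is why the narrower forms above are
// lowered with InVecNode and the widest forms with OpNode.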

// Patterns that we also need any-extend versions of. aext_vector_inreg
// is currently legalized to zext_vector_inreg.
multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
  // 256-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
    def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
    def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

    def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }

  // 512-bit patterns
  let Predicates = [HasBWI] in {
    def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
    def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
    def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> :
  AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
  // 128-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  // 256-bit patterns
  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasAVX512] in {
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  }
}

defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;

// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
// ext+trunc aggressively, making it impossible to legalize the DAG to this
// pattern directly.
let Predicates = [HasAVX512, NoBWI] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
}

//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations
//===----------------------------------------------------------------------===//

// FIXME: Improve scheduling of gather/scatter instructions.
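// A gather loads elements through a vector of indices under a write-mask:
// hardware clears each mask bit as the corresponding element arrives, so the
// mask is tied to a write-back operand ($mask_wb), and the destination is
// marked @earlyclobber because it must not overlap the index or mask inputs.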
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, PatFrag GatherNode,
                         RegisterClass MaskRC = _.KRCWM> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain in
  def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
            (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
            !strconcat(OpcodeStr#_.Suffix,
                       "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
            [(set _.RC:$dst, MaskRC:$mask_wb,
                  (GatherNode (_.VT _.RC:$src1), MaskRC:$mask,
                              vectoraddr:$src2))]>, EVEX, EVEX_K,
            EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
}

multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
                                       vy512xmem, mgatherv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
                                       vz512mem, mgatherv8i64>, EVEX_V512, VEX_W;
  let Predicates = [HasVLX] in {
    defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                                            vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
    defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
                                            vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
    defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                            vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
    defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                            vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
  }
}

multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
                                       mgatherv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem,
                                       mgatherv8i64>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                                            vy256xmem, mgatherv8i32>, EVEX_V256;
    defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                            vy128xmem, mgatherv4i64>, EVEX_V256;
    defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                            vx128xmem, mgatherv4i32>, EVEX_V128;
    defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                            vx64xmem, mgatherv2i64, VK2WM>,
                                            EVEX_V128;
  }
}

defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
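
// Scatter is the store-side dual of gather: the elements of $src selected by
// the write-mask are stored through the index vector, and completed lanes
// are cleared in the written-back mask operand.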
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, PatFrag ScatterNode,
                          RegisterClass MaskRC = _.KRCWM> {

let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in

  def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
             (ins memop:$dst, MaskRC:$mask, _.RC:$src),
             !strconcat(OpcodeStr#_.Suffix,
                        "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
             [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
                                    MaskRC:$mask, vectoraddr:$dst))]>,
             EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
             Sched<[WriteStore]>;
}

multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
                                        vy512xmem, mscatterv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
                                        vz512mem, mscatterv8i64>, EVEX_V512, VEX_W;
  let Predicates = [HasVLX] in {
    defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                                             vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
    defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
                                             vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
    defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                                             vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
    defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                             vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
  }
}

multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
                                        mscatterv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem,
                                        mscatterv8i64>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                                             vy256xmem, mscatterv8i32>, EVEX_V256;
    defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                             vy128xmem, mscatterv4i64>, EVEX_V256;
    defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                                             vx128xmem, mscatterv4i32>, EVEX_V128;
    defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                             vx64xmem, mscatterv2i64, VK2WM>,
                                             EVEX_V128;
  }
}

defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;

// prefetch
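// The PF0/PF1 gather/scatter prefetches below move no data; they prefetch
// the addressed cache lines with a T0 (PF0) or T1 (PF1) locality hint. They
// exist only in 512-bit form and are gated on the AVX512PF feature (HasPFI).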
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
                                          RegisterClass KRC, X86MemOperand memop> {
  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
  def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
             !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
             EVEX, EVEX_K, Sched<[WriteLoad]>;
}

defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                    VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                    VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                    VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                    VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                    VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                    VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                    VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                    VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
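
// Convert mask registers to vectors and back. VPMOVM2* materializes a mask
// as a vector by sign-extending each mask bit into a full element (all-ones
// or zero); VPMOV*2M performs the inverse, capturing each element's sign bit
// into a mask register.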
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr> {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                  !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
                  EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?

// Also need a pattern for anyextend.
def : Pat<(Vec.VT (anyext Vec.KRC:$src)),
          (!cast<Instruction>(NAME#"rr") Vec.KRC:$src)>;
}

multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
  let Predicates = [prd] in
    defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
  }
}

defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2", HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI>, VEX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI>, VEX_W;
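
// E.g. with k1 = 0b0101, "vpmovm2d %k1, %xmm0" produces
// <0xffffffff, 0, 0xffffffff, 0> (bit 0 of the mask maps to element 0).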

multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
                      EVEX, Sched<[WriteMove]>;
}

// Use the 512-bit version to implement 128/256-bit in case of NoVLX.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _,
                                           string Name> {

  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(Name#"Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                   _.KRC))>;
}

multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : convert_vector_to_mask_common<opc, VTInfo.info512, OpcodeStr>,
                                           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                                              EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                                              EVEX_V128;
  }
  let Predicates = [prd, NoVLX] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
  }
}

defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
                                              avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
                                              avx512vl_i16_info, HasBWI>, VEX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
                                              avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
                                              avx512vl_i64_info, HasDQI>, VEX_W;

// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
// is available, but BWI is not. We can't handle this in lowering because
// a target-independent DAG combine likes to combine sext and trunc.
let Predicates = [HasDQI, NoBWI] in {
  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;

  def : Pat<(v16i8 (anyext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (anyext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}

let Predicates = [HasDQI, NoBWI, HasVLX] in {
  def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;

  def : Pat<(v8i16 (anyext (v8i1 VK8:$src))),
            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - COMPRESS and EXPAND
//===----------------------------------------------------------------------===//
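
// VCOMPRESS packs the source elements selected by the write-mask into
// contiguous low positions of the destination register (or into contiguous
// memory); VEXPAND is the inverse, distributing consecutive source elements
// into the destination positions selected by the mask.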

multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                        string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>, AVX5128IBase,
              Sched<[sched]>;

  let mayStore = 1, hasSideEffects = 0 in
  def mr : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.RC:$src),
              OpcodeStr # "\t{$src, $dst|$dst, $src}",
              []>, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;

  def mrk : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
              []>,
              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;
}

multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix##mrk)
                                addr:$dst, _.KRCWM:$mask, _.RC:$src)>;

  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix##rrk)
                                _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix##rrkz)
                                _.KRCWM:$mask, _.RC:$src)>;
}

multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
    defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
             compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPCOMPRESS?
defm VPCOMPRESSD : compress_by_elt_width<0x8B, "vpcompressd", WriteVarShuffle256,
                                         avx512vl_i32_info>, EVEX, NotMemoryFoldable;
defm VPCOMPRESSQ : compress_by_elt_width<0x8B, "vpcompressq", WriteVarShuffle256,
                                         avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
defm VCOMPRESSPS : compress_by_elt_width<0x8A, "vcompressps", WriteVarShuffle256,
                                         avx512vl_f32_info>, EVEX, NotMemoryFoldable;
defm VCOMPRESSPD : compress_by_elt_width<0x8A, "vcompresspd", WriteVarShuffle256,
                                         avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;

multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                               string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>, AVX5128IBase,
              Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>,
              AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz)
                                _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz)
                                _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                                     (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix##rmk)
                                _.RC:$src0, _.KRCWM:$mask, addr:$src)>;

  def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix##rrk)
                                _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix##rrkz)
                                _.KRCWM:$mask, _.RC:$src)>;
}

multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
    defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
             expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPEXPAND?
defm VPEXPANDD : expand_by_elt_width<0x89, "vpexpandd", WriteVarShuffle256,
                                     avx512vl_i32_info>, EVEX;
defm VPEXPANDQ : expand_by_elt_width<0x89, "vpexpandq", WriteVarShuffle256,
                                     avx512vl_i64_info>, EVEX, VEX_W;
defm VEXPANDPS : expand_by_elt_width<0x88, "vexpandps", WriteVarShuffle256,
                                     avx512vl_f32_info>, EVEX;
defm VEXPANDPD : expand_by_elt_width<0x88, "vexpandpd", WriteVarShuffle256,
                                     avx512vl_f64_info>, EVEX, VEX_W;

// Handle instructions of the form:
//   reg_vec1 = op(reg_vec, imm)
//   reg_vec1 = op(mem_vec, imm)
//   reg_vec1 = op(broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT.
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2))>, Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.MemOp:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                              (i32 imm:$src2))>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
                      "${src1}"##_.BroadcastStr##", $src2",
                      (OpNode (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src1))),
                              (i32 imm:$src2))>, EVEX_B,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle instruction reg_vec1 = op(reg_vec2, imm), {sae}
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                          SDNode OpNode, X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
                      "$src1, {sae}, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2))>,
                      EVEX_B, Sched<[sched]>;
}

multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
                  AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
                                        _.info512>,
             avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
                                            sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM,
                                           _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM,
                                           _.info256>, EVEX_V256;
  }
}

// Handle instructions of the form:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//   reg_vec1 = op(reg_vec2, mem_vec, imm)
//   reg_vec1 = op(reg_vec2, broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT.
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT (bitconvert (_.LdFrag addr:$src2))),
                              (i32 imm:$src3))>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                      "$src1, ${src2}"##_.BroadcastStr##", $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                              (i32 imm:$src3))>, EVEX_B,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle instructions of the form:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//   reg_vec1 = op(reg_vec2, mem_vec, imm)
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
                              X86VectorVTInfo SrcInfo> {
  let ExeDomain = DestInfo.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                                       (SrcInfo.VT SrcInfo.RC:$src2),
                                       (i8 imm:$src3)))>,
                  Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                  (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                                       (SrcInfo.VT (bitconvert
                                                    (SrcInfo.LdFrag addr:$src2))),
                                       (i8 imm:$src3)))>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle instructions of the form:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//   reg_vec1 = op(reg_vec2, mem_vec, imm)
//   reg_vec1 = op(reg_vec2, broadcast(eltVt), imm)
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _> :
  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _> {

  let ExeDomain = _.ExeDomain in
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                  "$src1, ${src2}"##_.BroadcastStr##", $src3",
                  (OpNode (_.VT _.RC:$src1),
                          (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                          (i8 imm:$src3))>, EVEX_B,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Handle scalar instructions of the form:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//   reg_vec1 = op(reg_vec2, mem_scalar, imm)
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT (scalar_to_vector
                                     (_.ScalarLdFrag addr:$src2))),
                              (i32 imm:$src3))>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle instruction reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86FoldableSchedWrite sched,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3))>,
                      EVEX_B, Sched<[sched]>;
}

// Handle scalar instruction reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3))>,
                      EVEX_B, Sched<[sched]>;
}

multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
                  AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
             avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
             EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                EVEX_V256;
  }
}

multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
                  AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
  let Predicates = [Pred] in {
    defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
                                SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
                                   SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
                                   SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
  }
}

multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                  bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
                  Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
             EVEX_V512, AVX512AIi8Base, EVEX_4V;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                EVEX_V128, AVX512AIi8Base, EVEX_4V;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                EVEX_V256, AVX512AIi8Base, EVEX_4V;
  }
}

multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
             avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
  }
}

multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                  bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
                            opcPs, OpNode, OpNodeSAE, sched, prd>,
                            EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
                            opcPd, OpNode, OpNodeSAE, sched, prd>,
                            EVEX_CD8<64, CD8VF>, VEX_W;
}

defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
                              X86VReduce, X86VReduceSAE, SchedWriteFRnd, HasDQI>,
                              AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
                              X86VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>,
                              AVX512AIi8Base, EVEX;
defm VGETMANT  : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
                              X86VGetMant, X86VGetMantSAE, SchedWriteFRnd, HasAVX512>,
                              AVX512AIi8Base, EVEX;

defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                              0x50, X86VRange, X86VRangeSAE, SchedWriteFAdd, HasDQI>,
                              AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
                              0x50, X86VRange, X86VRangeSAE, SchedWriteFAdd, HasDQI>,
                              AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

defm VRANGESD : avx512_common_fp_sae_scalar_imm<"vrangesd", f64x_info,
                              0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
                              AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS : avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
                              0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
                              AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VREDUCESD : avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
                              0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
                              AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS : avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
                              0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
                              AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VGETMANTSD : avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
                              0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
                              AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS : avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
                              0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
                              AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
                                          X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _,
                                          X86VectorVTInfo CastInfo,
                                          string EVEX2VEXOvrd> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT (bitconvert
                       (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
                                                (i8 imm:$src3)))))>,
                Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT
                 (bitconvert
                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
                                           (CastInfo.LdFrag addr:$src2),
                                           (i8 imm:$src3)))))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>,
                EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                "$src1, ${src2}"##_.BroadcastStr##", $src3",
                (_.VT
                 (bitconvert
                  (CastInfo.VT
                   (X86Shuf128 _.RC:$src1,
                               (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                               (i8 imm:$src3)))))>, EVEX_B,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
                                   AVX512VLVectorVTInfo _,
                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc,
                                   string EVEX2VEXOvrd> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                            _.info512, CastInfo.info512, "">, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
    defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                               _.info256, CastInfo.info256,
                                               EVEX2VEXOvrd>, EVEX_V256;
}

defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
      avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
      avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
      avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
      avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
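
// VSHUFF*/VSHUFI* shuffle whole 128-bit lanes: each field of the immediate
// selects one source lane (two bits per result lane in the 512-bit form),
// with the low half of the result drawn from the first source and the high
// half from the second.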

let Predicates = [HasAVX512] in {
// Provide fallback in case the load node that is used in the broadcast
// patterns above is used by additional users, which prevents the pattern
// from being removed.
def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
}

multiclass avx512_valign<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
  // instantiation of this class.
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$src3)))>,
                  Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (X86VAlign _.RC:$src1,
                                   (bitconvert (_.LdFrag addr:$src2)),
                                   (i8 imm:$src3)))>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>,
                  EVEX2VEXOverride<"VPALIGNRrmi">;

  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                  "$src1, ${src2}"##_.BroadcastStr##", $src3",
                  (X86VAlign _.RC:$src1,
                             (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                             (i8 imm:$src3))>, EVEX_B,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
                                AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
             AVX512AIi8Base, EVEX_4V, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
                AVX512AIi8Base, EVEX_4V, EVEX_V128;
    // We can't really override the 256-bit version so change it back to unset.
    let EVEX2VEXOverride = ? in
    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
                AVX512AIi8Base, EVEX_4V, EVEX_V256;
  }
}

defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
                                   VEX_W;

defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
                                         SchedWriteShuffle, avx512vl_i8_info,
                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;

// Fragments to help convert valignq into masked valignd. Or valignq/valignd
// into vpalignr.
def ValignqImm32XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
}]>;
def ValignqImm8XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
}]>;
def ValigndImm8XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
}]>;
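
// Worked example: a masked "valignq $1" on v8i64 shifts by one qword, which
// moves the same bytes as "valignd $2" on v16i32, hence the *2 scaling above
// (and *8 or *4 when retargeting valignq or valignd to byte-granular
// vpalignr).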

multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
                                        X86VectorVTInfo From, X86VectorVTInfo To,
                                        SDNodeXForm ImmXForm> {
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                              imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, To.RC:$src2,
                                                  (ImmXForm imm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                              imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
                                                   To.RC:$src1, To.RC:$src2,
                                                   (ImmXForm imm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                              (From.LdFrag addr:$src2),
                                              imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, addr:$src2,
                                                  (ImmXForm imm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                              (From.LdFrag addr:$src2),
                                              imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm imm:$src3))>;
}

multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
                                           X86VectorVTInfo From,
                                           X86VectorVTInfo To,
                                           SDNodeXForm ImmXForm> :
  avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  def : Pat<(From.VT (OpNode From.RC:$src1,
                      (bitconvert (To.VT (X86VBroadcast
                                          (To.ScalarLdFrag addr:$src2)))),
                      imm:$src3)),
            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
                                                  (ImmXForm imm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                              (bitconvert
                                               (To.VT (X86VBroadcast
                                                       (To.ScalarLdFrag addr:$src2)))),
                                              imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm imm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                              (bitconvert
                                               (To.VT (X86VBroadcast
                                                       (To.ScalarLdFrag addr:$src2)))),
                                              imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
                                                    To.RC:$src1, addr:$src2,
                                                    (ImmXForm imm:$src3))>;
}

let Predicates = [HasAVX512] in {
  // For 512-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
                                         v16i32_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX] in {
  // For 128-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
                                         v4i32x_info, ValignqImm32XForm>;
  // For 256-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
                                         v8i32x_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX, HasBWI] in {
  // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
                                      v16i8x_info, ValignqImm8XForm>;
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
                                      v16i8x_info, ValigndImm8XForm>;
}

defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                                          SchedWritePSADBW, avx512vl_i16_info,
                                          avx512vl_i8_info>,
                                          EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;

multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1), OpcodeStr,
                    "$src1", "$src1",
                    (_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase,
                    Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.MemOp:$src1), OpcodeStr,
                  "$src1", "$src1",
                  (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
                  EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded]>;
  }
}

multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
            avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.ScalarMemOp:$src1), OpcodeStr,
                  "${src1}"##_.BroadcastStr,
                  "${src1}"##_.BroadcastStr,
                  (_.VT (OpNode (X86VBroadcast
                                 (_.ScalarLdFrag addr:$src1))))>,
                  EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded]>;
}

multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
             EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
                               Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
             EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
                               avx512vl_i64_info, prd>, VEX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
                               avx512vl_i32_info, prd>;
}

multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
                              avx512vl_i16_info, prd>, VEX_WIG;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
                              avx512vl_i8_info, prd>, VEX_WIG;
}

multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched> {
  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
                                    HasAVX512>,
              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
                                    HasBWI>;
}

defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
                                    SchedWriteVecALU>;

// VPABS: Use the 512-bit version to implement 128/256-bit in case of NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (abs VR256X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v2i64 (abs VR128X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
             sub_xmm)>;
}

// Use the 512-bit version to implement 128/256-bit.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX] in {
    def : Pat<(_.info256.VT (OpNode _.info256.RC:$src1)),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                 _.info256.RC:$src1,
                                 _.info256.SubRegIdx)),
                _.info256.SubRegIdx)>;

    def : Pat<(_.info128.VT (OpNode _.info128.RC:$src1)),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                 _.info128.RC:$src1,
                                 _.info128.SubRegIdx)),
                _.info128.SubRegIdx)>;
  }
}

defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                     SchedWriteVecIMul, HasCDI>;

// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SchedWriteVecALU, HasCDI>;

// VPLZCNT: Use the 512-bit version to implement 128/256-bit in case of NoVLX.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;

//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SchedWriteVecALU, HasVPOPCNTDQ>;

defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;

//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86SchedWriteWidths sched> {
  defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
                                avx512vl_f32_info, HasAVX512>, XS;
}

defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
                                  SchedWriteFShuffle>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
                                  SchedWriteFShuffle>;

//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//

multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
                   (_.VT (OpNode (_.VT _.RC:$src)))>, EVEX,
                   Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                 (_.VT (OpNode (_.ScalarLdFrag addr:$src)))>,
                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
                 Sched<[sched.Folded]>;
  }
}

multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
                           VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
                                VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, sched.XMM,
                                   VTInfo.info128>, EVEX_V128;
  }
}

multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86SchedWriteWidths sched> {
  defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
                                   avx512vl_f64_info>, XD, VEX_W;
}

defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;
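
// VMOVDDUP duplicates the even-indexed double of each 128-bit half, so the
// 128-bit form is simply a broadcast of the low double and its memory form
// reads only 64 bits; that is why Z128 is defined above via X86VBroadcast
// with a scalar memory operand.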

let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
          (VMOVDDUPZ128rm addr:$src)>;
def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
          (VMOVDDUPZ128rm addr:$src)>;

def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                   immAllZerosV),
          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                   immAllZerosV),
          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;

def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
                   immAllZerosV),
          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//

defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
                                 SchedWriteFShuffleSizes, 0, 1>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
                                 SchedWriteFShuffleSizes>;

defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;

defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                                        SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SchedWriteShuffle, HasAVX512>;
//===----------------------------------------------------------------------===//
// AVX-512 - Extract & Insert Integer Instructions
//===----------------------------------------------------------------------===//

multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
                      addr:$dst)]>,
              EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}

multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GR32orGR64:$dst,
                        (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
                  EVEX, TAPD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
  }
}

multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GR32orGR64:$dst,
                        (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
                  EVEX, PD, Sched<[WriteVecExtract]>;

    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                   (ins _.RC:$src1, u8imm:$src2),
                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                   EVEX, TAPD, FoldGenData<NAME#rr>,
                   Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
  }
}

multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                 RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GRC:$dst,
                      (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
                  EVEX, TAPD, Sched<[WriteVecExtract]>;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(store (extractelt (_.VT _.RC:$src1),
                                    imm:$src2),addr:$dst)]>,
                EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
                Sched<[WriteVecExtractSt]>;
  }
}

defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, PatFrag LdFrag> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
              (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
              OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
              [(set _.RC:$dst,
                  (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
              EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
                (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                [(set _.RC:$dst,
                    (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
                Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
  }
}

multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
                (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                [(set _.RC:$dst,
                    (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
                EVEX_4V, TAPD, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag>, TAPD;
  }
}

defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>, TAPD, VEX_WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>, PD, VEX_WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
//===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations
//===----------------------------------------------------------------------===//

multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
                        AVX512VLVectorVTInfo VTInfo_FP>{
  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                    SchedWriteFShuffle>,
                                    EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
                                    AVX512AIi8Base, EVEX_4V;
}

defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - Byte shift Left/Right
//===----------------------------------------------------------------------===//

// FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well?
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  def rr : AVX512<opc, MRMr,
             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512<opc, MRMm,
             (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst,(_.VT (OpNode
                                    (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                    (i8 imm:$src2))))]>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in
    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                 sched.ZMM, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.YMM, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.XMM, v16i8x_info>, EVEX_V128;
  }
}

defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                 (OpNode (_src.VT _src.RC:$src1),
                                         (_src.VT _src.RC:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                 (OpNode (_src.VT _src.RC:$src1),
                                         (_src.VT (bitconvert
                                                   (_src.LdFrag addr:$src2))))))]>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, X86SchedWriteWidths sched,
                                    Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
                                  v8i64_info, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
                                     v4i64x_info, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
                                     v2i64x_info, v16i8x_info>, EVEX_V128;
  }
}

defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                                        SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
// Transforms to swizzle an immediate to enable better matching when
// memory operand isn't in the right place.
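// For example, bit i of a VPTERNLOG immediate is the result for the input
// triple whose bits spell i (operand 0 is the most significant bit), so 0xCA
// computes "src0 ? src1 : src2"; running it through VPTERNLOG321_imm8 below
// (swap operands 0 and 2) gives 0xD8, i.e. "src2 ? src1 : src0".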
def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/4 and 3/6.
  uint8_t NewImm = Imm & 0xa5;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 2/4 and 3/5.
  uint8_t NewImm = Imm & 0xc3;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x20) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/2 and 5/6.
  uint8_t NewImm = Imm & 0x99;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by moving operand 1 to the end.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
                          string Name>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 imm:$src4)), 1, 1>,
                      AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (bitconvert (_.LdFrag addr:$src3))),
                              (i8 imm:$src4)), 1, 0>,
                      AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                      "$src2, ${src3}"##_.BroadcastStr##", $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                              (i8 imm:$src4)), 1, 0>, EVEX_B,
                      AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }// Constraints = "$src1 = $dst"
  // Additional patterns for matching passthru operand in other positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;

  // Additional patterns for matching loads in other positions.
  def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
             addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
             addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching zero masking with loads in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching masked loads with different
  // operand orders.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
  // Additional patterns for matching broadcasts in other positions.
  def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                          _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
             addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                          _.RC:$src2, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
             addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching zero masking with broadcasts in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching masked broadcasts with different
  // operand orders.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    (i8 imm:$src4)), _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
}
multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
                            _.info512, NAME>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
                               _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
                               _.info256, NAME>, EVEX_V256;
  }
}

defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
                                        avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
                                        avx512vl_i64_info>, VEX_W;
// Patterns to implement vnot using vpternlog instead of creating all ones
// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
// so that the result is only dependent on src0. But we use the same source
// for all operands to prevent a false dependency.
// TODO: We should maybe have a more generalized algorithm for folding to
// vpternlog.
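// (For imm 15 = 0b00001111: bit i of the immediate is set exactly when the
// operand-0 bit of i is clear, so the result is ~src0 regardless of what
// operands 1 and 2 hold.)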
let Predicates = [HasAVX512] in {
  def : Pat<(xor VR512:$src, (v64i8 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(xor VR512:$src, (v32i16 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(xor VR512:$src, (v16i32 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(xor VR512:$src, (v8i64 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;

  def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
}
let Predicates = [HasVLX] in {
  def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;

  def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}
//===----------------------------------------------------------------------===//
// AVX-512 - FixupImm
//===----------------------------------------------------------------------===//

multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo TblVT>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                        OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (X86VFixupimm (_.VT _.RC:$src1),
                                      (_.VT _.RC:$src2),
                                      (TblVT.VT _.RC:$src3),
                                      (i32 imm:$src4))>, Sched<[sched]>;
    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
                        OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (X86VFixupimm (_.VT _.RC:$src1),
                                      (_.VT _.RC:$src2),
                                      (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
                                      (i32 imm:$src4))>,
                        Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                        OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                        "$src2, ${src3}"##_.BroadcastStr##", $src4",
                        (X86VFixupimm (_.VT _.RC:$src1),
                                      (_.VT _.RC:$src2),
                                      (TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))),
                                      (i32 imm:$src4))>,
                        EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // Constraints = "$src1 = $dst"
}
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>
  : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                        OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                        "$src2, $src3, {sae}, $src4",
                        (X86VFixupimmSAE (_.VT _.RC:$src1),
                                         (_.VT _.RC:$src2),
                                         (TblVT.VT _.RC:$src3),
                                         (i32 imm:$src4))>,
                        EVEX_B, Sched<[sched]>;
  }
}
multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo _src3VT> {
  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                        OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (X86VFixupimms (_.VT _.RC:$src1),
                                       (_.VT _.RC:$src2),
                                       (_src3VT.VT _src3VT.RC:$src3),
                                       (i32 imm:$src4))>, Sched<[sched]>;
    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                        OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                        "$src2, $src3, {sae}, $src4",
                        (X86VFixupimmSAEs (_.VT _.RC:$src1),
                                          (_.VT _.RC:$src2),
                                          (_src3VT.VT _src3VT.RC:$src3),
                                          (i32 imm:$src4))>,
                        EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                        OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (X86VFixupimms (_.VT _.RC:$src1),
                                       (_.VT _.RC:$src2),
                                       (_src3VT.VT (scalar_to_vector
                                                    (_src3VT.ScalarLdFrag addr:$src3))),
                                       (i32 imm:$src4))>,
                        Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
                                      AVX512VLVectorVTInfo _Vec,
                                      AVX512VLVectorVTInfo _Tbl> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
                                        _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
                                        EVEX_4V, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
                                       _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
                                       EVEX_4V, EVEX_V128;
    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
                                       _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
                                       EVEX_4V, EVEX_V256;
  }
}

defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
                                           AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
                                           AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
                                              avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
                                              avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
// Patterns used to select SSE scalar fp arithmetic instructions from
// either:
//
// (1) a scalar fp operation followed by a blend
//
// The effect is that the backend no longer emits unnecessary vector
// insert instructions immediately after SSE scalar fp instructions
// like addss or mulss.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     A[0] += B[0];
//     return A;
//   }
//
// Previously we generated:
//   addss %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0
//
// (2) a vector packed single/double fp operation followed by a vector insert
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     __m128 C = A + B;
//     return (__m128) {c[0], a[1], a[2], a[3]};
//   }
//
// Previously we generated:
//   addps %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0
//
// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.
multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode,
                                          X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    // extracted scalar math op with insert via movss
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          _.FRC:$src)))),
              (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          (_.ScalarLdFrag addr:$src))))),
              (!cast<Instruction>("V"#OpcPrefix#Zrm_Int) _.VT:$dst, addr:$src)>;

    // extracted masked scalar math op with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src2),
                            _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk)
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                (_.ScalarLdFrag addr:$src2)),
                            _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#Zrm_Intk)
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1, addr:$src2)>;

    // extracted masked scalar math op with insert via movss, where the
    // passthru is zero
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src2), (_.EltVT ZeroFP)))),
              (!cast<I>("V"#OpcPrefix#Zrr_Intkz)
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
              (!cast<I>("V"#OpcPrefix#Zrm_Intkz) VK1WM:$mask, _.VT:$src1, addr:$src2)>;
  }
}

defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;

defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
                                             SDNode Move, X86VectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src)>;
  }
}

defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
//===----------------------------------------------------------------------===//
// AES instructions
//===----------------------------------------------------------------------===//

multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
  let Predicates = [HasVLX, HasVAES] in {
    defm Z128 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix),
                                  loadv2i64, 0, VR128X, i128mem>,
                EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
    defm Z256 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix##"_256"),
                                  loadv4i64, 0, VR256X, i256mem>,
                EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512, HasVAES] in
    defm Z : AESI_binop_rm_int<Op, OpStr,
                               !cast<Intrinsic>(IntPrefix##"_512"),
                               loadv8i64, 0, VR512, i512mem>,
             EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
}

defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
defm VAESDEC : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
//===----------------------------------------------------------------------===//
// PCLMUL instructions - Carry less multiplication
//===----------------------------------------------------------------------===//

let Predicates = [HasAVX512, HasVPCLMULQDQ] in
defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
                   EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;

let Predicates = [HasVLX, HasVPCLMULQDQ] in {
defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
                      EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;

defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
                                int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
                                EVEX_CD8<64, CD8VF>, VEX_WIG;
}

// Aliases
defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
//===----------------------------------------------------------------------===//
// VBMI2
//===----------------------------------------------------------------------===//

multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain = VTI.ExeDomain in {
    defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
                AVX512FMA3Base, Sched<[sched]>;
    defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                         (VTI.VT (VTI.LdFrag addr:$src3))))>,
                AVX512FMA3Base,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
  : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain = VTI.ExeDomain in
    defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
                "${src3}"##VTI.BroadcastStr##", $src2",
                "$src2, ${src3}"##VTI.BroadcastStr,
                (OpNode VTI.RC:$src1, VTI.RC:$src2,
                 (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>,
                AVX512FMA3Base, EVEX_B,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
    defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
             EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                EVEX_V128;
  }
}

multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
    defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
             EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                EVEX_V128;
  }
}

multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, sched,
                                     avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, sched,
                                      avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, sched,
                                      avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
}
multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", sched,
                                     avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
           VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
           OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
  defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
           sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
}

// Concat & Shift
defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;

// Compress
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
                                         avx512vl_i8_info, HasVBMI2>, EVEX,
                                         NotMemoryFoldable;
defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
                                          avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
                                          NotMemoryFoldable;
// Expand
defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
                                      avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
                                      avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
//===----------------------------------------------------------------------===//
// VNNI
//===----------------------------------------------------------------------===//

let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                    bit IsCommutable> {
  defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                                "$src3, $src2", "$src2, $src3",
                                (VTI.VT (OpNode VTI.RC:$src1,
                                                VTI.RC:$src2, VTI.RC:$src3)),
                                IsCommutable, IsCommutable>,
           EVEX_4V, T8PD, Sched<[sched]>;
  defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                                "$src3, $src2", "$src2, $src3",
                                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                         (VTI.VT (VTI.LdFrag addr:$src3))))>,
           EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
                                 OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
                                 "$src2, ${src3}"##VTI.BroadcastStr,
                                 (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                  (VTI.VT (X86VBroadcast
                                           (VTI.ScalarLdFrag addr:$src3))))>,
            EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
            T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
                       X86SchedWriteWidths sched, bit IsCommutable> {
  let Predicates = [HasVNNI] in
    defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
                      IsCommutable>, EVEX_V512;
  let Predicates = [HasVNNI, HasVLX] in {
    defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
                         IsCommutable>, EVEX_V256;
    defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
                         IsCommutable>, EVEX_V128;
  }
}
// FIXME: Is there a better scheduler class for VPDP?
defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
                             (X86vpmaddwd node:$lhs, node:$rhs), [{
  return N->hasOneUse();
}]>;

// Patterns to match VPDPWSSD from existing instructions/intrinsics.
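// Note the _su PatFrag above: the vpmaddwd is only folded when it has a
// single use, since otherwise the multiply would still have to be computed
// separately for its other users.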
let Predicates = [HasVNNI] in {
  def : Pat<(v16i32 (add VR512:$src1,
                         (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
            (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
  def : Pat<(v16i32 (add VR512:$src1,
                         (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
            (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
}
let Predicates = [HasVNNI,HasVLX] in {
  def : Pat<(v8i32 (add VR256X:$src1,
                        (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
            (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
  def : Pat<(v8i32 (add VR256X:$src1,
                        (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
            (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
  def : Pat<(v4i32 (add VR128X:$src1,
                        (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
            (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
  def : Pat<(v4i32 (add VR128X:$src1,
                        (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
            (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
}
//===----------------------------------------------------------------------===//
// Bit Algorithms
//===----------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
                                   avx512vl_i8_info, HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
                                   avx512vl_i16_info, HasBITALG>, VEX_W;

defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;

def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
                                 (X86Vpshufbitqmb node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.RC:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                 (VTI.VT VTI.RC:$src2)),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                 (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
                                Sched<[sched]>;
  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                 (VTI.VT (VTI.LdFrag addr:$src2))),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                 (VTI.VT (VTI.LdFrag addr:$src2)))>,
                                EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
                                Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasBITALG] in
    defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
  let Predicates = [HasBITALG, HasVLX] in {
    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
//===----------------------------------------------------------------------===//
// GFNI
//===----------------------------------------------------------------------===//

multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                   X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
    defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
             EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
                EVEX_V256;
    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
                EVEX_V128;
  }
}

defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
                                          SchedWriteVecALU>,
                                          EVEX_CD8<8, CD8VF>, T8PD;

multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                                      X86VectorVTInfo BcstVTI>
  : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
  let ExeDomain = VTI.ExeDomain in
    defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                  (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
                  OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
                  "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
                  (OpNode (VTI.VT VTI.RC:$src1),
                   (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
                   (i8 imm:$src3))>, EVEX_B,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
    defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
                                        v64i8_info, v8i64_info>, EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
                                           v32i8x_info, v4i64x_info>, EVEX_V256;
    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
                                           v16i8x_info, v2i64x_info>, EVEX_V128;
  }
}

defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
                                   X86GF2P8affineinvqb, SchedWriteVecIMul>,
                                   EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
                                   X86GF2P8affineqb, SchedWriteVecIMul>,
                                   EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
//===----------------------------------------------------------------------===//
// AVX5124FMAPS
//===----------------------------------------------------------------------===//

let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
    Constraints = "$src1 = $dst" in {
defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
                    (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
                    []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                    Sched<[SchedWriteFMA.Scl.Folded]>;

defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
                     []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                     Sched<[SchedWriteFMA.Scl.Folded]>;
}
//===----------------------------------------------------------------------===//
// AVX5124VNNIW
//===----------------------------------------------------------------------===//

let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
    Constraints = "$src1 = $dst" in {
defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "vp4dpwssd", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;
}

let hasSideEffects = 0 in {
  let mayStore = 1 in
  def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
  let mayLoad = 1 in
  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
}
//===----------------------------------------------------------------------===//
// VP2INTERSECT
//===----------------------------------------------------------------------===//

multiclass avx512_vp2intersect_modes<X86VectorVTInfo _> {
  def rr : I<0x68, MRMSrcReg,
             (outs _.KRPC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat("vp2intersect", _.Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRPC:$dst, (X86vp2intersect
                                 _.RC:$src1, (_.VT _.RC:$src2)))]>,
             EVEX_4V, T8XD;

  def rm : I<0x68, MRMSrcMem,
             (outs _.KRPC:$dst),
             (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat("vp2intersect", _.Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRPC:$dst, (X86vp2intersect
                                 _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
             EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>;

  def rmb : I<0x68, MRMSrcMem,
              (outs _.KRPC:$dst),
              (ins _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
                         ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
              [(set _.KRPC:$dst, (X86vp2intersect
                                  _.RC:$src1, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2)))))]>,
              EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
}

multiclass avx512_vp2intersect<AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512, HasVP2INTERSECT] in
    defm Z : avx512_vp2intersect_modes<_.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
    defm Z256 : avx512_vp2intersect_modes<_.info256>, EVEX_V256;
    defm Z128 : avx512_vp2intersect_modes<_.info128>, EVEX_V128;
  }
}

defm VP2INTERSECTD : avx512_vp2intersect<avx512vl_i32_info>;
defm VP2INTERSECTQ : avx512_vp2intersect<avx512vl_i64_info>, VEX_W;
multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo _SrcVTInfo,
                             AVX512VLVectorVTInfo _DstVTInfo,
                             SDNode OpNode, Predicate prd,
                             bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                   _SrcVTInfo.info512, _DstVTInfo.info512,
                                   _SrcVTInfo.info512, IsCommutable>,
                                   EVEX_V512, EVEX_CD8<32, CD8VF>;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      _SrcVTInfo.info256, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      _SrcVTInfo.info128, IsCommutable>,
                                      EVEX_V128, EVEX_CD8<32, CD8VF>;
  }
}

defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
                       SchedWriteCvtPD2PS, // FIXME: Should be SchedWriteCvtPS2BF
                       avx512vl_f32_info, avx512vl_i16_info,
                       X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
// Truncate Float to BFloat16
multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched> {
  let Predicates = [HasBF16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
                            X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasBF16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
                               null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
                               VK4WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
                               X86cvtneps2bf16,
                               sched.YMM, "{1to8}", "{y}">, EVEX_V256;
  }

  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                   VR128X:$src), 0>;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
                   f128mem:$src), 0, "intel">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                   VR256X:$src), 0>;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
                   f256mem:$src), 0, "intel">;
}

defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
                                       SchedWriteCvtPD2PS>, T8XS,
                                       EVEX_CD8<32, CD8VF>;
let Predicates = [HasBF16, HasVLX] in {
  // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
  // patterns have been disabled with null_frag.
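  // (Presumably because the 128-bit form produces an 8 x i16 result of which
  // only the low four elements come from the conversion, so the generic
  // masked patterns would not model it correctly.)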
  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;

  def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
            (VCVTNEPS2BF16Z128rm addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
                                     (X86VBroadcast (loadf32 addr:$src))))),
            (VCVTNEPS2BF16Z128rmb addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))),
                              (v8i16 VR128X:$src0), VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))),
                              v8i16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
}
let Constraints = "$src1 = $dst" in {
multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86VectorVTInfo _, X86VectorVTInfo src_v> {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                               (ins _.RC:$src2, _.RC:$src3),
                               OpcodeStr, "$src3, $src2", "$src2, $src3",
                               (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
                               EVEX_4V;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                               (ins _.RC:$src2, _.MemOp:$src3),
                               OpcodeStr, "$src3, $src2", "$src2, $src3",
                               (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                                      (src_v.VT (bitconvert
                                                 (src_v.LdFrag addr:$src3)))))>, EVEX_4V;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                (ins _.RC:$src2, _.ScalarMemOp:$src3),
                                OpcodeStr,
                                !strconcat("${src3}", _.BroadcastStr,", $src2"),
                                !strconcat("$src2, ${src3}", _.BroadcastStr),
                                (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                                       (src_v.VT (X86VBroadcast(src_v.ScalarLdFrag addr:$src3)))))>,
                                EVEX_B, EVEX_4V;
}
} // Constraints = "$src1 = $dst"

multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _,
                                 AVX512VLVectorVTInfo src_v, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info512,
                                src_v.info512>, EVEX_V512;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info256,
                                   src_v.info256>, EVEX_V256;
    defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info128,
                                   src_v.info128>, EVEX_V128;
  }
}

defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps,
                                       avx512vl_f32_info, avx512vl_i32_info,
                                       HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;