//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//
// Group template arguments that can be derived from the vector type (EltNum x
// EltVT). These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types, in this case numelts is 1.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  RegisterClass RC = rc;
  ValueType EltVT = eltvt;
  int NumElts = numelts;

  // Corresponding mask register class.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Corresponding mask register pair class.
  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));

  // Corresponding write-mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // The mask VT.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // VTName is a string name for the vector VT. For vector types it will be
  // v # NumElts # EltVT, so for a vector of 8 elements of i32 it will be v8i32.
  // It is a little bit more complex for scalar types, where NumElts = 1.
  // In this case we build v4f32 or v2f64.
  string VTName = "v" # !if (!eq (NumElts, 1),
                        !if (!eq (EltVT.Size, 32), 4,
                        !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;

  // The vector VT.
  ValueType VT = !cast<ValueType>(VTName);

  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits, e.g. 32 for v16i32.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of RC in bits, e.g. 512 for VR512.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
  // FP scalar memory operand for intrinsics - ssmem/sdmem.
  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));

  // Load patterns.
  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);

  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);

  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);

  ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
                                         !cast<ComplexPattern>("sse_load_f32"),
                                    !if (!eq (EltTypeName, "f64"),
                                         !cast<ComplexPattern>("sse_load_f64"),
                                         ?));

  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format. This is only
  // defined for NumElts <= 8.
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);

  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                     SSEPackedInt));

  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);

  dag ImmAllZerosV = (VT immAllZerosV);

  string ZSuffix = !if (!eq (Size, 128), "Z128",
                   !if (!eq (Size, 256), "Z256", "Z"));
}
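// Illustrative sketch (a comment only, nothing here is compiled): the v16i32
// instantiation below, X86VectorVTInfo<16, i32, VR512, "d">, derives among
// other fields:
//   VTName = "v16i32", KRC = VK16, KRCWM = VK16WM, KVT = v16i1,
//   EltSize = 32, Size = 512, MemOp = i512mem,
//   BroadcastStr = "{1to16}", ZSuffix = "Z".
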
def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;

// "x" in v32i8x_info means RC = VR256X
def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;

def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;

// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows using the same masking logic.
def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
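// Note (sketch of derived fields, not extra definitions): for the scalar
// infos above the NumElts == 1 special case applies, so e.g. f32x_info gets
// VTName = "v4f32" and the scalar value is modeled in the low element of an
// XMM register, while its mask classes come from VK1/VK1WM.
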
class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}

def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
                                             v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
                                             v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
                                             v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
                                             v2i64x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
                                             v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
                                             v2f64x_info>;
class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
                       ValueType _vt> {
  RegisterClass KRC = _krc;
  RegisterClass KRCWM = _krcwm;
  ValueType KVT = _vt;
}

def v1i1_info  : X86KVectorVTInfo<VK1,  VK1WM,  v1i1>;
def v2i1_info  : X86KVectorVTInfo<VK2,  VK2WM,  v2i1>;
def v4i1_info  : X86KVectorVTInfo<VK4,  VK4WM,  v4i1>;
def v8i1_info  : X86KVectorVTInfo<VK8,  VK8WM,  v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> {
  let isCommutable = IsCommutable in
    def NAME: AVX512<O, F, Outs, Ins,
                     OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                   "$dst, "#IntelSrcAsm#"}",
                     Pattern>;

  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>,
                EVEX_K {
      // In case of the 3src subclass this is overridden with a let.
      string Constraints = MaskingConstraint;
    }

  // Zero mask does not add any restrictions to commute operands transformation.
  // So, it is OK to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                        OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                      "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                        ZeroMaskingPattern>,
                 EVEX_KZ;
}
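// Sketch of what this multiclass emits for a hypothetical mnemonic "vfoo"
// with AttSrcAsm = "$src2, $src1" (AT&T syntax; illustrative only):
//   vfoo $src2, $src1, $dst                ; NAME    (unmasked)
//   vfoo $src2, $src1, $dst {${mask}}      ; NAME#k  (merge-masking, EVEX_K)
//   vfoo $src2, $src1, $dst {${mask}} {z}  ; NAME#kz (zero-masking, EVEX_KZ)
// The merge-masking form also carries MaskingConstraint (usually
// "$src0 = $dst") so the preserved elements come from the tied operand.
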
// Common base class of AVX512_maskable and AVX512_maskable_3src.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDNode Select = vselect,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable, IsKZCommutable>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                                 dag Outs, dag Ins, string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskRHS,
                                 bit IsCommutable = 0, bit IsKCommutable = 0,
                                 SDNode Select = vselect> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                         "$src0 = $dst", IsCommutable, IsKCommutable>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDNode Select = vselect> :
  AVX512_maskable_common<O, F, _, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                         (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                         Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                         IsKZCommutable>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins, string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS> :
  AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                  RHS, 0, 0, 0, X86selects>;
// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDNode Select = vselect,
                                bit MaskOnly = 0> :
  AVX512_maskable_common<O, F, _, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         !if(MaskOnly, (null_frag), RHS),
                         (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                         Select, "", IsCommutable, IsKCommutable>;
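// Illustrative note: the FMA instructions are an example of this shape --
// $src1 is both a source and the destination, so the masking variants reuse
// that tied register for the preserved elements instead of adding a separate
// $src0 operand (hence the empty MaskingConstraint passed above).
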
// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
  AVX512_maskable_common<O, F, OutVT, Outs,
                         !con((ins InVT.RC:$src1), NonTiedIns),
                         !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                         !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                         (vselect InVT.KRCWM:$mask, RHS,
                                  (bitconvert InVT.RC:$src1)),
                         vselect, "", IsCommutable>;
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns, string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       dag RHS,
                                       bit IsCommutable = 0,
                                       bit IsKCommutable = 0,
                                       bit MaskOnly = 0> :
  AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                       IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                       X86selects, MaskOnly>;
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                         "$src0 = $dst">;

multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
  AVX512_maskable_custom<O, F, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                         "">;
// Instruction with mask that puts result in mask register,
// like "compare" and "vptest".
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      list<dag> Pattern,
                                      list<dag> MaskingPattern,
                                      bit IsCommutable = 0> {
  let isCommutable = IsCommutable in {
    def NAME: AVX512<O, F, Outs, Ins,
                     OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                   "$dst, "#IntelSrcAsm#"}",
                     Pattern>;

    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>, EVEX_K;
  }
}
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      dag RHS, dag MaskingRHS,
                                      bit IsCommutable = 0> :
  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                             AttSrcAsm, IntelSrcAsm,
                             [(set _.KRC:$dst, RHS)],
                             [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;

multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag Ins, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag RHS_su, bit IsCommutable = 0> :
  AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                             !con((ins _.KRCWM:$mask), Ins),
                             OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                             (and _.KRCWM:$mask, RHS_su), IsCommutable>;

// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                        [(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                              [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}
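// Sketch of the post-RA expansion (not encoded here): AVX512_512_SET0
// typically becomes a 128-bit XOR-zeroing idiom of the corresponding XMM
// register, since writing an XMM implicitly zeroes the upper bits of the
// containing ZMM; registers that require EVEX encoding fall back to a
// full-width vpxord.
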
// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all ones and all zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                                [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                           (v16i32 immAllOnesV),
                                                           (v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                                           (v8i64 immAllOnesV),
                                                           (v8i64 immAllZerosV)))]>;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                        [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
                        [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}
// Alias instructions that map fld0 to xorps for SSE or vxorps for AVX.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fpimm0)]>;
}
//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;

    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT (From.LdFrag addr:$src2)),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT (From.LdFrag addr:$src2)),
                                           (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
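// For instance, the 128-into-512 float instance assembles as (AT&T,
// illustrative sketch): "vinsertf32x4 $1, %xmm2, %zmm1, %zmm0 {%k1}" for the
// masked "rr" form; the "rm" form folds a 128-bit load, with the displacement
// compressed via EVEX_CD8 in units of the inserted subvector (16 bytes here).
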
// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;

multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                     X86VectorVTInfo To, PatFrag vinsert_insert,
                                     SDNodeXForm INSERT_get_vinsert_imm,
                                     list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vinsert_insert:$ins
                  (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                      To.RC:$src1, From.RC:$src2,
                      (INSERT_get_vinsert_imm To.RC:$ins)))>;

    def : Pat<(vinsert_insert:$ins
                  (To.VT To.RC:$src1),
                  (From.VT (From.LdFrag addr:$src2)),
                  (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                      To.RC:$src1, addr:$src2,
                      (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   null_frag, vinsert128_insert, sched>,
                                   VEX_W1X, EVEX_V256;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 VEX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 null_frag, vinsert256_insert, sched>,
                                 EVEX_V512;
  }
}
// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;

// Codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen patterns with the alternative types: insert VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen patterns with the alternative types: insert VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: insert VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT
                                             (bitconvert
                                              (From.LdFrag addr:$src2))),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT (From.LdFrag addr:$src2)),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}

defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

// vinsertps - insert f32 to XMM
let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
def VINSERTPSZrm : AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          imm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
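// E.g. "vinsertps $0x10, %xmm2, %xmm1, %xmm0" copies element 0 of %xmm2 into
// element 1 of %xmm1: imm bits [7:6] select the source element, [5:4] the
// destination slot, and [3:0] are a zero mask. (Illustrative note.)
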
//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                         "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                            addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                         (ins To.MemOp:$dst, To.KRCWM:$mask,
                              From.RC:$src1, u8imm:$idx),
                         "vextract" # To.EltTypeName # "x" # To.NumElts #
                              "\t{$idx, $src1, $dst {${mask}}|"
                              "$dst {${mask}}, $src1, $idx}", []>,
                         EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
  }
}
// Passes the same pattern operator for masked and unmasked ops.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;
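// For instance, the 128-from-512 float variant built from this assembles as
// (AT&T; illustrative sketch of the emitted forms):
//   vextractf32x4 $3, %zmm0, %xmm1 {%k1} {z}   ; zero-masked "rr" form
//   vextractf32x4 $3, %zmm0, 64(%rsp)          ; "mr" store form
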
// Codegen patterns for the alternative types.
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                      X86VectorVTInfo To, PatFrag vextract_extract,
                                      SDNodeXForm EXTRACT_get_vextract_imm,
                                      list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                      From.RC:$src1,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
    def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                             (iPTR imm))), addr:$dst),
              (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
               (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   vextract128_extract, SchedRR, SchedMR>,
                                   EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                                   X86VectorVTInfo< 8, EltVT64, VR512>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   vextract256_extract, SchedRR, SchedMR>,
                                   VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;

    let Predicates = [HasVLX] in
      defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   vextract128_extract, SchedRR, SchedMR>,
                                   EVEX_V256, EVEX_CD8<32, CD8VT4>;

    // Even with DQI we'd like to only use these instructions for masking.
    let Predicates = [HasVLX, HasDQI] in
      defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   null_frag, vextract128_extract, SchedRR, SchedMR>,
                                   VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;

    // Even with DQI we'd like to only use these instructions for masking.
    let Predicates = [HasDQI] in {
      defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                                   X86VectorVTInfo< 8, EltVT64, VR512>,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   null_frag, vextract128_extract, SchedRR, SchedMR>,
                                   VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
      defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   null_frag, vextract256_extract, SchedRR, SchedMR>,
                                   EVEX_V512, EVEX_CD8<32, CD8VT8>;
    }
  }
}

// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;

// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC256.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: extract VEC256 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [NoVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                                (bitconvert
                                 (To.VT (vextract_extract:$ext
                                         (From.VT From.RC:$src), (iPTR imm)))),
                                Cast.RC:$src0)),
              (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                        Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                        (EXTRACT_get_vextract_imm To.RC:$ext)))>;

    def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                                (bitconvert
                                 (To.VT (vextract_extract:$ext
                                         (From.VT From.RC:$src), (iPTR imm)))),
                                Cast.ImmAllZerosV)),
              (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                        Cast.KRCWM:$mask, From.RC:$src,
                        (EXTRACT_get_vextract_imm To.RC:$ext)))>;
  }
}

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
                              v8f32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
                              v4f64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

// vextractps - extract 32 bits from XMM
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
                      (ins VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
                      EVEX, VEX_WIG, Sched<[WriteVecExtract]>;

def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
                      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                              addr:$dst)]>,
                      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;

//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//

// Broadcast with a scalar argument.
multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
                                   string Name,
                                   X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#r)
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rk)
             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rkz)
             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}

// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     string Name,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast> {
  let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in {
    defm r : AVX512_maskable_split<opc, MRMSrcReg, MaskInfo,
                                   (outs MaskInfo.RC:$dst),
                                   (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
                                   (MaskInfo.VT
                                    (bitconvert
                                     (DestInfo.VT
                                      (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))),
                                   (MaskInfo.VT
                                    (bitconvert
                                     (DestInfo.VT
                                      (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))))>,
                                   T8PD, EVEX, Sched<[SchedRR]>;

    defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo,
                                   (outs MaskInfo.RC:$dst),
                                   (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                                   (MaskInfo.VT
                                    (bitconvert
                                     (DestInfo.VT (UnmaskedOp
                                                   (SrcInfo.ScalarLdFrag addr:$src))))),
                                   (MaskInfo.VT
                                    (bitconvert
                                     (DestInfo.VT (X86VBroadcast
                                                   (SrcInfo.ScalarLdFrag addr:$src)))))>,
                                   T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>,
                                   Sched<[SchedRM]>;
  }

  def : Pat<(MaskInfo.VT
             (bitconvert
              (DestInfo.VT (UnmaskedOp
                            (SrcInfo.VT (scalar_to_vector
                                         (SrcInfo.ScalarLdFrag addr:$src))))))),
            (!cast<Instruction>(Name#MaskInfo.ZSuffix#m) addr:$src)>;
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                                          (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#mk)
             MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>;
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                                          (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#MaskInfo.ZSuffix#mkz)
             MaskInfo.KRCWM:$mask, addr:$src)>;
}

// Helper class to force mask and broadcast result to same type.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo> :
  avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo>;

multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128>,
             avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                     _.info128>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                        _.info128>,
                EVEX_V256;
  }
}

multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128>,
             avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                     _.info128>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                        _.info128>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info128, _.info128>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
                                        _.info128>,
                EVEX_V128;
  }
}

defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                           avx512vl_f32_info>;
defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                           avx512vl_f64_info>, VEX_W1X;
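// Illustrative examples of what these defms produce (AT&T syntax):
//   vbroadcastss %xmm0, %zmm1 {%k1} {z}   ; register form (VBROADCASTSSZrkz)
//   vbroadcastsd (%rax), %ymm2            ; load form, disp8 scaled by 8
// Instruction names follow the Z/Z256/Z128 scheme from ZSuffix.
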
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  let ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins SrcRC:$src),
                           "vpbroadcast"##_.Suffix, "$src", "$src",
                           (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX,
                           Sched<[SchedRR]>;
}

multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                                      X86VectorVTInfo _, SDPatternOperator OpNode,
                                      RegisterClass SrcRC, SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable_custom<opc, MRMSrcReg,
                                  (outs _.RC:$dst), (ins GR32:$src),
                                  !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                                  !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                                  "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
                                  "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;

  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#r)
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}

multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                                         AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                                         RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
                                        OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
                                           _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
                                           _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}

multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}

defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;

// Provide aliases for broadcast from the same register class that
// automatically does the extract.
multiclass avx512_int_broadcast_rm_lowering<string Name,
                                            X86VectorVTInfo DestInfo,
                                            X86VectorVTInfo SrcInfo,
                                            X86VectorVTInfo ExtInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#"r")
             (ExtInfo.VT (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm)))>;
}

multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                      AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                 WriteShuffle256Ld, _.info512, _.info128>,
             avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info256, _.info128>,
             EVEX_V512;
    // Defined separately to avoid redefinition.
    defm Z_Alt : avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info512, _.info128>;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128>,
                avx512_int_broadcast_rm_lowering<NAME, _.info256, _.info256, _.info128>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
                                    WriteShuffleXLd, _.info128, _.info128>,
                EVEX_V128;
  }
}

defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                               avx512vl_i8_info, HasBWI>;
defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                               avx512vl_i16_info, HasBWI>;
defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                               avx512vl_i32_info, HasAVX512>;
defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                               avx512vl_i64_info, HasAVX512>, VEX_W1X;

multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                      X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                            (_Dst.VT (X86SubVBroadcast
                                      (_Src.VT (_Src.LdFrag addr:$src))))>,
                            Sched<[SchedWriteShuffle.YMM.Folded]>,
                            AVX5128IBase, EVEX;
}

// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns so that we only use the DQ instructions when masking
// is requested.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                                         X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                                  (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                                  (null_frag),
                                  (_Dst.VT (X86SubVBroadcast
                                            (_Src.VT (_Src.LdFrag addr:$src))))>,
                                  Sched<[SchedWriteShuffle.YMM.Folded]>,
                                  AVX5128IBase, EVEX;
}

let Predicates = [HasAVX512] in {
  // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
  def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
            (VPBROADCASTQZm addr:$src)>;
}

let Predicates = [HasVLX] in {
  // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
            (VPBROADCASTQZ128m addr:$src)>;
  def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
            (VPBROADCASTQZ256m addr:$src)>;
}
let Predicates = [HasVLX, HasBWI] in {
  // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
  // This means we'll encounter truncated i32 loads; match that here.
  def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
  def : Pat<(v8i16 (X86VBroadcast
                    (i16 (trunc (i32 (extloadi16 addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v8i16 (X86VBroadcast
                    (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast
                     (i16 (trunc (i32 (extloadi16 addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast
                     (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
}
let Predicates = [HasBWI] in {
  // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
  // This means we'll encounter truncated i32 loads; match that here.
  def : Pat<(v32i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZm addr:$src)>;
  def : Pat<(v32i16 (X86VBroadcast
                     (i16 (trunc (i32 (extloadi16 addr:$src)))))),
            (VPBROADCASTWZm addr:$src)>;
  def : Pat<(v32i16 (X86VBroadcast
                     (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZm addr:$src)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
//

defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                                                  v16i32_info, v4i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                                                  v16f32_info, v4f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                                                  v8i64_info, v4i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
                                                  v8f64_info, v4f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
1416 let Predicates = [HasAVX512] in {
1417 def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
1418 (VBROADCASTF64X4rm addr:$src)>;
1419 def : Pat<(v16i32 (X86SubVBroadcast (loadv8i32 addr:$src))),
1420 (VBROADCASTI64X4rm addr:$src)>;
1421 def : Pat<(v32i16 (X86SubVBroadcast (loadv16i16 addr:$src))),
1422 (VBROADCASTI64X4rm addr:$src)>;
1423 def : Pat<(v64i8 (X86SubVBroadcast (loadv32i8 addr:$src))),
1424 (VBROADCASTI64X4rm addr:$src)>;
1426 // Provide a fallback in case the load node used in the patterns above has
1427 // additional users, which prevents those patterns from being selected.
1428 def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
1429 (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1430 (v4f64 VR256X:$src), 1)>;
1431 def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
1432 (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1433 (v8f32 VR256X:$src), 1)>;
1434 def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
1435 (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1436 (v4i64 VR256X:$src), 1)>;
1437 def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
1438 (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1439 (v8i32 VR256X:$src), 1)>;
1440 def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
1441 (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1442 (v16i16 VR256X:$src), 1)>;
1443 def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
1444 (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1445 (v32i8 VR256X:$src), 1)>;
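// Rough shape of the fallback above (illustrative): for
//   (v8i64 (X86SubVBroadcast (v4i64 VR256X:$src)))
// the ymm value is placed in the low half of an undef zmm via INSERT_SUBREG,
// then duplicated into the high half, emitting roughly
//   vinserti64x4 $1, %ymm0, %zmm0, %zmm0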
1447 def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1448 (VBROADCASTF32X4rm addr:$src)>;
1449 def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1450 (VBROADCASTI32X4rm addr:$src)>;
1451 def : Pat<(v32i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
1452 (VBROADCASTI32X4rm addr:$src)>;
1453 def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
1454 (VBROADCASTI32X4rm addr:$src)>;
1456 // Patterns for selects of bitcasted operations.
1457 def : Pat<(vselect VK16WM:$mask,
1458 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1459 (v16f32 immAllZerosV)),
1460 (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1461 def : Pat<(vselect VK16WM:$mask,
1462 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1463 VR512:$src0),
1464 (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1465 def : Pat<(vselect VK16WM:$mask,
1466 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1467 (v16i32 immAllZerosV)),
1468 (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1469 def : Pat<(vselect VK16WM:$mask,
1470 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1471 VR512:$src0),
1472 (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1474 def : Pat<(vselect VK8WM:$mask,
1475 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
1476 (v8f64 immAllZerosV)),
1477 (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1478 def : Pat<(vselect VK8WM:$mask,
1479 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
1480 VR512:$src0),
1481 (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1482 def : Pat<(vselect VK8WM:$mask,
1483 (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
1484 (v8i64 immAllZerosV)),
1485 (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1486 def : Pat<(vselect VK8WM:$mask,
1487 (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
1488 VR512:$src0),
1489 (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1490 }
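// A minimal sketch of the assembly these select patterns correspond to,
// assuming a mask in %k1 (operands illustrative):
//   vselect with zeros -> vbroadcastf32x4 (%rdi), %zmm0 {%k1} {z}
//   vselect with $src0 -> vbroadcastf32x4 (%rdi), %zmm0 {%k1}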
1492 let Predicates = [HasVLX] in {
1493 defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1494 v8i32x_info, v4i32x_info>,
1495 EVEX_V256, EVEX_CD8<32, CD8VT4>;
1496 defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1497 v8f32x_info, v4f32x_info>,
1498 EVEX_V256, EVEX_CD8<32, CD8VT4>;
1500 def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1501 (VBROADCASTF32X4Z256rm addr:$src)>;
1502 def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1503 (VBROADCASTI32X4Z256rm addr:$src)>;
1504 def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
1505 (VBROADCASTI32X4Z256rm addr:$src)>;
1506 def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
1507 (VBROADCASTI32X4Z256rm addr:$src)>;
1509 // Patterns for selects of bitcasted operations.
1510 def : Pat<(vselect VK8WM:$mask,
1511 (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1512 (v8f32 immAllZerosV)),
1513 (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1514 def : Pat<(vselect VK8WM:$mask,
1515 (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1516 VR256X:$src0),
1517 (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1518 def : Pat<(vselect VK8WM:$mask,
1519 (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1520 (v8i32 immAllZerosV)),
1521 (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1522 def : Pat<(vselect VK8WM:$mask,
1523 (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1524 VR256X:$src0),
1525 (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1528 // Provide a fallback in case the load node used in the patterns above has
1529 // additional users, which prevents those patterns from being selected.
1530 def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
1531 (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1532 (v2f64 VR128X:$src), 1)>;
1533 def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
1534 (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1535 (v4f32 VR128X:$src), 1)>;
1536 def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
1537 (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1538 (v2i64 VR128X:$src), 1)>;
1539 def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
1540 (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1541 (v4i32 VR128X:$src), 1)>;
1542 def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
1543 (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1544 (v8i16 VR128X:$src), 1)>;
1545 def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
1546 (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1547 (v16i8 VR128X:$src), 1)>;
1548 }
1550 let Predicates = [HasVLX, HasDQI] in {
1551 defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1552 v4i64x_info, v2i64x_info>, VEX_W1X,
1553 EVEX_V256, EVEX_CD8<64, CD8VT2>;
1554 defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1555 v4f64x_info, v2f64x_info>, VEX_W1X,
1556 EVEX_V256, EVEX_CD8<64, CD8VT2>;
1558 // Patterns for selects of bitcasted operations.
1559 def : Pat<(vselect VK4WM:$mask,
1560 (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1561 (v4f64 immAllZerosV)),
1562 (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1563 def : Pat<(vselect VK4WM:$mask,
1564 (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1565 VR256X:$src0),
1566 (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1567 def : Pat<(vselect VK4WM:$mask,
1568 (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1569 (v4i64 immAllZerosV)),
1570 (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1571 def : Pat<(vselect VK4WM:$mask,
1572 (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1573 VR256X:$src0),
1574 (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1575 }
1577 let Predicates = [HasDQI] in {
1578 defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1579 v8i64_info, v2i64x_info>, VEX_W,
1580 EVEX_V512, EVEX_CD8<64, CD8VT2>;
1581 defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1582 v16i32_info, v8i32x_info>,
1583 EVEX_V512, EVEX_CD8<32, CD8VT8>;
1584 defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1585 v8f64_info, v2f64x_info>, VEX_W,
1586 EVEX_V512, EVEX_CD8<64, CD8VT2>;
1587 defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1588 v16f32_info, v8f32x_info>,
1589 EVEX_V512, EVEX_CD8<32, CD8VT8>;
1591 // Patterns for selects of bitcasted operations.
1592 def : Pat<(vselect VK16WM:$mask,
1593 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
1594 (v16f32 immAllZerosV)),
1595 (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
1596 def : Pat<(vselect VK16WM:$mask,
1597 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
1598 VR512:$src0),
1599 (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1600 def : Pat<(vselect VK16WM:$mask,
1601 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
1602 (v16i32 immAllZerosV)),
1603 (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
1604 def : Pat<(vselect VK16WM:$mask,
1605 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
1606 VR512:$src0),
1607 (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1609 def : Pat<(vselect VK8WM:$mask,
1610 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1611 (v8f64 immAllZerosV)),
1612 (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
1613 def : Pat<(vselect VK8WM:$mask,
1614 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1615 VR512:$src0),
1616 (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1617 def : Pat<(vselect VK8WM:$mask,
1618 (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1619 (v8i64 immAllZerosV)),
1620 (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
1621 def : Pat<(vselect VK8WM:$mask,
1622 (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1623 VR512:$src0),
1624 (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1625 }
1627 multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1628 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
1629 let Predicates = [HasDQI] in
1630 defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
1631 WriteShuffle256Ld, _Dst.info512,
1632 _Src.info512, _Src.info128, null_frag>,
1633 EVEX_V512;
1634 let Predicates = [HasDQI, HasVLX] in
1635 defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
1636 WriteShuffle256Ld, _Dst.info256,
1637 _Src.info256, _Src.info128, null_frag>,
1638 EVEX_V256;
1639 }
1641 multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1642 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
1643 avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1645 let Predicates = [HasDQI, HasVLX] in
1646 defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
1647 WriteShuffleXLd, _Dst.info128,
1648 _Src.info128, _Src.info128, null_frag>,
1649 EVEX_V128;
1650 }
1652 defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1653 avx512vl_i32_info, avx512vl_i64_info>;
1654 defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1655 avx512vl_f32_info, avx512vl_f64_info>;
1657 let Predicates = [HasVLX] in {
1658 def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))),
1659 (VBROADCASTSSZ256r (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>;
1660 def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))),
1661 (VBROADCASTSDZ256r (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>;
1662 }
1664 def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
1665 (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))>;
1666 def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
1667 (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>;
1669 def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
1670 (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))>;
1671 def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
1672 (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>;
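// These from-register broadcasts duplicate lane 0 of a wider register by
// first extracting its low xmm, e.g. (illustrative):
//   (v16f32 (X86VBroadcast (v16f32 VR512:$src)))
//   -> vbroadcastss %xmm0, %zmm0   ; EVEX reg-reg form on the low xmm of $src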
1674 //===----------------------------------------------------------------------===//
1675 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
1676 //---
1677 multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1678 X86VectorVTInfo _, RegisterClass KRC> {
1679 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1680 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1681 [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1682 EVEX, Sched<[WriteShuffle]>;
1683 }
1685 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1686 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1687 let Predicates = [HasCDI] in
1688 defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1689 let Predicates = [HasCDI, HasVLX] in {
1690 defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1691 defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1692 }
1693 }
1695 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1696 avx512vl_i32_info, VK16>;
1697 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1698 avx512vl_i64_info, VK8>, VEX_W;
1700 //===----------------------------------------------------------------------===//
1701 // -- VPERMI2 - 3 source operands form --
1702 multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1703 X86FoldableSchedWrite sched,
1704 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1705 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1706 hasSideEffects = 0 in {
1707 defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1708 (ins _.RC:$src2, _.RC:$src3),
1709 OpcodeStr, "$src3, $src2", "$src2, $src3",
1710 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1711 EVEX_4V, AVX5128IBase, Sched<[sched]>;
1713 let mayLoad = 1 in
1714 defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1715 (ins _.RC:$src2, _.MemOp:$src3),
1716 OpcodeStr, "$src3, $src2", "$src2, $src3",
1717 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1718 (_.VT (_.LdFrag addr:$src3)))), 1>,
1719 EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1720 }
1721 }
1723 multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1724 X86FoldableSchedWrite sched,
1725 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1726 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1727 hasSideEffects = 0, mayLoad = 1 in
1728 defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1729 (ins _.RC:$src2, _.ScalarMemOp:$src3),
1730 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1731 !strconcat("$src2, ${src3}", _.BroadcastStr ),
1732 (_.VT (X86VPermt2 _.RC:$src2,
1733 IdxVT.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
1734 AVX5128IBase, EVEX_4V, EVEX_B,
1735 Sched<[sched.Folded, sched.ReadAfterFold]>;
1736 }
1738 multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1739 X86FoldableSchedWrite sched,
1740 AVX512VLVectorVTInfo VTInfo,
1741 AVX512VLVectorVTInfo ShuffleMask> {
1742 defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1743 ShuffleMask.info512>,
1744 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1745 ShuffleMask.info512>, EVEX_V512;
1746 let Predicates = [HasVLX] in {
1747 defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1748 ShuffleMask.info128>,
1749 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1750 ShuffleMask.info128>, EVEX_V128;
1751 defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1752 ShuffleMask.info256>,
1753 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1754 ShuffleMask.info256>, EVEX_V256;
1755 }
1756 }
1758 multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1759 X86FoldableSchedWrite sched,
1760 AVX512VLVectorVTInfo VTInfo,
1761 AVX512VLVectorVTInfo Idx,
1762 Predicate Prd> {
1763 let Predicates = [Prd] in
1764 defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1765 Idx.info512>, EVEX_V512;
1766 let Predicates = [Prd, HasVLX] in {
1767 defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1768 Idx.info128>, EVEX_V128;
1769 defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1770 Idx.info256>, EVEX_V256;
1771 }
1772 }
1774 defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1775 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1776 defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1777 avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1778 defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1779 avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1780 VEX_W, EVEX_CD8<16, CD8VF>;
1781 defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1782 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1783 EVEX_CD8<8, CD8VF>;
1784 defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1785 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1786 defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1787 avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1789 // Extra patterns to deal with extra bitcasts due to passthru and index being
1790 // different types on the fp versions.
1791 multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1792 X86VectorVTInfo IdxVT,
1793 X86VectorVTInfo CastVT> {
1794 def : Pat<(_.VT (vselect _.KRCWM:$mask,
1795 (X86VPermt2 (_.VT _.RC:$src2),
1796 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
1797 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1798 (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1799 _.RC:$src2, _.RC:$src3)>;
1800 def : Pat<(_.VT (vselect _.KRCWM:$mask,
1801 (X86VPermt2 _.RC:$src2,
1802 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
1803 (_.LdFrag addr:$src3)),
1804 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1805 (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1806 _.RC:$src2, addr:$src3)>;
1807 def : Pat<(_.VT (vselect _.KRCWM:$mask,
1808 (X86VPermt2 _.RC:$src2,
1809 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
1810 (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
1811 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1812 (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1813 _.RC:$src2, addr:$src3)>;
1814 }
1816 // TODO: Should we add more casts? The vXi64 case is common due to ABI.
1817 defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
1818 defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
1819 defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
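// Sketch of the case these extra patterns cover (assumed IR, not a test):
//   %pass = bitcast <8 x i64> %p to <16 x float>   ; ABI-typed passthru
//   %res  = select <16 x i1> %m, %permuted, %pass
// The bitconvert on the passthru/index should not block folding into the
// masked VPERMI2PSrrk/rmk/rmbk forms above.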
1822 multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1823 X86FoldableSchedWrite sched,
1824 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1825 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1826 defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1827 (ins IdxVT.RC:$src2, _.RC:$src3),
1828 OpcodeStr, "$src3, $src2", "$src2, $src3",
1829 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1830 EVEX_4V, AVX5128IBase, Sched<[sched]>;
1832 defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1833 (ins IdxVT.RC:$src2, _.MemOp:$src3),
1834 OpcodeStr, "$src3, $src2", "$src2, $src3",
1835 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1836 (_.LdFrag addr:$src3))), 1>,
1837 EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1838 }
1839 }
1840 multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1841 X86FoldableSchedWrite sched,
1842 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1843 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1844 defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1845 (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1846 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1847 !strconcat("$src2, ${src3}", _.BroadcastStr ),
1848 (_.VT (X86VPermt2 _.RC:$src1,
1849 IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
1850 AVX5128IBase, EVEX_4V, EVEX_B,
1851 Sched<[sched.Folded, sched.ReadAfterFold]>;
1852 }
1854 multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1855 X86FoldableSchedWrite sched,
1856 AVX512VLVectorVTInfo VTInfo,
1857 AVX512VLVectorVTInfo ShuffleMask> {
1858 defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1859 ShuffleMask.info512>,
1860 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1861 ShuffleMask.info512>, EVEX_V512;
1862 let Predicates = [HasVLX] in {
1863 defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1864 ShuffleMask.info128>,
1865 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1866 ShuffleMask.info128>, EVEX_V128;
1867 defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1868 ShuffleMask.info256>,
1869 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1870 ShuffleMask.info256>, EVEX_V256;
1871 }
1872 }
1874 multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1875 X86FoldableSchedWrite sched,
1876 AVX512VLVectorVTInfo VTInfo,
1877 AVX512VLVectorVTInfo Idx, Predicate Prd> {
1878 let Predicates = [Prd] in
1879 defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1880 Idx.info512>, EVEX_V512;
1881 let Predicates = [Prd, HasVLX] in {
1882 defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1883 Idx.info128>, EVEX_V128;
1884 defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1885 Idx.info256>, EVEX_V256;
1886 }
1887 }
1889 defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1890 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1891 defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1892 avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1893 defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1894 avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1895 VEX_W, EVEX_CD8<16, CD8VF>;
1896 defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1897 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1898 EVEX_CD8<8, CD8VF>;
1899 defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1900 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1901 defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1902 avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1904 //===----------------------------------------------------------------------===//
1905 // AVX-512 - BLEND using mask
1906 //===----------------------------------------------------------------------===//
1908 multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1909 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1910 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1911 def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1912 (ins _.RC:$src1, _.RC:$src2),
1913 !strconcat(OpcodeStr,
1914 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1915 EVEX_4V, Sched<[sched]>;
1916 def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1917 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1918 !strconcat(OpcodeStr,
1919 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1920 []>, EVEX_4V, EVEX_K, Sched<[sched]>;
1921 def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1922 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1923 !strconcat(OpcodeStr,
1924 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1925 []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
1926 let mayLoad = 1 in {
1927 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1928 (ins _.RC:$src1, _.MemOp:$src2),
1929 !strconcat(OpcodeStr,
1930 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1931 []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
1932 Sched<[sched.Folded, sched.ReadAfterFold]>;
1933 def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1934 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1935 !strconcat(OpcodeStr,
1936 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1937 []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1938 Sched<[sched.Folded, sched.ReadAfterFold]>;
1939 def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1940 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1941 !strconcat(OpcodeStr,
1942 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1943 []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1944 Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
1945 }
1946 }
1947 }
1948 multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
1949 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1950 let mayLoad = 1, hasSideEffects = 0 in {
1951 def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1952 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1953 !strconcat(OpcodeStr,
1954 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1955 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1956 EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1957 Sched<[sched.Folded, sched.ReadAfterFold]>;
1959 def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1960 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1961 !strconcat(OpcodeStr,
1962 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
1963 "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1964 EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1965 Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
1967 def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1968 (ins _.RC:$src1, _.ScalarMemOp:$src2),
1969 !strconcat(OpcodeStr,
1970 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
1971 "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1972 EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1973 Sched<[sched.Folded, sched.ReadAfterFold]>;
1974 }
1975 }
1977 multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
1978 AVX512VLVectorVTInfo VTInfo> {
1979 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1980 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1981 EVEX_V512;
1983 let Predicates = [HasVLX] in {
1984 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1985 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1986 EVEX_V256;
1987 defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
1988 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
1989 EVEX_V128;
1990 }
1991 }
1993 multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
1994 AVX512VLVectorVTInfo VTInfo> {
1995 let Predicates = [HasBWI] in
1996 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1997 EVEX_V512;
1999 let Predicates = [HasBWI, HasVLX] in {
2000 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2001 EVEX_V256;
2002 defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2003 EVEX_V128;
2004 }
2005 }
2007 defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
2008 avx512vl_f32_info>;
2009 defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
2010 avx512vl_f64_info>, VEX_W;
2011 defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
2012 avx512vl_i32_info>;
2013 defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
2014 avx512vl_i64_info>, VEX_W;
2015 defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
2016 avx512vl_i8_info>;
2017 defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
2018 avx512vl_i16_info>, VEX_W;
2020 //===----------------------------------------------------------------------===//
2021 // Compare Instructions
2022 //===----------------------------------------------------------------------===//
2024 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
2026 multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
2027 PatFrag OpNode_su, PatFrag OpNodeSAE_su,
2028 X86FoldableSchedWrite sched> {
2029 defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2030 (outs _.KRC:$dst),
2031 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2032 "vcmp"#_.Suffix,
2033 "$cc, $src2, $src1", "$src1, $src2, $cc",
2034 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
2035 (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2036 imm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>;
2038 defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2039 (outs _.KRC:$dst),
2040 (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
2041 "vcmp"#_.Suffix,
2042 "$cc, $src2, $src1", "$src1, $src2, $cc",
2043 (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
2044 imm:$cc),
2045 (OpNode_su (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
2046 imm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2047 Sched<[sched.Folded, sched.ReadAfterFold]>;
2049 defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2050 (outs _.KRC:$dst),
2051 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2052 "vcmp"#_.Suffix,
2053 "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
2054 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2055 imm:$cc),
2056 (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2057 imm:$cc)>,
2058 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
2060 let isCodeGenOnly = 1 in {
2061 let isCommutable = 1 in
2062 def rr : AVX512Ii8<0xC2, MRMSrcReg,
2063 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
2064 !strconcat("vcmp", _.Suffix,
2065 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2066 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2067 _.FRC:$src2,
2068 imm:$cc))]>,
2069 EVEX_4V, VEX_LIG, Sched<[sched]>;
2070 def rm : AVX512Ii8<0xC2, MRMSrcMem,
2071 (outs _.KRC:$dst),
2072 (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2073 !strconcat("vcmp", _.Suffix,
2074 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2075 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2076 (_.ScalarLdFrag addr:$src2),
2077 imm:$cc))]>,
2078 EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2079 Sched<[sched.Folded, sched.ReadAfterFold]>;
2080 }
2081 }
2083 def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2084 (X86cmpms node:$src1, node:$src2, node:$cc), [{
2085 return N->hasOneUse();
2086 }]>;
2087 def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2088 (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
2089 return N->hasOneUse();
2090 }]>;
2092 let Predicates = [HasAVX512] in {
2093 let ExeDomain = SSEPackedSingle in
2094 defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
2095 X86cmpms_su, X86cmpmsSAE_su,
2096 SchedWriteFCmp.Scl>, AVX512XSIi8Base;
2097 let ExeDomain = SSEPackedDouble in
2098 defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
2099 X86cmpms_su, X86cmpmsSAE_su,
2100 SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
2101 }
2103 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
2104 PatFrag OpNode_su, X86FoldableSchedWrite sched,
2105 X86VectorVTInfo _, bit IsCommutable> {
2106 let isCommutable = IsCommutable in
2107 def rr : AVX512BI<opc, MRMSrcReg,
2108 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2109 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2110 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))]>,
2111 EVEX_4V, Sched<[sched]>;
2112 def rm : AVX512BI<opc, MRMSrcMem,
2113 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2114 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2115 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2116 (_.VT (_.LdFrag addr:$src2))))]>,
2117 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2118 let isCommutable = IsCommutable in
2119 def rrk : AVX512BI<opc, MRMSrcReg,
2120 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2121 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2122 "$dst {${mask}}, $src1, $src2}"),
2123 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2124 (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2))))]>,
2125 EVEX_4V, EVEX_K, Sched<[sched]>;
2126 def rmk : AVX512BI<opc, MRMSrcMem,
2127 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2128 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2129 "$dst {${mask}}, $src1, $src2}"),
2130 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2131 (OpNode_su (_.VT _.RC:$src1),
2132 (_.VT (_.LdFrag addr:$src2)))))]>,
2133 EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2134 }
2136 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
2137 PatFrag OpNode_su,
2138 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2139 bit IsCommutable> :
2140 avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched, _, IsCommutable> {
2141 def rmb : AVX512BI<opc, MRMSrcMem,
2142 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2143 !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2144 "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2145 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2146 (X86VBroadcast (_.ScalarLdFrag addr:$src2))))]>,
2147 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2148 def rmbk : AVX512BI<opc, MRMSrcMem,
2149 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2150 _.ScalarMemOp:$src2),
2151 !strconcat(OpcodeStr,
2152 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2153 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2154 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2155 (OpNode_su (_.VT _.RC:$src1),
2156 (X86VBroadcast
2157 (_.ScalarLdFrag addr:$src2)))))]>,
2158 EVEX_4V, EVEX_K, EVEX_B,
2159 Sched<[sched.Folded, sched.ReadAfterFold]>;
2160 }
2162 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
2163 PatFrag OpNode_su, X86SchedWriteWidths sched,
2164 AVX512VLVectorVTInfo VTInfo, Predicate prd,
2165 bit IsCommutable = 0> {
2166 let Predicates = [prd] in
2167 defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.ZMM,
2168 VTInfo.info512, IsCommutable>, EVEX_V512;
2170 let Predicates = [prd, HasVLX] in {
2171 defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.YMM,
2172 VTInfo.info256, IsCommutable>, EVEX_V256;
2173 defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.XMM,
2174 VTInfo.info128, IsCommutable>, EVEX_V128;
2175 }
2176 }
2178 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2179 PatFrag OpNode, PatFrag OpNode_su,
2180 X86SchedWriteWidths sched,
2181 AVX512VLVectorVTInfo VTInfo,
2182 Predicate prd, bit IsCommutable = 0> {
2183 let Predicates = [prd] in
2184 defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.ZMM,
2185 VTInfo.info512, IsCommutable>, EVEX_V512;
2187 let Predicates = [prd, HasVLX] in {
2188 defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.YMM,
2189 VTInfo.info256, IsCommutable>, EVEX_V256;
2190 defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.XMM,
2191 VTInfo.info128, IsCommutable>, EVEX_V128;
2192 }
2193 }
2195 // This fragment treats setcc as commutable to help match loads in both
2196 // operands for PCMPEQ.
2197 def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
2198 def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2),
2199 (X86setcc_commute node:$src1, node:$src2, SETEQ)>;
2200 def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
2201 (setcc node:$src1, node:$src2, SETGT)>;
2203 def X86pcmpeqm_c_su : PatFrag<(ops node:$src1, node:$src2),
2204 (X86pcmpeqm_c node:$src1, node:$src2), [{
2205 return N->hasOneUse();
2206 }]>;
2207 def X86pcmpgtm_su : PatFrag<(ops node:$src1, node:$src2),
2208 (X86pcmpgtm node:$src1, node:$src2), [{
2209 return N->hasOneUse();
2210 }]>;
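// With the commutable fragment, a load in either operand of an equality
// compare reaches the same memory form (illustrative):
//   (setcc (load p), v, SETEQ)  or  (setcc v, (load p), SETEQ)
//   -> vpcmpeqd (%rdi), %zmm0, %k0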
2212 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2213 // increase the pattern complexity the way an immediate would.
2214 let AddedComplexity = 2 in {
2215 // FIXME: Is there a better scheduler class for VPCMP?
2216 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c, X86pcmpeqm_c_su,
2217 SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2218 EVEX_CD8<8, CD8VF>, VEX_WIG;
2220 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c, X86pcmpeqm_c_su,
2221 SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2222 EVEX_CD8<16, CD8VF>, VEX_WIG;
2224 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c, X86pcmpeqm_c_su,
2225 SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2226 EVEX_CD8<32, CD8VF>;
2228 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c, X86pcmpeqm_c_su,
2229 SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2230 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2232 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm, X86pcmpgtm_su,
2233 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2234 EVEX_CD8<8, CD8VF>, VEX_WIG;
2236 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm, X86pcmpgtm_su,
2237 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2238 EVEX_CD8<16, CD8VF>, VEX_WIG;
2240 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm, X86pcmpgtm_su,
2241 SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2242 EVEX_CD8<32, CD8VF>;
2244 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm, X86pcmpgtm_su,
2245 SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2246 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2247 }
2249 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2250 PatFrag Frag_su, PatFrag CommFrag, PatFrag CommFrag_su,
2251 X86FoldableSchedWrite sched,
2252 X86VectorVTInfo _, string Name> {
2253 let isCommutable = 1 in
2254 def rri : AVX512AIi8<opc, MRMSrcReg,
2255 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2256 !strconcat("vpcmp", Suffix,
2257 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2258 [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2259 (_.VT _.RC:$src2),
2260 cond)))]>,
2261 EVEX_4V, Sched<[sched]>;
2262 def rmi : AVX512AIi8<opc, MRMSrcMem,
2263 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2264 !strconcat("vpcmp", Suffix,
2265 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2266 [(set _.KRC:$dst, (_.KVT
2267 (Frag:$cc
2268 (_.VT _.RC:$src1),
2269 (_.VT (_.LdFrag addr:$src2)),
2270 cond)))]>,
2271 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2272 let isCommutable = 1 in
2273 def rrik : AVX512AIi8<opc, MRMSrcReg,
2274 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2275 u8imm:$cc),
2276 !strconcat("vpcmp", Suffix,
2277 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2278 "$dst {${mask}}, $src1, $src2, $cc}"),
2279 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2280 (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
2281 (_.VT _.RC:$src2),
2282 cond))))]>,
2283 EVEX_4V, EVEX_K, Sched<[sched]>;
2284 def rmik : AVX512AIi8<opc, MRMSrcMem,
2285 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2286 u8imm:$cc),
2287 !strconcat("vpcmp", Suffix,
2288 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2289 "$dst {${mask}}, $src1, $src2, $cc}"),
2290 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2291 (_.KVT
2292 (Frag_su:$cc
2293 (_.VT _.RC:$src1),
2294 (_.VT (_.LdFrag addr:$src2)),
2295 cond))))]>,
2296 EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2298 def : Pat<(_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
2299 (_.VT _.RC:$src1), cond)),
2300 (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2301 _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
2303 def : Pat<(and _.KRCWM:$mask,
2304 (_.KVT (CommFrag_su:$cc (_.LdFrag addr:$src2),
2305 (_.VT _.RC:$src1), cond))),
2306 (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2307 _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2308 (CommFrag.OperandTransform $cc))>;
2309 }
2311 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2312 PatFrag Frag_su, PatFrag CommFrag,
2313 PatFrag CommFrag_su, X86FoldableSchedWrite sched,
2314 X86VectorVTInfo _, string Name> :
2315 avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2316 sched, _, Name> {
2317 def rmib : AVX512AIi8<opc, MRMSrcMem,
2318 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2319 u8imm:$cc),
2320 !strconcat("vpcmp", Suffix,
2321 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2322 "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2323 [(set _.KRC:$dst, (_.KVT (Frag:$cc
2324 (_.VT _.RC:$src1),
2325 (X86VBroadcast
2326 (_.ScalarLdFrag addr:$src2)),
2327 cond)))]>,
2328 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2329 def rmibk : AVX512AIi8<opc, MRMSrcMem,
2330 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2331 _.ScalarMemOp:$src2, u8imm:$cc),
2332 !strconcat("vpcmp", Suffix,
2333 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2334 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2335 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2336 (_.KVT (Frag_su:$cc
2337 (_.VT _.RC:$src1),
2338 (X86VBroadcast
2339 (_.ScalarLdFrag addr:$src2)),
2340 cond))))]>,
2341 EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2343 def : Pat<(_.KVT (CommFrag:$cc (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2344 (_.VT _.RC:$src1), cond)),
2345 (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2346 _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
2348 def : Pat<(and _.KRCWM:$mask,
2349 (_.KVT (CommFrag_su:$cc (X86VBroadcast
2350 (_.ScalarLdFrag addr:$src2)),
2351 (_.VT _.RC:$src1), cond))),
2352 (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2353 _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2354 (CommFrag.OperandTransform $cc))>;
2355 }
2357 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2358 PatFrag Frag_su, PatFrag CommFrag,
2359 PatFrag CommFrag_su, X86SchedWriteWidths sched,
2360 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2361 let Predicates = [prd] in
2362 defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2363 sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2365 let Predicates = [prd, HasVLX] in {
2366 defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2367 sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2368 defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2369 sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2370 }
2371 }
2373 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2374 PatFrag Frag_su, PatFrag CommFrag,
2375 PatFrag CommFrag_su, X86SchedWriteWidths sched,
2376 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2377 let Predicates = [prd] in
2378 defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2379 sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2381 let Predicates = [prd, HasVLX] in {
2382 defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2383 sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2384 defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2385 sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2386 }
2387 }
2389 def X86pcmpm_imm : SDNodeXForm<setcc, [{
2390 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2391 uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2392 return getI8Imm(SSECC, SDLoc(N));
2393 }]>;
2395 // Swapped operand version of the above.
2396 def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
2397 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2398 uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2399 SSECC = X86::getSwappedVPCMPImm(SSECC);
2400 return getI8Imm(SSECC, SDLoc(N));
2401 }]>;
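// Illustrative mapping (values per X86::getVPCMPImmForCond): SETLT -> 1 (lt);
// getSwappedVPCMPImm then turns 1 (lt) into 6 (nle/gt), since
// cmp(a, b, lt) is equivalent to cmp(b, a, gt) with the operands swapped.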
2403 def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2404 (setcc node:$src1, node:$src2, node:$cc), [{
2405 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2406 return !ISD::isUnsignedIntSetCC(CC);
2407 }], X86pcmpm_imm>;
2409 def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2410 (setcc node:$src1, node:$src2, node:$cc), [{
2411 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2412 return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2413 }], X86pcmpm_imm>;
2415 // Same as above, but commutes immediate. Use for load folding.
2416 def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2417 (setcc node:$src1, node:$src2, node:$cc), [{
2418 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2419 return !ISD::isUnsignedIntSetCC(CC);
2420 }], X86pcmpm_imm_commute>;
2422 def X86pcmpm_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2423 (setcc node:$src1, node:$src2, node:$cc), [{
2424 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2425 return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2426 }], X86pcmpm_imm_commute>;
2428 def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2429 (setcc node:$src1, node:$src2, node:$cc), [{
2430 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2431 return ISD::isUnsignedIntSetCC(CC);
2432 }], X86pcmpm_imm>;
2434 def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2435 (setcc node:$src1, node:$src2, node:$cc), [{
2436 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2437 return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2438 }], X86pcmpm_imm>;
2440 // Same as above, but commutes immediate. Use for load folding.
2441 def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2442 (setcc node:$src1, node:$src2, node:$cc), [{
2443 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2444 return ISD::isUnsignedIntSetCC(CC);
2445 }], X86pcmpm_imm_commute>;
2447 def X86pcmpum_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2448 (setcc node:$src1, node:$src2, node:$cc), [{
2449 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2450 return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2451 }], X86pcmpm_imm_commute>;
2453 // FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
2454 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
2455 X86pcmpm_commute, X86pcmpm_commute_su,
2456 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2457 EVEX_CD8<8, CD8VF>;
2458 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
2459 X86pcmpum_commute, X86pcmpum_commute_su,
2460 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2461 EVEX_CD8<8, CD8VF>;
2463 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
2464 X86pcmpm_commute, X86pcmpm_commute_su,
2465 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2466 VEX_W, EVEX_CD8<16, CD8VF>;
2467 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
2468 X86pcmpum_commute, X86pcmpum_commute_su,
2469 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2470 VEX_W, EVEX_CD8<16, CD8VF>;
2472 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
2473 X86pcmpm_commute, X86pcmpm_commute_su,
2474 SchedWriteVecALU, avx512vl_i32_info,
2475 HasAVX512>, EVEX_CD8<32, CD8VF>;
2476 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
2477 X86pcmpum_commute, X86pcmpum_commute_su,
2478 SchedWriteVecALU, avx512vl_i32_info,
2479 HasAVX512>, EVEX_CD8<32, CD8VF>;
2481 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
2482 X86pcmpm_commute, X86pcmpm_commute_su,
2483 SchedWriteVecALU, avx512vl_i64_info,
2484 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2485 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
2486 X86pcmpum_commute, X86pcmpum_commute_su,
2487 SchedWriteVecALU, avx512vl_i64_info,
2488 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
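// End-to-end example of what these defs select (a sketch, not a test case):
//   (setcc v16i32:$a, v16i32:$b, SETULT) -> VPCMPUDZrri with immediate 1,
// which the assembly printer renders via its alias as
//   vpcmpltud %zmm1, %zmm0, %k0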
2490 def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2491 (X86cmpm node:$src1, node:$src2, node:$cc), [{
2492 return N->hasOneUse();
2493 }]>;
2494 def X86cmpmSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2495 (X86cmpmSAE node:$src1, node:$src2, node:$cc), [{
2496 return N->hasOneUse();
2497 }]>;
2499 multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2500 string Name> {
2501 defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2502 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
2503 "vcmp"#_.Suffix,
2504 "$cc, $src2, $src1", "$src1, $src2, $cc",
2505 (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
2506 (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
2507 1>, Sched<[sched]>;
2509 defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2510 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2511 "vcmp"#_.Suffix,
2512 "$cc, $src2, $src1", "$src1, $src2, $cc",
2513 (X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2514 imm:$cc),
2515 (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2516 imm:$cc), 1>,
2517 Sched<[sched.Folded, sched.ReadAfterFold]>;
2519 defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2520 (outs _.KRC:$dst),
2521 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2522 "vcmp"#_.Suffix,
2523 "$cc, ${src2}"#_.BroadcastStr#", $src1",
2524 "$src1, ${src2}"#_.BroadcastStr#", $cc",
2525 (X86cmpm (_.VT _.RC:$src1),
2526 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
2527 imm:$cc),
2528 (X86cmpm_su (_.VT _.RC:$src1),
2529 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
2530 imm:$cc), 1>,
2531 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2533 // Patterns for selecting with loads in other operand.
2534 def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2535 CommutableCMPCC:$cc),
2536 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2537 imm:$cc)>;
2539 def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
2540 (_.VT _.RC:$src1),
2541 CommutableCMPCC:$cc)),
2542 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2543 _.RC:$src1, addr:$src2,
2544 imm:$cc)>;
2546 def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2547 (_.VT _.RC:$src1), CommutableCMPCC:$cc),
2548 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2549 imm:$cc)>;
2551 def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (X86VBroadcast
2552 (_.ScalarLdFrag addr:$src2)),
2553 (_.VT _.RC:$src1),
2554 CommutableCMPCC:$cc)),
2555 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2556 _.RC:$src1, addr:$src2,
2557 imm:$cc)>;
2558 }
2560 multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2561 // comparison code form (VCMP[EQ/LT/LE/...])
2562 defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2563 (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2564 "vcmp"#_.Suffix,
2565 "$cc, {sae}, $src2, $src1",
2566 "$src1, $src2, {sae}, $cc",
2567 (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
2568 (X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2569 imm:$cc)>,
2570 EVEX_B, Sched<[sched]>;
2571 }
2573 multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
2574 let Predicates = [HasAVX512] in {
2575 defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2576 avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2577 }
2579 let Predicates = [HasAVX512,HasVLX] in {
2580 defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2581 defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
2582 }
2583 }
2585 defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2586 AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
2587 defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2588 AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2590 // Patterns to select fp compares with load as first operand.
2591 let Predicates = [HasAVX512] in {
2592 def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
2593 CommutableCMPCC:$cc)),
2594 (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>;
2596 def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
2597 CommutableCMPCC:$cc)),
2598 (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>;
2599 }
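// Only truly commutative predicates (e.g. eq/neq/unord/ord) match
// CommutableCMPCC, so, illustratively:
//   (X86cmpms (loadf64 addr:$p), FR64X:$x, 0 /*eq*/)
//   -> vcmpeqsd (%rdi), %xmm0, %k0   ; operands swapped onto VCMPSDZrm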
2601 // ----------------------------------------------------------------
2602 // FPClass
2604 def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
2605 (X86Vfpclasss node:$src1, node:$src2), [{
2606 return N->hasOneUse();
2607 }]>;
2609 def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
2610 (X86Vfpclass node:$src1, node:$src2), [{
2611 return N->hasOneUse();
2612 }]>;
2614 // Handle the fpclass instruction: mask = op(reg_scalar, imm)
2615 //                                         op(mem_scalar, imm)
2616 multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
2617 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2618 Predicate prd> {
2619 let Predicates = [prd], ExeDomain = _.ExeDomain in {
2620 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2621 (ins _.RC:$src1, i32u8imm:$src2),
2622 OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2623 [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
2624 (i32 imm:$src2)))]>,
2625 Sched<[sched]>;
2626 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2627 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2628 OpcodeStr##_.Suffix#
2629 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2630 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2631 (X86Vfpclasss_su (_.VT _.RC:$src1),
2632 (i32 imm:$src2))))]>,
2633 EVEX_K, Sched<[sched]>;
2634 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2635 (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2636 OpcodeStr##_.Suffix##
2637 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2639 (X86Vfpclasss _.ScalarIntMemCPat:$src1,
2640 (i32 imm:$src2)))]>,
2641 Sched<[sched.Folded, sched.ReadAfterFold]>;
2642 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2643 (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2644 OpcodeStr##_.Suffix##
2645 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2646 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2647 (X86Vfpclasss_su _.ScalarIntMemCPat:$src1,
2648 (i32 imm:$src2))))]>,
2649 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2650 }
2651 }
2653 // Handle the fpclass instruction: mask = fpclass(reg_vec, reg_vec, imm)
2654 //                                         fpclass(reg_vec, mem_vec, imm)
2655 //                                         fpclass(reg_vec, broadcast(eltVt), imm)
2656 multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
2657 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2658 string mem> {
2659 let ExeDomain = _.ExeDomain in {
2660 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2661 (ins _.RC:$src1, i32u8imm:$src2),
2662 OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2663 [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
2664 (i32 imm:$src2)))]>,
2665 Sched<[sched]>;
2666 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2667 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2668 OpcodeStr##_.Suffix#
2669 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2670 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2671 (X86Vfpclass_su (_.VT _.RC:$src1),
2672 (i32 imm:$src2))))]>,
2673 EVEX_K, Sched<[sched]>;
2674 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2675 (ins _.MemOp:$src1, i32u8imm:$src2),
2676 OpcodeStr##_.Suffix#"{"#mem#"}"#
2677 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2678 [(set _.KRC:$dst,(X86Vfpclass
2679 (_.VT (_.LdFrag addr:$src1)),
2680 (i32 imm:$src2)))]>,
2681 Sched<[sched.Folded, sched.ReadAfterFold]>;
2682 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2683 (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2684 OpcodeStr##_.Suffix#"{"#mem#"}"#
2685 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2686 [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
2687 (_.VT (_.LdFrag addr:$src1)),
2688 (i32 imm:$src2))))]>,
2689 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2690 def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2691 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2692 OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
2693 _.BroadcastStr##", $dst|$dst, ${src1}"
2694 ##_.BroadcastStr##", $src2}",
2695 [(set _.KRC:$dst,(X86Vfpclass
2696 (_.VT (X86VBroadcast
2697 (_.ScalarLdFrag addr:$src1))),
2698 (i32 imm:$src2)))]>,
2699 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2700 def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2701 (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2702 OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
2703 _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
2704 _.BroadcastStr##", $src2}",
2705 [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
2706 (_.VT (X86VBroadcast
2707 (_.ScalarLdFrag addr:$src1))),
2708 (i32 imm:$src2))))]>,
2709 EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2710 }
2712 // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
2713 // the memory form.
2714 def : InstAlias<OpcodeStr#_.Suffix#mem#
2715 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2716 (!cast<Instruction>(NAME#"rr")
2717 _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2718 def : InstAlias<OpcodeStr#_.Suffix#mem#
2719 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2720 (!cast<Instruction>(NAME#"rrk")
2721 _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2722 def : InstAlias<OpcodeStr#_.Suffix#mem#
2723 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
2724 _.BroadcastStr#", $src2}",
2725 (!cast<Instruction>(NAME#"rmb")
2726 _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2727 def : InstAlias<OpcodeStr#_.Suffix#mem#
2728 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
2729 "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
2730 (!cast<Instruction>(NAME#"rmbk")
2731 _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2732 }
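// With these aliases the AT&T parser also accepts the width suffix on the
// register and broadcast forms, e.g. (illustrative):
//   vfpclasspsz $2, %zmm0, %k1    ; same encoding as vfpclassps $2, %zmm0, %k1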
2734 multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2735 bits<8> opc, X86SchedWriteWidths sched,
2736 Predicate prd> {
2737 let Predicates = [prd] in {
2738 defm Z : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
2739 _.info512, "z">, EVEX_V512;
2741 let Predicates = [prd, HasVLX] in {
2742 defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
2743 _.info128, "x">, EVEX_V128;
2744 defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
2745 _.info256, "y">, EVEX_V256;
2749 multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2750 bits<8> opcScalar, X86SchedWriteWidths sched,
2751 Predicate prd> {
2752 defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
2753 sched, prd>,
2754 EVEX_CD8<32, CD8VF>;
2755 defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
2756 sched, prd>,
2757 EVEX_CD8<64, CD8VF> , VEX_W;
2758 defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2759 sched.Scl, f32x_info, prd>, VEX_LIG,
2760 EVEX_CD8<32, CD8VT1>;
2761 defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2762 sched.Scl, f64x_info, prd>, VEX_LIG,
2763 EVEX_CD8<64, CD8VT1>, VEX_W;
2764 }
2766 defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp,
2767 HasDQI>, AVX512AIi8Base, EVEX;
2769 //-----------------------------------------------------------------
2770 // Mask register copy, including
2771 // - copy between mask registers
2772 // - load/store mask registers
2773 // - copy from GPR to mask register and vice versa
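// Typical forms the multiclasses below produce (illustrative):
//   kmovw %k1, %k2        ; mask -> mask
//   kmovw (%rdi), %k1     ; load mask
//   kmovw %k1, (%rdi)     ; store mask
//   kmovw %eax, %k1       ; GPR -> mask
//   kmovw %k1, %eax       ; mask -> GPR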
2775 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2776 string OpcodeStr, RegisterClass KRC,
2777 ValueType vvt, X86MemOperand x86memop> {
2778 let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
2779 def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2780 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2781 Sched<[WriteMove]>;
2782 def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2783 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2784 [(set KRC:$dst, (vvt (load addr:$src)))]>,
2785 Sched<[WriteLoad]>;
2786 def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2787 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2788 [(store KRC:$src, addr:$dst)]>,
2789 Sched<[WriteStore]>;
2790 }
2792 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2793 string OpcodeStr,
2794 RegisterClass KRC, RegisterClass GRC> {
2795 let hasSideEffects = 0 in {
2796 def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2797 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2798 Sched<[WriteMove]>;
2799 def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2800 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2801 Sched<[WriteMove]>;
2802 }
2803 }
2805 let Predicates = [HasDQI] in
2806 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2807 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2808 VEX, PD;
2810 let Predicates = [HasAVX512] in
2811 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2812 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2813 VEX, PS;
2815 let Predicates = [HasBWI] in {
2816 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32mem>,
2817 VEX, PD, VEX_W;
2818 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2819 XD;
2820 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2821 VEX, PS, VEX_W;
2822 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2823 XD, VEX_W;
2824 }
2826 // GR from/to mask register
2827 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2828 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2829 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2830 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2832 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2833 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2834 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2835 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
2837 def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2838 (KMOVWrk VK16:$src)>;
2839 def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2840 (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
2841 def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2842 (COPY_TO_REGCLASS VK16:$src, GR32)>;
2843 def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2844 (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
2846 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2847 (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2848 def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2849 (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
2850 def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2851 (COPY_TO_REGCLASS VK8:$src, GR32)>;
2852 def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2853 (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
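// Illustrative only: with the patterns above, IR such as
//   %m = bitcast <16 x i1> %k to i16
//   %z = zext i16 %m to i32
// selects to a single KMOVWrk, while the anyext forms degrade to a plain
// register-class copy with no extra instruction.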
2855 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2856 (COPY_TO_REGCLASS GR32:$src, VK32)>;
2857 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2858 (COPY_TO_REGCLASS VK32:$src, GR32)>;
2859 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2860 (COPY_TO_REGCLASS GR64:$src, VK64)>;
2861 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2862 (COPY_TO_REGCLASS VK64:$src, GR64)>;
2865 let Predicates = [HasDQI] in {
2866 def : Pat<(store VK1:$src, addr:$dst),
2867 (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
2869 def : Pat<(v1i1 (load addr:$src)),
2870 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2871 def : Pat<(v2i1 (load addr:$src)),
2872 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2873 def : Pat<(v4i1 (load addr:$src)),
2874 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2875 }
2877 let Predicates = [HasAVX512] in {
2878 def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2879 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2880 def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2881 (KMOVWkm addr:$src)>;
2882 }
2884 def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2885 SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2886 SDTCVecEltisVT<1, i1>,
2887 SDTCisPtrTy<2>]>>;
2889 let Predicates = [HasAVX512] in {
2890 multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2891 def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2892 (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2894 def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2895 (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2897 def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2898 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2900 def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2901 (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
2902 }
2904 defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
2905 defm : operation_gpr_mask_copy_lowering<VK2, v2i1>;
2906 defm : operation_gpr_mask_copy_lowering<VK4, v4i1>;
2907 defm : operation_gpr_mask_copy_lowering<VK8, v8i1>;
2908 defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
2909 defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
2910 defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;
2912 def : Pat<(insert_subvector (v16i1 immAllZerosV),
2913 (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2914 (KMOVWkr
2915 (AND32ri8
2916 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2917 (i32 1)))>;
2918 }
2920 // Mask unary operation
2922 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2923 RegisterClass KRC, SDPatternOperator OpNode,
2924 X86FoldableSchedWrite sched, Predicate prd> {
2925 let Predicates = [prd] in
2926 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2927 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2928 [(set KRC:$dst, (OpNode KRC:$src))]>,
2929 Sched<[sched]>;
2930 }
2932 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2933 SDPatternOperator OpNode,
2934 X86FoldableSchedWrite sched> {
2935 defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2936 sched, HasDQI>, VEX, PD;
2937 defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2938 sched, HasAVX512>, VEX, PS;
2939 defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2940 sched, HasBWI>, VEX, PD, VEX_W;
2941 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2942 sched, HasBWI>, VEX, PS, VEX_W;
2943 }
2945 // TODO - do we need an X86SchedWriteWidths::KMASK type?
2946 defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
2948 // KNL does not support KMOVB, so the 8-bit mask is promoted to 16-bit.
2949 let Predicates = [HasAVX512, NoDQI] in
2950 def : Pat<(vnot VK8:$src),
2951 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
2953 def : Pat<(vnot VK4:$src),
2954 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2955 def : Pat<(vnot VK2:$src),
2956 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
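// Illustrative only: on a KNL-class target (no DQI), (vnot VK8:$k) therefore
// becomes a copy to VK16, a KNOTWrr, and a copy back to VK8, rather than the
// KNOTBrr form that requires DQI.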
2958 // Mask binary operation
2959 // - KAND, KANDN, KOR, KXNOR, KXOR
2960 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2961 RegisterClass KRC, SDPatternOperator OpNode,
2962 X86FoldableSchedWrite sched, Predicate prd,
2963 bit IsCommutable> {
2964 let Predicates = [prd], isCommutable = IsCommutable in
2965 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2966 !strconcat(OpcodeStr,
2967 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2968 [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
2969 Sched<[sched]>;
2970 }
2972 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2973 SDPatternOperator OpNode,
2974 X86FoldableSchedWrite sched, bit IsCommutable,
2975 Predicate prdW = HasAVX512> {
2976 defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2977 sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
2978 defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2979 sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
2980 defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2981 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
2982 defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2983 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
2984 }
2986 def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
2987 def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
2988 // These nodes use 'vnot' instead of 'not' to support vectors.
2989 def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
2990 def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
2992 // TODO - do we need an X86SchedWriteWidths::KMASK type?
2993 defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>;
2994 defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>;
2995 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>;
2996 defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>;
2997 defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>;
2998 defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
3000 multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
3001 Instruction Inst> {
3002 // With AVX512F, an 8-bit mask is promoted to a 16-bit mask; with DQI the
3003 // v8i1 type is legal and the KxxxB instructions are used directly.
3004 let Predicates = [NoDQI] in
3005 def : Pat<(VOpNode VK8:$src1, VK8:$src2),
3006 (COPY_TO_REGCLASS
3007 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
3008 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
3010 // All types smaller than 8 bits require conversion anyway
3011 def : Pat<(OpNode VK1:$src1, VK1:$src2),
3012 (COPY_TO_REGCLASS (Inst
3013 (COPY_TO_REGCLASS VK1:$src1, VK16),
3014 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
3015 def : Pat<(VOpNode VK2:$src1, VK2:$src2),
3016 (COPY_TO_REGCLASS (Inst
3017 (COPY_TO_REGCLASS VK2:$src1, VK16),
3018 (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
3019 def : Pat<(VOpNode VK4:$src1, VK4:$src2),
3020 (COPY_TO_REGCLASS (Inst
3021 (COPY_TO_REGCLASS VK4:$src1, VK16),
3022 (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
3023 }
3025 defm : avx512_binop_pat<and, and, KANDWrr>;
3026 defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
3027 defm : avx512_binop_pat<or, or, KORWrr>;
3028 defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
3029 defm : avx512_binop_pat<xor, xor, KXORWrr>;
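// Illustrative only: via the patterns above, (and VK4:$a, VK4:$b) lowers to
// KANDWrr on VK16 copies of both operands followed by a copy back to the
// narrow mask class, since there are no 4-bit mask ALU instructions.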
3032 multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
3033 X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
3034 Predicate prd> {
3035 let Predicates = [prd] in {
3036 let hasSideEffects = 0 in
3037 def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
3038 (ins Src.KRC:$src1, Src.KRC:$src2),
3039 "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
3040 VEX_4V, VEX_L, Sched<[sched]>;
3042 def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
3043 (!cast<Instruction>(NAME##rr) Src.KRC:$src2, Src.KRC:$src1)>;
3044 }
3045 }
3047 defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info, WriteShuffle, HasAVX512>, PD;
3048 defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
3049 defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;
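// Illustrative only: (v16i1 (concat_vectors VK8:$lo, VK8:$hi)) selects to
// "KUNPCKBWrr $hi, $lo" -- the first assembly source supplies the high half,
// which is why the pattern above swaps the operands.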
3052 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3053 SDNode OpNode, X86FoldableSchedWrite sched,
3054 Predicate prd> {
3055 let Predicates = [prd], Defs = [EFLAGS] in
3056 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
3057 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
3058 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
3059 Sched<[sched]>;
3060 }
3062 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
3063 X86FoldableSchedWrite sched,
3064 Predicate prdW = HasAVX512> {
3065 defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
3066 VEX, PD;
3067 defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
3068 VEX, PS;
3069 defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
3070 VEX, PS, VEX_W;
3071 defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
3072 VEX, PD, VEX_W;
3073 }
3075 // TODO - do we need an X86SchedWriteWidths::KMASK type?
3076 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
3077 defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
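// Illustrative only: "kortestw %k1, %k1" ORs the operands and sets ZF when
// the result is all zeroes (and CF when all ones), which is how branches on
// an all-false or all-true mask are usually materialized.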
3080 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3081 SDNode OpNode, X86FoldableSchedWrite sched> {
3082 let Predicates = [HasAVX512] in
3083 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
3084 !strconcat(OpcodeStr,
3085 "\t{$imm, $src, $dst|$dst, $src, $imm}"),
3086 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>,
3087 Sched<[sched]>;
3088 }
3090 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
3091 SDNode OpNode, X86FoldableSchedWrite sched> {
3092 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3093 sched>, VEX, TAPD, VEX_W;
3094 let Predicates = [HasDQI] in
3095 defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3096 sched>, VEX, TAPD;
3097 let Predicates = [HasBWI] in {
3098 defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3099 sched>, VEX, TAPD, VEX_W;
3100 defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3101 sched>, VEX, TAPD;
3102 }
3103 }
3105 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
3106 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
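// Illustrative only: "kshiftrw $8, %k1, %k2" moves the upper eight bits of a
// 16-bit mask into the low byte of %k2 and zero-fills the rest.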
3108 // Patterns for comparing 128/256-bit integer vectors using a 512-bit instruction.
3109 multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3110 string InstStr,
3111 X86VectorVTInfo Narrow,
3112 X86VectorVTInfo Wide> {
3113 def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
3114 (Narrow.VT Narrow.RC:$src2))),
3115 (COPY_TO_REGCLASS
3116 (!cast<Instruction>(InstStr#"Zrr")
3117 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3118 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
3119 Narrow.KRC)>;
3121 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3122 (Frag_su (Narrow.VT Narrow.RC:$src1),
3123 (Narrow.VT Narrow.RC:$src2)))),
3124 (COPY_TO_REGCLASS
3125 (!cast<Instruction>(InstStr#"Zrrk")
3126 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3127 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3128 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
3129 Narrow.KRC)>;
3130 }
3132 // Patterns for comparing 128/256-bit integer vectors using a 512-bit instruction.
3133 multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3134 string InstStr,
3135 X86VectorVTInfo Narrow,
3136 X86VectorVTInfo Wide> {
3137 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3138 (Narrow.VT Narrow.RC:$src2), cond)),
3139 (COPY_TO_REGCLASS
3140 (!cast<Instruction>(InstStr##Zrri)
3141 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3142 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3143 (Frag.OperandTransform $cc)), Narrow.KRC)>;
3145 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3146 (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3147 (Narrow.VT Narrow.RC:$src2),
3148 cond)))),
3149 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
3150 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3151 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3152 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3153 (Frag.OperandTransform $cc)), Narrow.KRC)>;
3154 }
3156 // Same as above, but for fp types which don't use PatFrags.
3157 multiclass axv512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, PatFrag OpNode_su,
3158 string InstStr,
3159 X86VectorVTInfo Narrow,
3160 X86VectorVTInfo Wide> {
3161 def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
3162 (Narrow.VT Narrow.RC:$src2), imm:$cc)),
3163 (COPY_TO_REGCLASS
3164 (!cast<Instruction>(InstStr##Zrri)
3165 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3166 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3167 imm:$cc), Narrow.KRC)>;
3169 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3170 (OpNode_su (Narrow.VT Narrow.RC:$src1),
3171 (Narrow.VT Narrow.RC:$src2), imm:$cc))),
3172 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
3173 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3174 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3175 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3176 imm:$cc), Narrow.KRC)>;
3177 }
3179 let Predicates = [HasAVX512, NoVLX] in {
3180 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
3181 // increase the pattern complexity the way an immediate would.
3182 let AddedComplexity = 2 in {
3183 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v8i32x_info, v16i32_info>;
3184 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v8i32x_info, v16i32_info>;
3186 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v4i32x_info, v16i32_info>;
3187 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v4i32x_info, v16i32_info>;
3189 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v4i64x_info, v8i64_info>;
3190 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v4i64x_info, v8i64_info>;
3192 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v2i64x_info, v8i64_info>;
3193 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v2i64x_info, v8i64_info>;
3194 }
3196 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3197 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3199 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3200 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3202 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3203 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3205 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3206 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3208 defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPS", v8f32x_info, v16f32_info>;
3209 defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPS", v4f32x_info, v16f32_info>;
3210 defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPD", v4f64x_info, v8f64_info>;
3211 defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPD", v2f64x_info, v8f64_info>;
3212 }
3214 let Predicates = [HasBWI, NoVLX] in {
3215 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
3216 // increase the pattern complexity the way an immediate would.
3217 let AddedComplexity = 2 in {
3218 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTB", v32i8x_info, v64i8_info>;
3219 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQB", v32i8x_info, v64i8_info>;
3221 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTB", v16i8x_info, v64i8_info>;
3222 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQB", v16i8x_info, v64i8_info>;
3224 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTW", v16i16x_info, v32i16_info>;
3225 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQW", v16i16x_info, v32i16_info>;
3227 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTW", v8i16x_info, v32i16_info>;
3228 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQW", v8i16x_info, v32i16_info>;
3229 }
3231 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3232 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3234 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3235 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3237 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3238 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3240 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3241 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
3242 }
3244 // Mask setting all 0s or 1s
3245 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
3246 let Predicates = [HasAVX512] in
3247 let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3248 SchedRW = [WriteZero] in
3249 def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3250 [(set KRC:$dst, (VT Val))]>;
3251 }
3253 multiclass avx512_mask_setop_w<PatFrag Val> {
3254 defm W : avx512_mask_setop<VK16, v16i1, Val>;
3255 defm D : avx512_mask_setop<VK32, v32i1, Val>;
3256 defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3257 }
3259 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3260 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
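// These are pseudos; a sketch of the later expansion (assumed, performed
// outside this file): KSET0W becomes "kxorw %k0, %k0, %k0" and KSET1W
// becomes "kxnorw %k0, %k0, %k0".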
3262 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
3263 let Predicates = [HasAVX512] in {
3264 def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3265 def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3266 def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3267 def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3268 def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
3269 def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
3270 def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
3271 def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
3272 }
3274 // Patterns for kmask insert_subvector/extract_subvector to/from index=0
3275 multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3276 RegisterClass RC, ValueType VT> {
3277 def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3278 (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3280 def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3281 (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
3282 }
3283 defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>;
3284 defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>;
3285 defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>;
3286 defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
3287 defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
3288 defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;
3290 defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>;
3291 defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>;
3292 defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>;
3293 defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>;
3294 defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>;
3296 defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>;
3297 defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>;
3298 defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>;
3299 defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>;
3301 defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>;
3302 defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>;
3303 defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>;
3305 defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3306 defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3308 defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3310 //===----------------------------------------------------------------------===//
3311 // AVX-512 - Aligned and unaligned load and store
3312 //===----------------------------------------------------------------------===//
3314 multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3315 X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3316 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3317 bit NoRMPattern = 0,
3318 SDPatternOperator SelectOprr = vselect> {
3319 let hasSideEffects = 0 in {
3320 let isMoveReg = 1 in
3321 def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3322 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3323 _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
3324 EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
3325 def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3326 (ins _.KRCWM:$mask, _.RC:$src),
3327 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3328 "${dst} {${mask}} {z}, $src}"),
3329 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3330 (_.VT _.RC:$src),
3331 _.ImmAllZerosV)))], _.ExeDomain>,
3332 EVEX, EVEX_KZ, Sched<[Sched.RR]>;
3334 let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3335 def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3336 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3337 !if(NoRMPattern, [],
3338 [(set _.RC:$dst,
3339 (_.VT (ld_frag addr:$src)))]),
3340 _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
3341 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
3343 let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3344 def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3345 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3346 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3347 "${dst} {${mask}}, $src1}"),
3348 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3349 (_.VT _.RC:$src1),
3350 (_.VT _.RC:$src0))))], _.ExeDomain>,
3351 EVEX, EVEX_K, Sched<[Sched.RR]>;
3352 def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3353 (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3354 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3355 "${dst} {${mask}}, $src1}"),
3356 [(set _.RC:$dst, (_.VT
3357 (vselect _.KRCWM:$mask,
3358 (_.VT (ld_frag addr:$src1)),
3359 (_.VT _.RC:$src0))))], _.ExeDomain>,
3360 EVEX, EVEX_K, Sched<[Sched.RM]>;
3361 }
3362 def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3363 (ins _.KRCWM:$mask, _.MemOp:$src),
3364 OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3365 "${dst} {${mask}} {z}, $src}",
3366 [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
3367 (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3368 _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
3370 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3371 (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3373 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3374 (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3376 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3377 (!cast<Instruction>(Name#_.ZSuffix##rmk) _.RC:$src0,
3378 _.KRCWM:$mask, addr:$ptr)>;
3379 }
3381 multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3382 AVX512VLVectorVTInfo _, Predicate prd,
3383 X86SchedWriteMoveLSWidths Sched,
3384 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
3385 let Predicates = [prd] in
3386 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3387 _.info512.AlignedLdFrag, masked_load_aligned,
3388 Sched.ZMM, "", NoRMPattern>, EVEX_V512;
3390 let Predicates = [prd, HasVLX] in {
3391 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3392 _.info256.AlignedLdFrag, masked_load_aligned,
3393 Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
3394 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3395 _.info128.AlignedLdFrag, masked_load_aligned,
3396 Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
3397 }
3398 }
3400 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3401 AVX512VLVectorVTInfo _, Predicate prd,
3402 X86SchedWriteMoveLSWidths Sched,
3403 string EVEX2VEXOvrd, bit NoRMPattern = 0,
3404 SDPatternOperator SelectOprr = vselect> {
3405 let Predicates = [prd] in
3406 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3407 masked_load, Sched.ZMM, "",
3408 NoRMPattern, SelectOprr>, EVEX_V512;
3410 let Predicates = [prd, HasVLX] in {
3411 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3412 masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
3413 NoRMPattern, SelectOprr>, EVEX_V256;
3414 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3415 masked_load, Sched.XMM, EVEX2VEXOvrd,
3416 NoRMPattern, SelectOprr>, EVEX_V128;
3417 }
3418 }
3420 multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3421 X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3422 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3423 bit NoMRPattern = 0> {
3424 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3425 let isMoveReg = 1 in
3426 def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3427 OpcodeStr # "\t{$src, $dst|$dst, $src}",
3428 [], _.ExeDomain>, EVEX,
3429 FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
3430 EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
3431 def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3432 (ins _.KRCWM:$mask, _.RC:$src),
3433 OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3434 "${dst} {${mask}}, $src}",
3435 [], _.ExeDomain>, EVEX, EVEX_K,
3436 FoldGenData<BaseName#_.ZSuffix#rrk>,
3437 Sched<[Sched.RR]>;
3438 def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3439 (ins _.KRCWM:$mask, _.RC:$src),
3440 OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3441 "${dst} {${mask}} {z}, $src}",
3442 [], _.ExeDomain>, EVEX, EVEX_KZ,
3443 FoldGenData<BaseName#_.ZSuffix#rrkz>,
3444 Sched<[Sched.RR]>;
3445 }
3447 let hasSideEffects = 0, mayStore = 1 in
3448 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3449 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3450 !if(NoMRPattern, [],
3451 [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3452 _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
3453 EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
3454 def mrk : AVX512PI<opc, MRMDestMem, (outs),
3455 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3456 OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3457 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
3458 NotMemoryFoldable;
3460 def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3461 (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3462 _.KRCWM:$mask, _.RC:$src)>;
3464 def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3465 (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3466 _.RC:$dst, _.RC:$src), 0>;
3467 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3468 (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3469 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3470 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3471 (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3472 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3473 }
3475 multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3476 AVX512VLVectorVTInfo _, Predicate prd,
3477 X86SchedWriteMoveLSWidths Sched,
3478 string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3479 let Predicates = [prd] in
3480 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3481 masked_store, Sched.ZMM, "",
3482 NoMRPattern>, EVEX_V512;
3483 let Predicates = [prd, HasVLX] in {
3484 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3485 masked_store, Sched.YMM,
3486 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3487 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3488 masked_store, Sched.XMM, EVEX2VEXOvrd,
3489 NoMRPattern>, EVEX_V128;
3490 }
3491 }
3493 multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3494 AVX512VLVectorVTInfo _, Predicate prd,
3495 X86SchedWriteMoveLSWidths Sched,
3496 string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3497 let Predicates = [prd] in
3498 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3499 masked_store_aligned, Sched.ZMM, "",
3500 NoMRPattern>, EVEX_V512;
3502 let Predicates = [prd, HasVLX] in {
3503 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3504 masked_store_aligned, Sched.YMM,
3505 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3506 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3507 masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
3508 NoMRPattern>, EVEX_V128;
3509 }
3510 }
3512 defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3513 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3514 avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3515 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3516 PS, EVEX_CD8<32, CD8VF>;
3518 defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3519 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3520 avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3521 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3522 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3524 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3525 SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
3526 avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3527 SchedWriteFMoveLS, "VMOVUPS">,
3528 PS, EVEX_CD8<32, CD8VF>;
3530 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3531 SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
3532 avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3533 SchedWriteFMoveLS, "VMOVUPD">,
3534 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3536 defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3537 HasAVX512, SchedWriteVecMoveLS,
3538 "VMOVDQA", 1>,
3539 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3540 HasAVX512, SchedWriteVecMoveLS,
3541 "VMOVDQA", 1>,
3542 PD, EVEX_CD8<32, CD8VF>;
3544 defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3545 HasAVX512, SchedWriteVecMoveLS,
3546 "VMOVDQA", 1>,
3547 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3548 HasAVX512, SchedWriteVecMoveLS,
3549 "VMOVDQA", 1>,
3550 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3552 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3553 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3554 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3555 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3556 XD, EVEX_CD8<8, CD8VF>;
3558 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3559 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3560 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3561 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3562 XD, VEX_W, EVEX_CD8<16, CD8VF>;
3564 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3565 SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
3566 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3567 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3568 XS, EVEX_CD8<32, CD8VF>;
3570 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3571 SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
3572 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3573 SchedWriteVecMoveLS, "VMOVDQU">,
3574 XS, VEX_W, EVEX_CD8<64, CD8VF>;
3576 // Special instructions to help with spilling when we don't have VLX. We need
3577 // to load or store from a ZMM register instead. These are converted in
3578 // expandPostRAPseudos.
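// Illustrative only: spilling a VR128X register without VLX goes through
// VMOVAPSZ128mr_NOVLX / VMOVAPSZ128rm_NOVLX, which are rewritten to the
// ZMM-width VMOVAPSZ store/load of the containing super-register.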
3579 let isReMaterializable = 1, canFoldAsLoad = 1,
3580 isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3581 def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3582 "", []>, Sched<[WriteFLoadX]>;
3583 def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3584 "", []>, Sched<[WriteFLoadY]>;
3585 def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3586 "", []>, Sched<[WriteFLoadX]>;
3587 def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3588 "", []>, Sched<[WriteFLoadY]>;
3591 let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3592 def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3593 "", []>, Sched<[WriteFStoreX]>;
3594 def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3595 "", []>, Sched<[WriteFStoreY]>;
3596 def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3597 "", []>, Sched<[WriteFStoreX]>;
3598 def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3599 "", []>, Sched<[WriteFStoreY]>;
3602 def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3603 (v8i64 VR512:$src))),
3604 (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3605 VK8), VR512:$src)>;
3607 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3608 (v16i32 VR512:$src))),
3609 (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3611 // These patterns exist to prevent the above patterns from introducing a second
3612 // mask inversion when one already exists.
3613 def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
3614 (v8i64 immAllZerosV),
3615 (v8i64 VR512:$src))),
3616 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3617 def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
3618 (v16i32 immAllZerosV),
3619 (v16i32 VR512:$src))),
3620 (VMOVDQA32Zrrkz VK16:$mask, VR512:$src)>;
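// Illustrative only: for a dag such as
//   (vselect (xor %k, all-ones), zeroes, %v)
// the patterns above consume the existing xor as the inversion, so a single
// zero-masking move is emitted instead of stacking a KNOT on top of the xor.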
3622 multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3623 X86VectorVTInfo Wide> {
3624 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3625 Narrow.RC:$src1, Narrow.RC:$src0)),
3626 (Narrow.VT (EXTRACT_SUBREG
3627 (Wide.VT
3628 (!cast<Instruction>(InstrStr#"rrk")
3629 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3630 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3631 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3632 Narrow.SubRegIdx))>;
3634 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3635 Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3636 (Narrow.VT (EXTRACT_SUBREG
3637 (Wide.VT
3638 (!cast<Instruction>(InstrStr#"rrkz")
3639 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3640 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3641 Narrow.SubRegIdx))>;
3642 }
3644 // Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
3645 // available. Use a 512-bit operation and extract.
3646 let Predicates = [HasAVX512, NoVLX] in {
3647 defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3648 defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3649 defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3650 defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3652 defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3653 defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3654 defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3655 defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
3658 let Predicates = [HasBWI, NoVLX] in {
3659 defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3660 defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3662 defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3663 defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
3666 let Predicates = [HasAVX512] in {
3668 def : Pat<(alignedloadv16i32 addr:$src),
3669 (VMOVDQA64Zrm addr:$src)>;
3670 def : Pat<(alignedloadv32i16 addr:$src),
3671 (VMOVDQA64Zrm addr:$src)>;
3672 def : Pat<(alignedloadv64i8 addr:$src),
3673 (VMOVDQA64Zrm addr:$src)>;
3674 def : Pat<(loadv16i32 addr:$src),
3675 (VMOVDQU64Zrm addr:$src)>;
3676 def : Pat<(loadv32i16 addr:$src),
3677 (VMOVDQU64Zrm addr:$src)>;
3678 def : Pat<(loadv64i8 addr:$src),
3679 (VMOVDQU64Zrm addr:$src)>;
3682 def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3683 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3684 def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3685 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3686 def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3687 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3688 def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3689 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3690 def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3691 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3692 def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3693 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3694 }
3696 let Predicates = [HasVLX] in {
3698 def : Pat<(alignedloadv4i32 addr:$src),
3699 (VMOVDQA64Z128rm addr:$src)>;
3700 def : Pat<(alignedloadv8i16 addr:$src),
3701 (VMOVDQA64Z128rm addr:$src)>;
3702 def : Pat<(alignedloadv16i8 addr:$src),
3703 (VMOVDQA64Z128rm addr:$src)>;
3704 def : Pat<(loadv4i32 addr:$src),
3705 (VMOVDQU64Z128rm addr:$src)>;
3706 def : Pat<(loadv8i16 addr:$src),
3707 (VMOVDQU64Z128rm addr:$src)>;
3708 def : Pat<(loadv16i8 addr:$src),
3709 (VMOVDQU64Z128rm addr:$src)>;
3712 def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3713 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3714 def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3715 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3716 def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3717 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3718 def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3719 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3720 def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3721 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3722 def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3723 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3726 def : Pat<(alignedloadv8i32 addr:$src),
3727 (VMOVDQA64Z256rm addr:$src)>;
3728 def : Pat<(alignedloadv16i16 addr:$src),
3729 (VMOVDQA64Z256rm addr:$src)>;
3730 def : Pat<(alignedloadv32i8 addr:$src),
3731 (VMOVDQA64Z256rm addr:$src)>;
3732 def : Pat<(loadv8i32 addr:$src),
3733 (VMOVDQU64Z256rm addr:$src)>;
3734 def : Pat<(loadv16i16 addr:$src),
3735 (VMOVDQU64Z256rm addr:$src)>;
3736 def : Pat<(loadv32i8 addr:$src),
3737 (VMOVDQU64Z256rm addr:$src)>;
3740 def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3741 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3742 def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3743 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3744 def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3745 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3746 def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3747 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3748 def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3749 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3750 def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3751 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3752 }
3754 // Move Int Doubleword to Packed Double Int
3756 let ExeDomain = SSEPackedInt in {
3757 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3758 "vmovd\t{$src, $dst|$dst, $src}",
3760 (v4i32 (scalar_to_vector GR32:$src)))]>,
3761 EVEX, Sched<[WriteVecMoveFromGpr]>;
3762 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3763 "vmovd\t{$src, $dst|$dst, $src}",
3765 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3766 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
3767 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3768 "vmovq\t{$src, $dst|$dst, $src}",
3770 (v2i64 (scalar_to_vector GR64:$src)))]>,
3771 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3772 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3773 def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3774 (ins i64mem:$src),
3775 "vmovq\t{$src, $dst|$dst, $src}", []>,
3776 EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
3777 let isCodeGenOnly = 1 in {
3778 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3779 "vmovq\t{$src, $dst|$dst, $src}",
3780 [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3781 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3782 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3783 "vmovq\t{$src, $dst|$dst, $src}",
3784 [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3785 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3786 }
3787 } // ExeDomain = SSEPackedInt
3789 // Move Int Doubleword to Single Scalar
3791 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3792 def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3793 "vmovd\t{$src, $dst|$dst, $src}",
3794 [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3795 EVEX, Sched<[WriteVecMoveFromGpr]>;
3796 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3798 // Move doubleword from xmm register to r/m32
3800 let ExeDomain = SSEPackedInt in {
3801 def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3802 "vmovd\t{$src, $dst|$dst, $src}",
3803 [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3804 (iPTR 0)))]>,
3805 EVEX, Sched<[WriteVecMoveToGpr]>;
3806 def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
3807 (ins i32mem:$dst, VR128X:$src),
3808 "vmovd\t{$src, $dst|$dst, $src}",
3809 [(store (i32 (extractelt (v4i32 VR128X:$src),
3810 (iPTR 0))), addr:$dst)]>,
3811 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
3812 } // ExeDomain = SSEPackedInt
3814 // Move quadword from xmm1 register to r/m64
3816 let ExeDomain = SSEPackedInt in {
3817 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3818 "vmovq\t{$src, $dst|$dst, $src}",
3819 [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3820 (iPTR 0)))]>,
3821 PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
3822 Requires<[HasAVX512]>;
3824 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
3825 def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3826 "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
3827 EVEX, VEX_W, Sched<[WriteVecStore]>,
3828 Requires<[HasAVX512, In64BitMode]>;
3830 def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3831 (ins i64mem:$dst, VR128X:$src),
3832 "vmovq\t{$src, $dst|$dst, $src}",
3833 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3834 addr:$dst)]>,
3835 EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
3836 Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
3838 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
3839 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3840 (ins VR128X:$src),
3841 "vmovq\t{$src, $dst|$dst, $src}", []>,
3842 EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
3843 } // ExeDomain = SSEPackedInt
3845 def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
3846 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
3848 let Predicates = [HasAVX512] in {
3849 def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
3850 (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
3851 }
3853 // Move Scalar Single to Double Int
3855 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3856 def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3857 (ins FR32X:$src),
3858 "vmovd\t{$src, $dst|$dst, $src}",
3859 [(set GR32:$dst, (bitconvert FR32X:$src))]>,
3860 EVEX, Sched<[WriteVecMoveToGpr]>;
3861 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3863 // Move Quadword Int to Packed Quadword Int
3865 let ExeDomain = SSEPackedInt in {
3866 def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3867 (ins i64mem:$src),
3868 "vmovq\t{$src, $dst|$dst, $src}",
3869 [(set VR128X:$dst,
3870 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3871 EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
3872 } // ExeDomain = SSEPackedInt
3874 // Allow "vmovd" but print "vmovq".
3875 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3876 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
3877 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3878 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3880 //===----------------------------------------------------------------------===//
3881 // AVX-512 MOVSS, MOVSD
3882 //===----------------------------------------------------------------------===//
3884 multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
3885 X86VectorVTInfo _> {
3886 let Predicates = [HasAVX512, OptForSize] in
3887 def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3888 (ins _.RC:$src1, _.RC:$src2),
3889 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3890 [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3891 _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
3892 def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3893 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3894 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3895 "$dst {${mask}} {z}, $src1, $src2}"),
3896 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3897 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3898 _.ImmAllZerosV)))],
3899 _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
3900 let Constraints = "$src0 = $dst" in
3901 def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3902 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3903 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3904 "$dst {${mask}}, $src1, $src2}"),
3905 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3906 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3907 (_.VT _.RC:$src0))))],
3908 _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
3909 let canFoldAsLoad = 1, isReMaterializable = 1 in {
3910 def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
3911 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3912 [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
3913 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3914 // _alt version uses FR32/FR64 register class.
3915 let isCodeGenOnly = 1 in
3916 def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3917 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3918 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3919 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3920 }
3921 let mayLoad = 1, hasSideEffects = 0 in {
3922 let Constraints = "$src0 = $dst" in
3923 def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3924 (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3925 !strconcat(asm, "\t{$src, $dst {${mask}}|",
3926 "$dst {${mask}}, $src}"),
3927 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
3928 def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3929 (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3930 !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3931 "$dst {${mask}} {z}, $src}"),
3932 [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
3933 }
3934 def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
3935 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3936 [(store _.FRC:$src, addr:$dst)], _.ExeDomain>,
3937 EVEX, Sched<[WriteFStore]>;
3938 let mayStore = 1, hasSideEffects = 0 in
3939 def mrk: AVX512PI<0x11, MRMDestMem, (outs),
3940 (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
3941 !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
3942 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
3943 NotMemoryFoldable;
3944 }
3946 defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
3947 VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
3949 defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
3950 VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
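// Illustrative only (AT&T syntax) of the forms defined above:
//   vmovss %xmm2, %xmm1, %xmm0 {%k1}    # rrk: merge element 0 under mask
//   vmovss (%rdi), %xmm0 {%k1} {z}      # rmkz: zero-masked scalar load
//   vmovss %xmm0, (%rdi) {%k1}          # mrk: masked scalar store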
3953 multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
3954 PatLeaf ZeroFP, X86VectorVTInfo _> {
3956 def : Pat<(_.VT (OpNode _.RC:$src0,
3957 (_.VT (scalar_to_vector
3958 (_.EltVT (X86selects VK1WM:$mask,
3959 (_.EltVT _.FRC:$src1),
3960 (_.EltVT _.FRC:$src2))))))),
3961 (!cast<Instruction>(InstrStr#rrk)
3962 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
3963 VK1WM:$mask,
3964 (_.VT _.RC:$src0),
3965 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
3967 def : Pat<(_.VT (OpNode _.RC:$src0,
3968 (_.VT (scalar_to_vector
3969 (_.EltVT (X86selects VK1WM:$mask,
3970 (_.EltVT _.FRC:$src1),
3971 (_.EltVT ZeroFP))))))),
3972 (!cast<Instruction>(InstrStr#rrkz)
3973 VK1WM:$mask,
3974 (_.VT _.RC:$src0),
3975 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
3976 }
3978 multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
3979 dag Mask, RegisterClass MaskRC> {
3981 def : Pat<(masked_store
3982 (_.info512.VT (insert_subvector undef,
3983 (_.info128.VT _.info128.RC:$src),
3984 (iPTR 0))), addr:$dst, Mask),
3985 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
3986 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
3987 _.info128.RC:$src)>;
3988 }
3991 multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
3992 AVX512VLVectorVTInfo _,
3993 dag Mask, RegisterClass MaskRC,
3994 SubRegIndex subreg> {
3996 def : Pat<(masked_store
3997 (_.info512.VT (insert_subvector undef,
3998 (_.info128.VT _.info128.RC:$src),
3999 (iPTR 0))), addr:$dst, Mask),
4000 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4001 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4002 _.info128.RC:$src)>;
4003 }
4006 // This matches the more recent codegen from clang that avoids emitting a
4007 // 512-bit masked store directly. Codegen will widen a 128-bit masked store
4008 // to 512 bits on AVX512F-only targets.
4009 multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4010 AVX512VLVectorVTInfo _,
4011 dag Mask512, dag Mask128,
4012 RegisterClass MaskRC,
4013 SubRegIndex subreg> {
4014 // AVX512F pattern.
4016 def : Pat<(masked_store
4017 (_.info512.VT (insert_subvector undef,
4018 (_.info128.VT _.info128.RC:$src),
4019 (iPTR 0))), addr:$dst, Mask512),
4020 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4021 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4022 _.info128.RC:$src)>;
4024 // AVX512VL pattern.
4025 def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4026 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4027 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4028 _.info128.RC:$src)>;
4029 }
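// A rough IR-level sketch of what the AVX512F-only pattern above matches
// (names hypothetical): a store of the 128-bit source widened to 512 bits,
// with a mask that can only have bit 0 set, e.g.
//   call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> %wide,
//          <16 x float>* %p, i32 4, <16 x i1> %m)
// which then selects to a single VMOVSSZmrk.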
4031 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4032 dag Mask, RegisterClass MaskRC> {
4034 def : Pat<(_.info128.VT (extract_subvector
4035 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4036 _.info512.ImmAllZerosV)),
4037 (iPTR 0))),
4038 (!cast<Instruction>(InstrStr#rmkz)
4039 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4040 addr:$srcAddr)>;
4042 def : Pat<(_.info128.VT (extract_subvector
4043 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4044 (_.info512.VT (insert_subvector undef,
4045 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4046 (iPTR 0))))),
4047 (iPTR 0))),
4048 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4049 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4050 addr:$srcAddr)>;
4051 }
4054 multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4055 AVX512VLVectorVTInfo _,
4056 dag Mask, RegisterClass MaskRC,
4057 SubRegIndex subreg> {
4059 def : Pat<(_.info128.VT (extract_subvector
4060 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4061 _.info512.ImmAllZerosV)),
4062 (iPTR 0))),
4063 (!cast<Instruction>(InstrStr#rmkz)
4064 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4065 addr:$srcAddr)>;
4067 def : Pat<(_.info128.VT (extract_subvector
4068 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4069 (_.info512.VT (insert_subvector undef,
4070 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4071 (iPTR 0))))),
4072 (iPTR 0))),
4073 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4074 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4075 addr:$srcAddr)>;
4076 }
4079 // This matches the more recent codegen from clang that avoids emitting a
4080 // 512-bit masked load directly. Codegen will widen a 128-bit masked load
4081 // to 512 bits on AVX512F-only targets.
4082 multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4083 AVX512VLVectorVTInfo _,
4084 dag Mask512, dag Mask128,
4085 RegisterClass MaskRC,
4086 SubRegIndex subreg> {
4087 // AVX512F patterns.
4088 def : Pat<(_.info128.VT (extract_subvector
4089 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4090 _.info512.ImmAllZerosV)),
4091 (iPTR 0))),
4092 (!cast<Instruction>(InstrStr#rmkz)
4093 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4094 addr:$srcAddr)>;
4096 def : Pat<(_.info128.VT (extract_subvector
4097 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4098 (_.info512.VT (insert_subvector undef,
4099 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4100 (iPTR 0))))),
4101 (iPTR 0))),
4102 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4103 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4104 addr:$srcAddr)>;
4106 // AVX512Vl patterns.
4107 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4108 _.info128.ImmAllZerosV)),
4109 (!cast<Instruction>(InstrStr#rmkz)
4110 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4111 addr:$srcAddr)>;
4113 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4114 (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4115 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4116 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4117 addr:$srcAddr)>;
4118 }
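// Likewise for loads, a rough sketch (names hypothetical): a widened
// @llvm.masked.load.v16f32.p0v16f32 whose mask holds the scalar condition in
// bit 0 selects to VMOVSSZrmkz (zero passthru) or VMOVSSZrmk (X86vzmovl
// passthru) via the patterns above.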
4120 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4121 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4123 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4124 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4125 defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4126 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4127 defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4128 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4130 defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4131 (v16i1 (insert_subvector
4132 (v16i1 immAllZerosV),
4133 (v4i1 (extract_subvector
4134 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4135 (iPTR 0))),
4136 (iPTR 0))),
4137 (v4i1 (extract_subvector
4138 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4139 (iPTR 0))), GR8, sub_8bit>;
4140 defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4141 (v8i1
4142 (extract_subvector
4143 (v16i1
4144 (insert_subvector
4145 (v16i1 immAllZerosV),
4146 (v2i1 (extract_subvector
4147 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4148 (iPTR 0))),
4149 (iPTR 0))),
4150 (iPTR 0))),
4151 (v2i1 (extract_subvector
4152 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4153 (iPTR 0))), GR8, sub_8bit>;
4155 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4156 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4157 defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4158 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4159 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4160 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4162 defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4163 (v16i1 (insert_subvector
4164 (v16i1 immAllZerosV),
4165 (v4i1 (extract_subvector
4166 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4167 (iPTR 0))),
4168 (iPTR 0))),
4169 (v4i1 (extract_subvector
4170 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4171 (iPTR 0))), GR8, sub_8bit>;
4172 defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4173 (v8i1
4174 (extract_subvector
4175 (v16i1
4176 (insert_subvector
4177 (v16i1 immAllZerosV),
4178 (v2i1 (extract_subvector
4179 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4180 (iPTR 0))),
4181 (iPTR 0))),
4182 (iPTR 0))),
4183 (v2i1 (extract_subvector
4184 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4185 (iPTR 0))), GR8, sub_8bit>;
4187 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4188 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4189 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4190 VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4191 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4193 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4194 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4195 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4197 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4198 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4199 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4200 VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4201 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4203 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fpimm0)),
4204 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4205 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
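// A sketch of where the X86selects patterns above typically come from: a
// masked scalar select such as (illustrative)
//   __m128 r = _mm_mask_move_ss(src0, k, a, b);
// picks one of two f32 values under a single mask bit, which is exactly the
// masked VMOVSSZrrk/VMOVSDZrrk form used here; the FR32X/FR64X operands are
// bounced through VR128X because the instruction operates on XMM registers.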
4207 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
4208 def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4209 (ins VR128X:$src1, VR128X:$src2),
4210 "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4211 []>, XS, EVEX_4V, VEX_LIG,
4212 FoldGenData<"VMOVSSZrr">,
4213 Sched<[SchedWriteFShuffle.XMM]>;
4215 let Constraints = "$src0 = $dst" in
4216 def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4217 (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4218 VR128X:$src1, VR128X:$src2),
4219 "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4220 "$dst {${mask}}, $src1, $src2}",
4221 []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
4222 FoldGenData<"VMOVSSZrrk">,
4223 Sched<[SchedWriteFShuffle.XMM]>;
4225 def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4226 (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4227 "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4228 "$dst {${mask}} {z}, $src1, $src2}",
4229 []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
4230 FoldGenData<"VMOVSSZrrkz">,
4231 Sched<[SchedWriteFShuffle.XMM]>;
4233 def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4234 (ins VR128X:$src1, VR128X:$src2),
4235 "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4236 []>, XD, EVEX_4V, VEX_LIG, VEX_W,
4237 FoldGenData<"VMOVSDZrr">,
4238 Sched<[SchedWriteFShuffle.XMM]>;
4240 let Constraints = "$src0 = $dst" in
4241 def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4242 (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4243 VR128X:$src1, VR128X:$src2),
4244 "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4245 "$dst {${mask}}, $src1, $src2}",
4246 []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
4247 VEX_W, FoldGenData<"VMOVSDZrrk">,
4248 Sched<[SchedWriteFShuffle.XMM]>;
4250 def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4251 (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4252 VR128X:$src2),
4253 "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4254 "$dst {${mask}} {z}, $src1, $src2}",
4255 []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
4256 VEX_W, FoldGenData<"VMOVSDZrrkz">,
4257 Sched<[SchedWriteFShuffle.XMM]>;
4258 }
4260 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4261 (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4262 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4263 "$dst {${mask}}, $src1, $src2}",
4264 (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4265 VR128X:$src1, VR128X:$src2), 0>;
4266 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4267 "$dst {${mask}} {z}, $src1, $src2}",
4268 (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4269 VR128X:$src1, VR128X:$src2), 0>;
4270 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4271 (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4272 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4273 "$dst {${mask}}, $src1, $src2}",
4274 (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4275 VR128X:$src1, VR128X:$src2), 0>;
4276 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4277 "$dst {${mask}} {z}, $src1, $src2}",
4278 (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4279 VR128X:$src1, VR128X:$src2), 0>;
4281 let Predicates = [HasAVX512, OptForSize] in {
4282 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4283 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4284 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4285 (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4287 // Move low f32 and clear high bits.
4288 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4289 (SUBREG_TO_REG (i32 0),
4290 (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4291 (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4292 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4293 (SUBREG_TO_REG (i32 0),
4294 (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4295 (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4297 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4298 (SUBREG_TO_REG (i32 0),
4299 (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4300 (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4301 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4302 (SUBREG_TO_REG (i32 0),
4303 (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4304 (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4305 }
4307 // Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4308 // VMOVSS/SD. Unfortunately, this loses the ability to use XMM16-31.
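// For instance, zeroing the upper elements of a v16f32 under OptForSpeed is
// done with (illustrative asm) "vblendps xmm0, xmm0, xmm1, 1" rather than
// vmovss; BLENDPS/PBLENDW exist only in VEX encodings, which is why XMM16-31
// become unavailable here.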
4309 let Predicates = [HasAVX512, OptForSpeed] in {
4310 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4311 (SUBREG_TO_REG (i32 0),
4312 (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4313 (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4314 (i8 1))), sub_xmm)>;
4315 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4316 (SUBREG_TO_REG (i32 0),
4317 (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4318 (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4319 (i8 3))), sub_xmm)>;
4320 }
4322 let Predicates = [HasAVX512] in {
4323 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4324 (VMOVSSZrm addr:$src)>;
4325 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4326 (VMOVSDZrm addr:$src)>;
4328 // Represent the same patterns above but in the form they appear for
4329 // 256-bit types.
4330 def : Pat<(v8f32 (X86vzload32 addr:$src)),
4331 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4332 def : Pat<(v4f64 (X86vzload64 addr:$src)),
4333 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4335 // Represent the same patterns above but in the form they appear for
4336 // 512-bit types.
4337 def : Pat<(v16f32 (X86vzload32 addr:$src)),
4338 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4339 def : Pat<(v8f64 (X86vzload64 addr:$src)),
4340 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4341 }
4343 let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4344 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4345 (ins VR128X:$src),
4346 "vmovq\t{$src, $dst|$dst, $src}",
4347 [(set VR128X:$dst, (v2i64 (X86vzmovl
4348 (v2i64 VR128X:$src))))]>,
4349 EVEX, VEX_W;
4350 }
4352 let Predicates = [HasAVX512] in {
4353 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4354 (VMOVDI2PDIZrr GR32:$src)>;
4356 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4357 (VMOV64toPQIZrr GR64:$src)>;
4359 // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
4360 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
4361 (VMOVDI2PDIZrm addr:$src)>;
4362 def : Pat<(v4i32 (X86vzload32 addr:$src)),
4363 (VMOVDI2PDIZrm addr:$src)>;
4364 def : Pat<(v8i32 (X86vzload32 addr:$src)),
4365 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4366 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4367 (VMOVZPQILo2PQIZrr VR128X:$src)>;
4368 def : Pat<(v2i64 (X86vzload64 addr:$src)),
4369 (VMOVQI2PQIZrm addr:$src)>;
4370 def : Pat<(v4i64 (X86vzload64 addr:$src)),
4371 (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4373 // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4374 def : Pat<(v16i32 (X86vzload32 addr:$src)),
4375 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4376 def : Pat<(v8i64 (X86vzload64 addr:$src)),
4377 (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4379 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4380 (SUBREG_TO_REG (i32 0),
4381 (v2f64 (VMOVZPQILo2PQIZrr
4382 (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4383 sub_xmm)>;
4384 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4385 (SUBREG_TO_REG (i32 0),
4386 (v2i64 (VMOVZPQILo2PQIZrr
4387 (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4388 sub_xmm)>;
4390 def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4391 (SUBREG_TO_REG (i32 0),
4392 (v2f64 (VMOVZPQILo2PQIZrr
4393 (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4394 sub_xmm)>;
4395 def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4396 (SUBREG_TO_REG (i32 0),
4397 (v2i64 (VMOVZPQILo2PQIZrr
4398 (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4399 sub_xmm)>;
4400 }
4402 //===----------------------------------------------------------------------===//
4403 // AVX-512 - Non-temporals
4404 //===----------------------------------------------------------------------===//
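// VMOVNTDQA is a streaming-hint (non-temporal) load and requires an aligned
// memory operand, so it is only matched for alignednontemporalload below,
// e.g. (illustrative asm): "vmovntdqa zmm0, zmmword ptr [rdi]".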
4406 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4407 (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4408 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4409 EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4411 let Predicates = [HasVLX] in {
4412 def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4413 (ins i256mem:$src),
4414 "vmovntdqa\t{$src, $dst|$dst, $src}",
4415 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4416 EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4418 def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4419 (ins i128mem:$src),
4420 "vmovntdqa\t{$src, $dst|$dst, $src}",
4421 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4422 EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
4423 }
4425 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4426 X86SchedWriteMoveLS Sched,
4427 PatFrag st_frag = alignednontemporalstore> {
4428 let SchedRW = [Sched.MR], AddedComplexity = 400 in
4429 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4430 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4431 [(st_frag (_.VT _.RC:$src), addr:$dst)],
4432 _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
4433 }
4435 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4436 AVX512VLVectorVTInfo VTInfo,
4437 X86SchedWriteMoveLSWidths Sched> {
4438 let Predicates = [HasAVX512] in
4439 defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4441 let Predicates = [HasAVX512, HasVLX] in {
4442 defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4443 defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
4444 }
4445 }
4447 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4448 SchedWriteVecMoveLSNT>, PD;
4449 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4450 SchedWriteFMoveLSNT>, PD, VEX_W;
4451 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4452 SchedWriteFMoveLSNT>, PS;
4454 let Predicates = [HasAVX512], AddedComplexity = 400 in {
4455 def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4456 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4457 def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4458 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4459 def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4460 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4462 def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4463 (VMOVNTDQAZrm addr:$src)>;
4464 def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4465 (VMOVNTDQAZrm addr:$src)>;
4466 def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4467 (VMOVNTDQAZrm addr:$src)>;
4468 def : Pat<(v16i32 (alignednontemporalload addr:$src)),
4469 (VMOVNTDQAZrm addr:$src)>;
4470 def : Pat<(v32i16 (alignednontemporalload addr:$src)),
4471 (VMOVNTDQAZrm addr:$src)>;
4472 def : Pat<(v64i8 (alignednontemporalload addr:$src)),
4473 (VMOVNTDQAZrm addr:$src)>;
4474 }
4476 let Predicates = [HasVLX], AddedComplexity = 400 in {
4477 def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4478 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4479 def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4480 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4481 def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4482 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4484 def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4485 (VMOVNTDQAZ256rm addr:$src)>;
4486 def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4487 (VMOVNTDQAZ256rm addr:$src)>;
4488 def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4489 (VMOVNTDQAZ256rm addr:$src)>;
4490 def : Pat<(v8i32 (alignednontemporalload addr:$src)),
4491 (VMOVNTDQAZ256rm addr:$src)>;
4492 def : Pat<(v16i16 (alignednontemporalload addr:$src)),
4493 (VMOVNTDQAZ256rm addr:$src)>;
4494 def : Pat<(v32i8 (alignednontemporalload addr:$src)),
4495 (VMOVNTDQAZ256rm addr:$src)>;
4497 def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4498 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4499 def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4500 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4501 def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4502 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4504 def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4505 (VMOVNTDQAZ128rm addr:$src)>;
4506 def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4507 (VMOVNTDQAZ128rm addr:$src)>;
4508 def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4509 (VMOVNTDQAZ128rm addr:$src)>;
4510 def : Pat<(v4i32 (alignednontemporalload addr:$src)),
4511 (VMOVNTDQAZ128rm addr:$src)>;
4512 def : Pat<(v8i16 (alignednontemporalload addr:$src)),
4513 (VMOVNTDQAZ128rm addr:$src)>;
4514 def : Pat<(v16i8 (alignednontemporalload addr:$src)),
4515 (VMOVNTDQAZ128rm addr:$src)>;
4516 }
4518 //===----------------------------------------------------------------------===//
4519 // AVX-512 - Integer arithmetic
4520 //===----------------------------------------------------------------------===//
4521 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4522 X86VectorVTInfo _, X86FoldableSchedWrite sched,
4523 bit IsCommutable = 0> {
4524 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4525 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4526 "$src2, $src1", "$src1, $src2",
4527 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4528 IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
4529 Sched<[sched]>;
4531 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4532 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4533 "$src2, $src1", "$src1, $src2",
4534 (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
4535 AVX512BIBase, EVEX_4V,
4536 Sched<[sched.Folded, sched.ReadAfterFold]>;
4537 }
4539 multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4540 X86VectorVTInfo _, X86FoldableSchedWrite sched,
4541 bit IsCommutable = 0> :
4542 avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4543 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4544 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4545 "${src2}"##_.BroadcastStr##", $src1",
4546 "$src1, ${src2}"##_.BroadcastStr,
4547 (_.VT (OpNode _.RC:$src1,
4548 (X86VBroadcast
4549 (_.ScalarLdFrag addr:$src2))))>,
4550 AVX512BIBase, EVEX_4V, EVEX_B,
4551 Sched<[sched.Folded, sched.ReadAfterFold]>;
4552 }
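// The rmb form above is what provides the embedded-broadcast assembly
// syntax, e.g. (illustrative): "vpaddd zmm1 {k1}{z}, zmm2, dword ptr
// [rax]{1to16}", i.e. one scalar load broadcast to all 16 dword elements.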
4554 multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4555 AVX512VLVectorVTInfo VTInfo,
4556 X86SchedWriteWidths sched, Predicate prd,
4557 bit IsCommutable = 0> {
4558 let Predicates = [prd] in
4559 defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4560 IsCommutable>, EVEX_V512;
4562 let Predicates = [prd, HasVLX] in {
4563 defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4564 sched.YMM, IsCommutable>, EVEX_V256;
4565 defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4566 sched.XMM, IsCommutable>, EVEX_V128;
4567 }
4568 }
4570 multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4571 AVX512VLVectorVTInfo VTInfo,
4572 X86SchedWriteWidths sched, Predicate prd,
4573 bit IsCommutable = 0> {
4574 let Predicates = [prd] in
4575 defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4576 IsCommutable>, EVEX_V512;
4578 let Predicates = [prd, HasVLX] in {
4579 defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4580 sched.YMM, IsCommutable>, EVEX_V256;
4581 defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4582 sched.XMM, IsCommutable>, EVEX_V128;
4583 }
4584 }
4586 multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4587 X86SchedWriteWidths sched, Predicate prd,
4588 bit IsCommutable = 0> {
4589 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4590 sched, prd, IsCommutable>,
4591 VEX_W, EVEX_CD8<64, CD8VF>;
4592 }
4594 multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4595 X86SchedWriteWidths sched, Predicate prd,
4596 bit IsCommutable = 0> {
4597 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4598 sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
4599 }
4601 multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4602 X86SchedWriteWidths sched, Predicate prd,
4603 bit IsCommutable = 0> {
4604 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4605 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4606 VEX_WIG;
4607 }
4609 multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4610 X86SchedWriteWidths sched, Predicate prd,
4611 bit IsCommutable = 0> {
4612 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4613 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
4614 VEX_WIG;
4615 }
4617 multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4618 SDNode OpNode, X86SchedWriteWidths sched,
4619 Predicate prd, bit IsCommutable = 0> {
4620 defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
4621 IsCommutable>;
4623 defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
4624 IsCommutable>;
4625 }
4627 multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4628 SDNode OpNode, X86SchedWriteWidths sched,
4629 Predicate prd, bit IsCommutable = 0> {
4630 defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
4631 IsCommutable>, VEX_WIG;
4633 defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
4634 IsCommutable>, VEX_WIG;
4635 }
4637 multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4638 bits<8> opc_d, bits<8> opc_q,
4639 string OpcodeStr, SDNode OpNode,
4640 X86SchedWriteWidths sched,
4641 bit IsCommutable = 0> {
4642 defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4643 sched, HasAVX512, IsCommutable>,
4644 avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4645 sched, HasBWI, IsCommutable>;
4646 }
4648 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
4649 X86FoldableSchedWrite sched,
4650 SDNode OpNode,X86VectorVTInfo _Src,
4651 X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4652 bit IsCommutable = 0> {
4653 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4654 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4655 "$src2, $src1","$src1, $src2",
4656 (_Dst.VT (OpNode
4657 (_Src.VT _Src.RC:$src1),
4658 (_Src.VT _Src.RC:$src2))),
4659 IsCommutable>,
4660 AVX512BIBase, EVEX_4V, Sched<[sched]>;
4661 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4662 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4663 "$src2, $src1", "$src1, $src2",
4664 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4665 (_Src.LdFrag addr:$src2)))>,
4666 AVX512BIBase, EVEX_4V,
4667 Sched<[sched.Folded, sched.ReadAfterFold]>;
4669 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4670 (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
4671 OpcodeStr,
4672 "${src2}"##_Brdct.BroadcastStr##", $src1",
4673 "$src1, ${src2}"##_Brdct.BroadcastStr,
4674 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4675 (_Brdct.VT (X86VBroadcast
4676 (_Brdct.ScalarLdFrag addr:$src2))))))>,
4677 AVX512BIBase, EVEX_4V, EVEX_B,
4678 Sched<[sched.Folded, sched.ReadAfterFold]>;
4679 }
4681 defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
4682 SchedWriteVecALU, 1>;
4683 defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
4684 SchedWriteVecALU, 0>;
4685 defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
4686 SchedWriteVecALU, HasBWI, 1>;
4687 defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
4688 SchedWriteVecALU, HasBWI, 0>;
4689 defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
4690 SchedWriteVecALU, HasBWI, 1>;
4691 defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
4692 SchedWriteVecALU, HasBWI, 0>;
4693 defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
4694 SchedWritePMULLD, HasAVX512, 1>, T8PD;
4695 defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
4696 SchedWriteVecIMul, HasBWI, 1>;
4697 defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
4698 SchedWriteVecIMul, HasDQI, 1>, T8PD,
4699 NotEVEX2VEXConvertible;
4700 defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
4701 HasBWI, 1>;
4702 defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
4703 HasBWI, 1>;
4704 defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
4705 SchedWriteVecIMul, HasBWI, 1>, T8PD;
4706 defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
4707 SchedWriteVecALU, HasBWI, 1>;
4708 defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
4709 SchedWriteVecIMul, HasAVX512, 1>, T8PD;
4710 defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
4711 SchedWriteVecIMul, HasAVX512, 1>;
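// As a concrete example of the expansion above (assuming the usual TableGen
// name concatenation): "defm VPADD" yields VPADDB/VPADDW under HasBWI and
// VPADDD/VPADDQ under HasAVX512, each at Z128/Z256/Z widths with rr/rm
// variants, plus an rmb embedded-broadcast variant for the d/q forms, e.g.
// VPADDDZ256rmb.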
4713 multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
4714 X86SchedWriteWidths sched,
4715 AVX512VLVectorVTInfo _SrcVTInfo,
4716 AVX512VLVectorVTInfo _DstVTInfo,
4717 SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
4718 let Predicates = [prd] in
4719 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
4720 _SrcVTInfo.info512, _DstVTInfo.info512,
4721 v8i64_info, IsCommutable>,
4722 EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
4723 let Predicates = [HasVLX, prd] in {
4724 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
4725 _SrcVTInfo.info256, _DstVTInfo.info256,
4726 v4i64x_info, IsCommutable>,
4727 EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
4728 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
4729 _SrcVTInfo.info128, _DstVTInfo.info128,
4730 v2i64x_info, IsCommutable>,
4731 EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
4732 }
4733 }
4735 defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
4736 avx512vl_i8_info, avx512vl_i8_info,
4737 X86multishift, HasVBMI, 0>, T8PD;
4739 multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4740 X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
4741 X86FoldableSchedWrite sched> {
4742 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4743 (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
4744 OpcodeStr,
4745 "${src2}"##_Src.BroadcastStr##", $src1",
4746 "$src1, ${src2}"##_Src.BroadcastStr,
4747 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4748 (_Src.VT (X86VBroadcast
4749 (_Src.ScalarLdFrag addr:$src2))))))>,
4750 EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
4751 Sched<[sched.Folded, sched.ReadAfterFold]>;
4752 }
4754 multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
4755 SDNode OpNode,X86VectorVTInfo _Src,
4756 X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
4757 bit IsCommutable = 0> {
4758 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4759 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4760 "$src2, $src1","$src1, $src2",
4761 (_Dst.VT (OpNode
4762 (_Src.VT _Src.RC:$src1),
4763 (_Src.VT _Src.RC:$src2))),
4764 IsCommutable, IsCommutable>,
4765 EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
4766 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4767 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4768 "$src2, $src1", "$src1, $src2",
4769 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4770 (_Src.LdFrag addr:$src2)))>,
4771 EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
4772 Sched<[sched.Folded, sched.ReadAfterFold]>;
4773 }
4775 multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
4776 SDNode OpNode> {
4777 let Predicates = [HasBWI] in
4778 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
4779 v32i16_info, SchedWriteShuffle.ZMM>,
4780 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
4781 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
4782 let Predicates = [HasBWI, HasVLX] in {
4783 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
4784 v16i16x_info, SchedWriteShuffle.YMM>,
4785 avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
4786 v16i16x_info, SchedWriteShuffle.YMM>,
4787 EVEX_V256;
4788 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
4789 v8i16x_info, SchedWriteShuffle.XMM>,
4790 avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
4791 v8i16x_info, SchedWriteShuffle.XMM>,
4792 EVEX_V128;
4793 }
4794 }
4795 multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
4796 SDNode OpNode> {
4797 let Predicates = [HasBWI] in
4798 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
4799 SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
4800 let Predicates = [HasBWI, HasVLX] in {
4801 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
4802 v32i8x_info, SchedWriteShuffle.YMM>,
4803 EVEX_V256, VEX_WIG;
4804 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
4805 v16i8x_info, SchedWriteShuffle.XMM>,
4806 EVEX_V128, VEX_WIG;
4807 }
4808 }
4810 multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
4811 SDNode OpNode, AVX512VLVectorVTInfo _Src,
4812 AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
4813 let Predicates = [HasBWI] in
4814 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
4815 _Dst.info512, SchedWriteVecIMul.ZMM,
4816 IsCommutable>, EVEX_V512;
4817 let Predicates = [HasBWI, HasVLX] in {
4818 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
4819 _Dst.info256, SchedWriteVecIMul.YMM,
4820 IsCommutable>, EVEX_V256;
4821 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
4822 _Dst.info128, SchedWriteVecIMul.XMM,
4823 IsCommutable>, EVEX_V128;
4824 }
4825 }
4827 defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
4828 defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
4829 defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
4830 defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
4832 defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
4833 avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
4834 defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
4835 avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
4837 defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
4838 SchedWriteVecALU, HasBWI, 1>, T8PD;
4839 defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
4840 SchedWriteVecALU, HasBWI, 1>;
4841 defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
4842 SchedWriteVecALU, HasAVX512, 1>, T8PD;
4843 defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
4844 SchedWriteVecALU, HasAVX512, 1>, T8PD,
4845 NotEVEX2VEXConvertible;
4847 defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
4848 SchedWriteVecALU, HasBWI, 1>;
4849 defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
4850 SchedWriteVecALU, HasBWI, 1>, T8PD;
4851 defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
4852 SchedWriteVecALU, HasAVX512, 1>, T8PD;
4853 defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
4854 SchedWriteVecALU, HasAVX512, 1>, T8PD,
4855 NotEVEX2VEXConvertible;
4857 defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
4858 SchedWriteVecALU, HasBWI, 1>, T8PD;
4859 defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
4860 SchedWriteVecALU, HasBWI, 1>;
4861 defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
4862 SchedWriteVecALU, HasAVX512, 1>, T8PD;
4863 defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
4864 SchedWriteVecALU, HasAVX512, 1>, T8PD,
4865 NotEVEX2VEXConvertible;
4867 defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
4868 SchedWriteVecALU, HasBWI, 1>;
4869 defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
4870 SchedWriteVecALU, HasBWI, 1>, T8PD;
4871 defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
4872 SchedWriteVecALU, HasAVX512, 1>, T8PD;
4873 defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
4874 SchedWriteVecALU, HasAVX512, 1>, T8PD,
4875 NotEVEX2VEXConvertible;
4877 // PMULLQ: Use the 512-bit version to implement the 128/256-bit forms when VLX is unavailable.
4878 let Predicates = [HasDQI, NoVLX] in {
4879 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
4880 (EXTRACT_SUBREG
4881 (VPMULLQZrr
4882 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4883 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4884 sub_ymm)>;
4886 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
4887 (EXTRACT_SUBREG
4888 (VPMULLQZrr
4889 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4890 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
4891 sub_xmm)>;
4892 }
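// Note (illustrative): with this lowering a v4i64 multiply on a HasDQI,
// NoVLX target executes as a full-width vpmullq on ZMM; the upper lanes are
// undefined and are simply discarded by the EXTRACT_SUBREG.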
4911 multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
4912 def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
4913 (EXTRACT_SUBREG
4914 (Instr
4915 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4916 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4917 sub_ymm)>;
4919 def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
4920 (EXTRACT_SUBREG
4921 (Instr
4922 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4923 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
4924 sub_xmm)>;
4925 }
4927 let Predicates = [HasAVX512, NoVLX] in {
4928 defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
4929 defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
4930 defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;
4931 defm : avx512_min_max_lowering<VPMINSQZrr, smin>;
4932 }
4934 //===----------------------------------------------------------------------===//
4935 // AVX-512 Logical Instructions
4936 //===----------------------------------------------------------------------===//
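// Only dword/qword forms of these exist in the ISA (VPANDD/VPANDQ, etc.).
// Since the operations are bitwise, byte/word vectors are handled by the
// patterns further down that reuse the qword instructions, e.g. a v16i8 'and'
// is matched to VPANDQZ128rr.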
4938 defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
4939 SchedWriteVecLogic, HasAVX512, 1>;
4940 defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
4941 SchedWriteVecLogic, HasAVX512, 1>;
4942 defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
4943 SchedWriteVecLogic, HasAVX512, 1>;
4944 defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
4945 SchedWriteVecLogic, HasAVX512>;
4947 let Predicates = [HasVLX] in {
4948 def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
4949 (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
4950 def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
4951 (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
4953 def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
4954 (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
4955 def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
4956 (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
4958 def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
4959 (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
4960 def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
4961 (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
4963 def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
4964 (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
4965 def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
4966 (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
4968 def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
4969 (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
4970 def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
4971 (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
4973 def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
4974 (VPORQZ128rm VR128X:$src1, addr:$src2)>;
4975 def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
4976 (VPORQZ128rm VR128X:$src1, addr:$src2)>;
4978 def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
4979 (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
4980 def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
4981 (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
4983 def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
4984 (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
4985 def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
4986 (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
4988 def : Pat<(and VR128X:$src1,
4989 (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
4990 (VPANDDZ128rmb VR128X:$src1, addr:$src2)>;
4991 def : Pat<(or VR128X:$src1,
4992 (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
4993 (VPORDZ128rmb VR128X:$src1, addr:$src2)>;
4994 def : Pat<(xor VR128X:$src1,
4995 (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
4996 (VPXORDZ128rmb VR128X:$src1, addr:$src2)>;
4997 def : Pat<(X86andnp VR128X:$src1,
4998 (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
4999 (VPANDNDZ128rmb VR128X:$src1, addr:$src2)>;
5001 def : Pat<(and VR128X:$src1,
5002 (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
5003 (VPANDQZ128rmb VR128X:$src1, addr:$src2)>;
5004 def : Pat<(or VR128X:$src1,
5005 (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
5006 (VPORQZ128rmb VR128X:$src1, addr:$src2)>;
5007 def : Pat<(xor VR128X:$src1,
5008 (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
5009 (VPXORQZ128rmb VR128X:$src1, addr:$src2)>;
5010 def : Pat<(X86andnp VR128X:$src1,
5011 (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
5012 (VPANDNQZ128rmb VR128X:$src1, addr:$src2)>;
5014 def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
5015 (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5016 def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
5017 (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5019 def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
5020 (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5021 def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
5022 (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5024 def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
5025 (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5026 def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
5027 (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5029 def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
5030 (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5031 def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
5032 (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5034 def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
5035 (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5036 def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
5037 (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5039 def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
5040 (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5041 def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
5042 (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5044 def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
5045 (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5046 def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
5047 (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5049 def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
5050 (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5051 def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
5052 (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5054 def : Pat<(and VR256X:$src1,
5055 (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
5056 (VPANDDZ256rmb VR256X:$src1, addr:$src2)>;
5057 def : Pat<(or VR256X:$src1,
5058 (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
5059 (VPORDZ256rmb VR256X:$src1, addr:$src2)>;
5060 def : Pat<(xor VR256X:$src1,
5061 (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
5062 (VPXORDZ256rmb VR256X:$src1, addr:$src2)>;
5063 def : Pat<(X86andnp VR256X:$src1,
5064 (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
5065 (VPANDNDZ256rmb VR256X:$src1, addr:$src2)>;
5067 def : Pat<(and VR256X:$src1,
5068 (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
5069 (VPANDQZ256rmb VR256X:$src1, addr:$src2)>;
5070 def : Pat<(or VR256X:$src1,
5071 (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
5072 (VPORQZ256rmb VR256X:$src1, addr:$src2)>;
5073 def : Pat<(xor VR256X:$src1,
5074 (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
5075 (VPXORQZ256rmb VR256X:$src1, addr:$src2)>;
5076 def : Pat<(X86andnp VR256X:$src1,
5077 (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
5078 (VPANDNQZ256rmb VR256X:$src1, addr:$src2)>;
5079 }
5081 let Predicates = [HasAVX512] in {
5082 def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5083 (VPANDQZrr VR512:$src1, VR512:$src2)>;
5084 def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5085 (VPANDQZrr VR512:$src1, VR512:$src2)>;
5087 def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5088 (VPORQZrr VR512:$src1, VR512:$src2)>;
5089 def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5090 (VPORQZrr VR512:$src1, VR512:$src2)>;
5092 def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5093 (VPXORQZrr VR512:$src1, VR512:$src2)>;
5094 def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5095 (VPXORQZrr VR512:$src1, VR512:$src2)>;
5097 def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5098 (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5099 def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5100 (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5102 def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
5103 (VPANDQZrm VR512:$src1, addr:$src2)>;
5104 def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
5105 (VPANDQZrm VR512:$src1, addr:$src2)>;
5107 def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
5108 (VPORQZrm VR512:$src1, addr:$src2)>;
5109 def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
5110 (VPORQZrm VR512:$src1, addr:$src2)>;
5112 def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
5113 (VPXORQZrm VR512:$src1, addr:$src2)>;
5114 def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
5115 (VPXORQZrm VR512:$src1, addr:$src2)>;
5117 def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
5118 (VPANDNQZrm VR512:$src1, addr:$src2)>;
5119 def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
5120 (VPANDNQZrm VR512:$src1, addr:$src2)>;
5122 def : Pat<(and VR512:$src1,
5123 (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
5124 (VPANDDZrmb VR512:$src1, addr:$src2)>;
5125 def : Pat<(or VR512:$src1,
5126 (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
5127 (VPORDZrmb VR512:$src1, addr:$src2)>;
5128 def : Pat<(xor VR512:$src1,
5129 (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
5130 (VPXORDZrmb VR512:$src1, addr:$src2)>;
5131 def : Pat<(X86andnp VR512:$src1,
5132 (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
5133 (VPANDNDZrmb VR512:$src1, addr:$src2)>;
5135 def : Pat<(and VR512:$src1,
5136 (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
5137 (VPANDQZrmb VR512:$src1, addr:$src2)>;
5138 def : Pat<(or VR512:$src1,
5139 (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
5140 (VPORQZrmb VR512:$src1, addr:$src2)>;
5141 def : Pat<(xor VR512:$src1,
5142 (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
5143 (VPXORQZrmb VR512:$src1, addr:$src2)>;
5144 def : Pat<(X86andnp VR512:$src1,
5145 (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
5146 (VPANDNQZrmb VR512:$src1, addr:$src2)>;
5147 }
5149 // Patterns to catch a vselect whose type differs from that of the logic op.
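// For example: a v4i32-granular (vselect VK4WM:$mask, ...) wrapped around a
// bitconverted v2i64 'and' is matched to VPANDDZ128rrk, so the write-mask is
// applied at the dword granularity the vselect requires even though the logic
// op was formed on qwords.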
5150 multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
5151 X86VectorVTInfo _,
5152 X86VectorVTInfo IntInfo> {
5153 // Masked register-register logical operations.
5154 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5155 (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5156 _.RC:$src0)),
5157 (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5158 _.RC:$src1, _.RC:$src2)>;
5160 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5161 (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5162 _.ImmAllZerosV)),
5163 (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5164 _.RC:$src2)>;
5166 // Masked register-memory logical operations.
5167 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5168 (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5169 (load addr:$src2)))),
5170 _.RC:$src0)),
5171 (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5172 _.RC:$src1, addr:$src2)>;
5173 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5174 (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5175 (load addr:$src2)))),
5176 _.ImmAllZerosV)),
5177 (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
5178 addr:$src2)>;
5179 }
5181 multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
5182 X86VectorVTInfo _,
5183 X86VectorVTInfo IntInfo> {
5184 // Register-broadcast logical operations.
5185 def : Pat<(IntInfo.VT (OpNode _.RC:$src1,
5186 (bitconvert (_.VT (X86VBroadcast
5187 (_.ScalarLdFrag addr:$src2)))))),
5188 (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
5189 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5190 (bitconvert
5191 (IntInfo.VT (OpNode _.RC:$src1,
5192 (bitconvert (_.VT
5193 (X86VBroadcast
5194 (_.ScalarLdFrag addr:$src2))))))),
5195 _.RC:$src0)),
5196 (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5197 _.RC:$src1, addr:$src2)>;
5198 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5199 (bitconvert
5200 (IntInfo.VT (OpNode _.RC:$src1,
5201 (bitconvert (_.VT
5202 (X86VBroadcast
5203 (_.ScalarLdFrag addr:$src2))))))),
5204 _.ImmAllZerosV)),
5205 (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
5206 _.RC:$src1, addr:$src2)>;
5207 }
5209 multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
5210 AVX512VLVectorVTInfo SelectInfo,
5211 AVX512VLVectorVTInfo IntInfo> {
5212 let Predicates = [HasVLX] in {
5213 defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
5214 IntInfo.info128>;
5215 defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
5216 IntInfo.info256>;
5217 }
5218 let Predicates = [HasAVX512] in {
5219 defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
5220 IntInfo.info512>;
5221 }
5222 }
5224 multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
5225 AVX512VLVectorVTInfo SelectInfo,
5226 AVX512VLVectorVTInfo IntInfo> {
5227 let Predicates = [HasVLX] in {
5228 defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
5229 SelectInfo.info128, IntInfo.info128>;
5230 defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
5231 SelectInfo.info256, IntInfo.info256>;
5232 }
5233 let Predicates = [HasAVX512] in {
5234 defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
5235 SelectInfo.info512, IntInfo.info512>;
5236 }
5237 }
5239 multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
5240 // i64 vselect with i32/i16/i8 logic op
5241 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5242 avx512vl_i32_info>;
5243 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5244 avx512vl_i16_info>;
5245 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5246 avx512vl_i8_info>;
5248 // i32 vselect with i64/i16/i8 logic op
5249 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5250 avx512vl_i64_info>;
5251 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5252 avx512vl_i16_info>;
5253 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5254 avx512vl_i8_info>;
5256 // f32 vselect with i64/i32/i16/i8 logic op
5257 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5258 avx512vl_i64_info>;
5259 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5260 avx512vl_i32_info>;
5261 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5262 avx512vl_i16_info>;
5263 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5264 avx512vl_i8_info>;
5266 // f64 vselect with i64/i32/i16/i8 logic op
5267 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5268 avx512vl_i64_info>;
5269 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5270 avx512vl_i32_info>;
5271 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5272 avx512vl_i16_info>;
5273 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5274 avx512vl_i8_info>;
5276 defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
5277 avx512vl_f32_info,
5278 avx512vl_i32_info>;
5279 defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
5280 avx512vl_f64_info,
5281 avx512vl_i64_info>;
5282 }
5284 defm : avx512_logical_lowering_types<"VPAND", and>;
5285 defm : avx512_logical_lowering_types<"VPOR", or>;
5286 defm : avx512_logical_lowering_types<"VPXOR", xor>;
5287 defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
5289 //===----------------------------------------------------------------------===//
5290 // AVX-512 FP arithmetic
5291 //===----------------------------------------------------------------------===//
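// Most FP binops below come in three flavors: the plain form, an "rrb" form
// with EVEX.B embedded rounding control (illustrative asm: "vaddss xmm1,
// xmm2, xmm3, {rz-sae}"), and, for min/max, an {sae} suppress-all-exceptions
// form.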
5293 multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5294 SDNode OpNode, SDNode VecNode,
5295 X86FoldableSchedWrite sched, bit IsCommutable> {
5296 let ExeDomain = _.ExeDomain in {
5297 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5298 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5299 "$src2, $src1", "$src1, $src2",
5300 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5301 Sched<[sched]>;
5303 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5304 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5305 "$src2, $src1", "$src1, $src2",
5306 (_.VT (VecNode _.RC:$src1,
5307 _.ScalarIntMemCPat:$src2))>,
5308 Sched<[sched.Folded, sched.ReadAfterFold]>;
5309 let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5310 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5311 (ins _.FRC:$src1, _.FRC:$src2),
5312 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5313 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5314 Sched<[sched]> {
5315 let isCommutable = IsCommutable;
5316 }
5317 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5318 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5319 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5320 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5321 (_.ScalarLdFrag addr:$src2)))]>,
5322 Sched<[sched.Folded, sched.ReadAfterFold]>;
5323 }
5324 }
5325 }
5327 multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5328 SDNode VecNode, X86FoldableSchedWrite sched,
5329 bit IsCommutable = 0> {
5330 let ExeDomain = _.ExeDomain in
5331 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5332 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5333 "$rc, $src2, $src1", "$src1, $src2, $rc",
5334 (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5335 (i32 timm:$rc)), IsCommutable>,
5336 EVEX_B, EVEX_RC, Sched<[sched]>;
5337 }
5338 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5339 SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5340 X86FoldableSchedWrite sched, bit IsCommutable> {
5341 let ExeDomain = _.ExeDomain in {
5342 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5343 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5344 "$src2, $src1", "$src1, $src2",
5345 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5346 Sched<[sched]>;
5348 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5349 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5350 "$src2, $src1", "$src1, $src2",
5351 (_.VT (VecNode _.RC:$src1,
5352 _.ScalarIntMemCPat:$src2))>,
5353 Sched<[sched.Folded, sched.ReadAfterFold]>;
5355 let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5356 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5357 (ins _.FRC:$src1, _.FRC:$src2),
5358 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5359 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5360 Sched<[sched]> {
5361 let isCommutable = IsCommutable;
5362 }
5363 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5364 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5365 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5366 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5367 (_.ScalarLdFrag addr:$src2)))]>,
5368 Sched<[sched.Folded, sched.ReadAfterFold]>;
5369 }
5371 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5372 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5373 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5374 (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5375 EVEX_B, Sched<[sched]>;
5376 }
5377 }
5379 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
5380 SDNode VecNode, SDNode RndNode,
5381 X86SchedWriteSizes sched, bit IsCommutable> {
5382 defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5383 sched.PS.Scl, IsCommutable>,
5384 avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5385 sched.PS.Scl, IsCommutable>,
5386 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5387 defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5388 sched.PD.Scl, IsCommutable>,
5389 avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5390 sched.PD.Scl, IsCommutable>,
5391 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5392 }
5394 multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5395 SDNode VecNode, SDNode SaeNode,
5396 X86SchedWriteSizes sched, bit IsCommutable> {
5397 defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5398 VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
5399 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5400 defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5401 VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
5402 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5403 }
5404 defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86fadds, X86faddRnds,
5405 SchedWriteFAddSizes, 1>;
5406 defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmuls, X86fmulRnds,
5407 SchedWriteFMulSizes, 1>;
5408 defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubs, X86fsubRnds,
5409 SchedWriteFAddSizes, 0>;
5410 defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivs, X86fdivRnds,
5411 SchedWriteFDivSizes, 0>;
5412 defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5413 SchedWriteFCmpSizes, 0>;
5414 defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5415 SchedWriteFCmpSizes, 0>;
5417 // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
5418 // X86fminc and X86fmaxc instead of X86fmin and X86fmax.
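// For instance, under fast-math fmin(a, b) and fmin(b, a) are considered
// interchangeable (NaN and signed-zero ordering is ignored), so the
// commutable X86fminc/X86fmaxc nodes allow the two-address commute machinery
// to swap operands freely.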
5419 multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5420 X86VectorVTInfo _, SDNode OpNode,
5421 X86FoldableSchedWrite sched> {
5422 let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5423 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5424 (ins _.FRC:$src1, _.FRC:$src2),
5425 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5426 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5427 Sched<[sched]> {
5428 let isCommutable = 1;
5429 }
5430 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5431 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5432 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5433 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5434 (_.ScalarLdFrag addr:$src2)))]>,
5435 Sched<[sched.Folded, sched.ReadAfterFold]>;
5436 }
5437 }
5438 defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5439 SchedWriteFCmp.Scl>, XS, EVEX_4V,
5440 VEX_LIG, EVEX_CD8<32, CD8VT1>;
5442 defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5443 SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
5444 VEX_LIG, EVEX_CD8<64, CD8VT1>;
5446 defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5447 SchedWriteFCmp.Scl>, XS, EVEX_4V,
5448 VEX_LIG, EVEX_CD8<32, CD8VT1>;
5450 defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5451 SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
5452 VEX_LIG, EVEX_CD8<64, CD8VT1>;
5454 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5455 X86VectorVTInfo _, X86FoldableSchedWrite sched,
5456 bit IsCommutable = 0,
5457 bit IsKCommutable = IsCommutable> {
5458 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
5459 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5460 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5461 "$src2, $src1", "$src1, $src2",
5462 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
5463 IsKCommutable, IsKCommutable>,
5464 EVEX_4V, Sched<[sched]>;
5465 let mayLoad = 1 in {
5466 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5467 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
5468 "$src2, $src1", "$src1, $src2",
5469 (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5470 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5471 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5472 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
5473 "${src2}"##_.BroadcastStr##", $src1",
5474 "$src1, ${src2}"##_.BroadcastStr,
5475 (OpNode _.RC:$src1, (_.VT (X86VBroadcast
5476 (_.ScalarLdFrag addr:$src2))))>,
5477 EVEX_4V, EVEX_B,
5478 Sched<[sched.Folded, sched.ReadAfterFold]>;
5479 }
5480 }
5481 }
5483 multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5484 SDPatternOperator OpNodeRnd,
5485 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5486 let ExeDomain = _.ExeDomain in
5487 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5488 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
5489 "$rc, $src2, $src1", "$src1, $src2, $rc",
5490 (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>,
5491 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
5492 }
5494 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5495 SDPatternOperator OpNodeSAE,
5496 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5497 let ExeDomain = _.ExeDomain in
5498 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5499 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5500 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5501 (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5502 EVEX_4V, EVEX_B, Sched<[sched]>;
5503 }
5505 multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5506 Predicate prd, X86SchedWriteSizes sched,
5507 bit IsCommutable = 0,
5508 bit IsPD128Commutable = IsCommutable> {
5509 let Predicates = [prd] in {
5510 defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
5511 sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
5512 EVEX_CD8<32, CD8VF>;
5513 defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
5514 sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
5515 EVEX_CD8<64, CD8VF>;
5516 }
5518 // Define only if AVX512VL feature is present.
5519 let Predicates = [prd, HasVLX] in {
5520 defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
5521 sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
5522 EVEX_CD8<32, CD8VF>;
5523 defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
5524 sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
5525 EVEX_CD8<32, CD8VF>;
5526 defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
5527 sched.PD.XMM, IsPD128Commutable,
5528 IsCommutable>, EVEX_V128, PD, VEX_W,
5529 EVEX_CD8<64, CD8VF>;
5530 defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
5531 sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
5532 EVEX_CD8<64, CD8VF>;
5536 multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5537 X86SchedWriteSizes sched> {
5538 defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5540 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5541 defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5543 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
5546 multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5547 X86SchedWriteSizes sched> {
5548 defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5550 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5551 defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5553 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
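
// For example, the VADD instantiation below yields roughly VADDPSZrr/rm/rmb
// (plus the Z128/Z256 forms under AVX512VL) and VADDPSZrrb for the
// static-rounding variant; the masked flavors come from AVX512_maskable.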
defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
                              SchedWriteFAddSizes, 1>,
            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
                              SchedWriteFMulSizes, 1>,
            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
                              SchedWriteFAddSizes>,
            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
                              SchedWriteFDivSizes>,
            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
let isCodeGenOnly = 1 in {
  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>;
  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>;
}
defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
                              SchedWriteFLogicSizes, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 0>;
defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
                             SchedWriteFLogicSizes, 1>;
defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
                              SchedWriteFLogicSizes, 1>;
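
// VSCALEFPS/PD computes x * 2^floor(toInt(y)) per element; the scalar forms
// (VSCALEFSS/SD) do the same on the low element and copy the upper elements
// from src1.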
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                  EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
                  "${src2}"##_.BroadcastStr##", $src1",
                  "$src1, ${src2}"##_.BroadcastStr,
                  (OpNode _.RC:$src1, (_.VT (X86VBroadcast
                                             (_.ScalarLdFrag addr:$src2))))>,
                  EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                  Sched<[sched]>;
  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2)>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
                                X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
             EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
             EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info,
                                    X86scalefsRnd, sched.Scl>,
             EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info,
                                    X86scalefsRnd, sched.Scl>,
             EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
                  EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
                  EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
                  EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
                  EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}

defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
                                    SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;

//===----------------------------------------------------------------------===//
// AVX-512 VPTESTM instructions
//===----------------------------------------------------------------------===//
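
// vptestm sets mask bit i when (src1[i] & src2[i]) != 0; vptestnm sets it
// when the AND is zero. The result lands in a k-register, making these the
// natural way to turn a vector test into a predicate.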
multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
                         string Name> {
  // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
  // There are just too many permutations due to commutability and bitcasts.
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (null_frag), (null_frag), 1>,
                   EVEX_4V, Sched<[sched]>;

  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (null_frag), (null_frag)>,
                   EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                    "${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr,
                    (null_frag), (null_frag)>,
                    EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512, NAME>,
           avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256, NAME>,
              avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
  defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128, NAME>,
              avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
  }
}

multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
                                  avx512vl_i32_info>;
  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
                                  avx512vl_i64_info>, VEX_W;
}

multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in {
  defm WZ: avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
                         v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
  defm BZ: avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
                         v64i8_info, NAME#"B">, EVEX_V512;
  }

  let Predicates = [HasVLX, HasBWI] in {
  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
                            v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
                            v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
                            v32i8x_info, NAME#"B">, EVEX_V256;
  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
                            v16i8x_info, NAME#"B">, EVEX_V128;
  }
}

multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
                                   X86SchedWriteWidths sched> :
  avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
  avx512_vptest_dq<opc_dq, OpcodeStr, sched>;

defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
                                       SchedWriteVecLogic>, T8PD;
defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
                                        SchedWriteVecLogic>, T8XS;

//===----------------------------------------------------------------------===//
// AVX-512 Shift instructions
//===----------------------------------------------------------------------===//
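
// Three shift flavors are defined below: shift-by-immediate (ri/mi/mbi),
// shift by a uniform count held in the low 64 bits of an XMM register
// (rr/rm), and, further down, variable per-element shifts (VPSLLV etc.).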
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                            string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (i8 imm:$src2)))>,
                   Sched<[sched]>;
  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
                          (i8 imm:$src2)))>,
                   Sched<[sched.Folded]>;
  }
}

multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
                             string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
                   "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
                   (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2)))>,
                   EVEX_B, Sched<[sched.Folded]>;
}

multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, ValueType SrcVT,
                            X86VectorVTInfo _> {
  // src2 is always 128-bit
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
                   AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
                   AVX512BIBase,
                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched, ValueType SrcVT,
                              AVX512VLVectorVTInfo VTInfo,
                              Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
                            VTInfo.info512>, EVEX_V512,
                            EVEX_CD8<VTInfo.info512.EltSize, CD8VQ>;
  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
                               VTInfo.info256>, EVEX_V256,
                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
                               VTInfo.info128>, EVEX_V128,
                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
  }
}
multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              bit NotEVEX2VEXConvertibleQ = 0> {
  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
                              avx512vl_i32_info, HasAVX512>;
  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
                              avx512vl_i64_info, HasAVX512>, VEX_W;
  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
                              avx512vl_i16_info, HasBWI>;
}

multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                           sched.ZMM, VTInfo.info512>,
          avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
                            VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.YMM, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
                               VTInfo.info256>, EVEX_V256;
  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.XMM, VTInfo.info128>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
                               VTInfo.info128>, EVEX_V128;
  }
}

multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                            sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
  let Predicates = [HasVLX, HasBWI] in {
  defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
  defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
  }
}

multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
                               Format ImmFormR, Format ImmFormM,
                               string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched,
                               bit NotEVEX2VEXConvertibleQ = 0> {
  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
                                 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
}
defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
                                 SchedWriteVecShiftImm, 1>,
             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
                                SchedWriteVecShift>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
                                SchedWriteVecShift, 1>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
                                SchedWriteVecShift>;

// Use the 512-bit VPSRA/VPSRAI versions to implement v2i64/v4i64 when VLX is
// not available: the arithmetic right shift of qwords only exists as a ZMM
// instruction in base AVX-512, so narrower vectors are widened, shifted, and
// the low subregister extracted.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                VR128X:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                VR128X:$src2)), sub_xmm)>;

  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                imm:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                imm:$src2)), sub_xmm)>;
}
//===-------------------------------------------------------------------===//
// Variable Bit Shifts
//===-------------------------------------------------------------------===//
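
// Unlike the uniform-count shifts above, these take a full vector of
// per-element shift amounts (AVX2's VPSLLV/VPSRLV/VPSRAV model, extended to
// all AVX-512 widths and element types, plus the VPROLV/VPRORV rotates).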
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
                   AVX5128IBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1,
                                 (_.VT (_.LdFrag addr:$src2))))>,
                   AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                    "${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr,
                    (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
                                                     (_.ScalarLdFrag addr:$src2)))))>,
                    AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
  defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
  }
}

multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, X86SchedWriteWidths sched> {
  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
                                  avx512vl_i32_info>;
  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
                                  avx512vl_i64_info>, VEX_W;
}

// Use the 512-bit version to implement the 128/256-bit forms when VLX is not
// available.
multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
                                     SDNode OpNode, list<Predicate> p> {
  let Predicates = p in {
  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
                                  (_.info256.VT _.info256.RC:$src2))),
            (EXTRACT_SUBREG
             (!cast<Instruction>(OpcodeStr#"Zrr")
              (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
              (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
                                  (_.info128.VT _.info128.RC:$src2))),
            (EXTRACT_SUBREG
             (!cast<Instruction>(OpcodeStr#"Zrr")
              (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
              (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  }
}
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
           EVEX_V512, VEX_W;
  let Predicates = [HasVLX, HasBWI] in {
  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
              EVEX_V256, VEX_W;
  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
              EVEX_V128, VEX_W;
  }
}

defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;

defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;

defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;

defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;

defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
// Use the 512-bit VPROLV/VPROL versions to implement v2i64/v4i64 and
// v4i32/v8i32 rotates when VLX is not available.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                imm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                imm:$src2)), sub_ymm)>;

  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                imm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                imm:$src2)), sub_ymm)>;
}
// Use the 512-bit VPRORV/VPROR versions to implement v2i64/v4i64 and
// v4i32/v8i32 rotates when VLX is not available.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                imm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                imm:$src2)), sub_ymm)>;

  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                imm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                imm:$src2)), sub_ymm)>;
}
//===-------------------------------------------------------------------===//
// 1-src variable permutation VPERMW/D/Q
//===-------------------------------------------------------------------===//
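
// These reuse the avx512_var_shift multiclasses: a one-source permute has the
// same operand shape as a variable shift (vector data plus a vector control
// operand), so only the opcode and the schedule differ.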
multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
}

multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                           sched, VTInfo.info512>,
          avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                            sched, VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info256>, EVEX_V256;
}

multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
                           Predicate prd, SDNode OpNode,
                           X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates = [prd] in
  defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
          EVEX_V512;
  let Predicates = [HasVLX, prd] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
             EVEX_V256;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
             EVEX_V128;
  }
}

defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
                              WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
                              WriteVarShuffle256, avx512vl_i8_info>;

defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i32_info>;
defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;

defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
                                     X86VPermi, WriteShuffle256, avx512vl_i64_info>,
                                     EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
                                      X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
                                      EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - VPERMIL
//===----------------------------------------------------------------------===//
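
// VPERMILPS/PD permute elements within each 128-bit block. Two encodings
// exist: a variable form whose control comes from an integer vector (OpcVar)
// and an immediate form that reuses the shift-by-immediate multiclasses
// (OpcImm).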
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
                             X86VectorVTInfo Ctrl> {
  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1,
                                (Ctrl.VT Ctrl.RC:$src2)))>,
                  T8PD, EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode
                           _.RC:$src1,
                           (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
                  T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (_.VT (OpNode
                            _.RC:$src1,
                            (Ctrl.VT (X86VBroadcast
                                      (Ctrl.ScalarLdFrag addr:$src2)))))>,
                   T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
                                    X86SchedWriteWidths sched,
                                    AVX512VLVectorVTInfo _,
                                    AVX512VLVectorVTInfo Ctrl> {
  let Predicates = [HasAVX512] in {
  defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
                             _.info512, Ctrl.info512>, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
  defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
                                _.info128, Ctrl.info128>, EVEX_V128;
  defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
                                _.info256, Ctrl.info256>, EVEX_V256;
  }
}

multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl> {
  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
                                      _, Ctrl>;
  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                    X86VPermilpi, SchedWriteFShuffle, _>,
             EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}

let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
                               avx512vl_i32_info>;
let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
                               avx512vl_i64_info>, VEX_W1X;
//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
//===----------------------------------------------------------------------===//

defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
                                      X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
                                      EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
                                  X86PShufhw, SchedWriteShuffle>,
                                  EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
                                  X86PShuflw, SchedWriteShuffle>,
                                  EVEX, AVX512XDIi8Base;
//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFB
//===----------------------------------------------------------------------===//
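
// vpshufb shuffles bytes within each 128-bit lane using the low nibble of the
// corresponding control byte; a set sign bit in the control byte zeroes the
// result byte.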
multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
          EVEX_V512;

  let Predicates = [HasVLX, HasBWI] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
             EVEX_V256;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
             EVEX_V128;
  }
}

defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
                                  SchedWriteVarShuffle>, VEX_WIG;

//===----------------------------------------------------------------------===//
// Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//

def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
                            Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
let isCommutable = 1 in
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
                            Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;

//===----------------------------------------------------------------------===//
// VMOVHPS/PD VMOVLPS Instructions
// All patterns were taken from the SSE implementation.
//===----------------------------------------------------------------------===//
multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNode,
                                  X86VectorVTInfo _> {
  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
                  (ins _.RC:$src1, f64mem:$src2),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.RC:$dst,
                        (OpNode _.RC:$src1,
                                (_.VT (bitconvert
                                       (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
                  Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
}

// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
// SSE1. The MOVLPS pattern is even more complex.
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
                                          v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
                                          v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
                                          v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
                                          v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;

let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
                    (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;

  // VMOVLPD patterns
  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
}
let SchedRW = [WriteFStore] in {
let mayStore = 1, hasSideEffects = 0 in
def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovhps\t{$src, $dst|$dst, $src}",
                              []>, EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovhpd\t{$src, $dst|$dst, $src}",
                              [(store (f64 (extractelt
                                            (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
                                            (iPTR 0))), addr:$dst)]>,
                              EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
let mayStore = 1, hasSideEffects = 0 in
def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovlps\t{$src, $dst|$dst, $src}",
                              []>, EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovlpd\t{$src, $dst|$dst, $src}",
                              [(store (f64 (extractelt (v2f64 VR128X:$src),
                                            (iPTR 0))), addr:$dst)]>,
                              EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
} // SchedRW

let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(store (f64 (extractelt
                         (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
                         (iPTR 0))), addr:$dst),
            (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
}
//===----------------------------------------------------------------------===//
// FMA - Fused Multiply Operations
//===----------------------------------------------------------------------===//
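
// Naming: vfmaddABC computes dst = srcA * srcB + srcC, where dst is tied to
// src1 and the memory operand is always src3. The three digit orders (132,
// 213, 231) therefore let a folded load feed either the multiplier or the
// addend.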
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
           !strconcat("$src2, ${src3}", _.BroadcastStr),
           (OpNode _.RC:$src2,
            _.RC:$src1, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>,
           AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}

multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
  defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                               _.info512, Suff>,
           avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                 _.info512, Suff>,
           EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
  defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                  _.info256, Suff>,
              EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
  defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                  _.info128, Suff>,
              EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd> {
  defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f32_info, "PS">;
  defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f64_info, "PD">,
            VEX_W;
}

defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1,
          vselect, 1>, AVX512FMA3Base, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
           "$src2, ${src3}"##_.BroadcastStr,
           (_.VT (OpNode _.RC:$src2,
                         (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
                         _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
           1, 1, vselect, 1>,
           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}

multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
  defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                               _.info512, Suff>,
           avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                 _.info512, Suff>,
           EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
  defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                  _.info256, Suff>,
              EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
  defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                  _.info128, Suff>,
              EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd> {
  defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f32_info, "PS">;
  defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f64_info, "PD">,
            VEX_W;
}

defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1, vselect, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  // Pattern is in 312 order so that the load is in a different place from the
  // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Pattern is in 312 order so that the load is in a different place from the
  // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
           "$src2, ${src3}"##_.BroadcastStr,
           (_.VT (OpNode (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
                         _.RC:$src1, _.RC:$src2)), 1, 0>,
           AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
           1, 1, vselect, 1>,
           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}

multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
  defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                               _.info512, Suff>,
           avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                 _.info512, Suff>,
           EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
  defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                  _.info256, Suff>,
              EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
  defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                  _.info128, Suff>,
              EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd> {
  defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f32_info, "PS">;
  defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f64_info, "PD">,
            VEX_W;
}

defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
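
// Scalar FMA. The _Int forms operate on the full XMM register (upper elements
// of src1 pass through) and carry no patterns here; they are matched by the
// avx512_scalar_fma_patterns helpers further down. The isCodeGenOnly forms
// operate on FR32X/FR64X and carry the plain scalar fma-node patterns.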
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
  let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>;

  let mayLoad = 1 in
  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;

  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;

  let isCodeGenOnly = 1, isCommutable = 1 in {
    def r : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                        (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
                        !strconcat(OpcodeStr,
                                   "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                        !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>;
    def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
                        (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
                        !strconcat(OpcodeStr,
                                   "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                        [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;

    def rb : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
                         !strconcat(OpcodeStr,
                                    "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
                         !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
                         Sched<[SchedWriteFMA.Scl]>;
  } // isCodeGenOnly = 1
  } // Constraints = "$src1 = $dst"
}
multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                            string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
                            X86VectorVTInfo _, string SUFF> {
  let ExeDomain = _.ExeDomain in {
  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
                // Operands for the intrinsic are in 123 order to preserve
                // passthrough semantics.
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                                                  _.FRC:$src3))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                                                  (_.ScalarLdFrag addr:$src3)))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
                                                     _.FRC:$src3, (i32 timm:$rc)))), 0>;

  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
                                                  _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
                                                  (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
                                                     _.FRC:$src1, (i32 timm:$rc)))), 1>;

  // One pattern is in 312 order so that the load is in a different place from
  // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
                                                  _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
                                                  _.FRC:$src1, _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
                                                     _.FRC:$src2, (i32 timm:$rc)))), 1>;
  }
}

multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                        string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
  defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                               OpNodeRnd, f32x_info, "SS">,
              EVEX_CD8<32, CD8VT1>, VEX_LIG;
  defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                               OpNodeRnd, f64x_info, "SD">,
              EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
  }
}

defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>;
defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
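
// Match a scalar FMA wrapped in a Movss/Movsd (only the low element is
// replaced) onto the _Int instruction forms, covering plain (Zr_Int/Zm_Int),
// merge-masked (_Intk), zero-masked (_Intkz), and static-rounding (Zrb_*)
// variants. The COPY_TO_REGCLASS hops move FR32X/FR64X values into VR128X,
// since the _Int forms operate on the full XMM register.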
multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
                                      string Suffix, SDNode Move,
                                      X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    _.FRC:$src3))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, _.FRC:$src3,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op _.FRC:$src2,
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                     _.FRC:$src3),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6781 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6782 (X86selects VK1WM:$mask,
6784 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6785 (_.ScalarLdFrag addr:$src3)),
6786 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6787 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
6788 VR128X:$src1, VK1WM:$mask,
6789 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6791 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6792 (X86selects VK1WM:$mask,
6793 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6794 (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
6795 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6796 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
6797 VR128X:$src1, VK1WM:$mask,
6798 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6800 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6801 (X86selects VK1WM:$mask,
6802 (Op _.FRC:$src2, _.FRC:$src3,
6803 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6804 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6805 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
6806 VR128X:$src1, VK1WM:$mask,
6807 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6808 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6810 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6811 (X86selects VK1WM:$mask,
6812 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6813 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6814 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6815 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
6816 VR128X:$src1, VK1WM:$mask,
6817 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6819 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6820 (X86selects VK1WM:$mask,
6822 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6824 (_.EltVT ZeroFP)))))),
6825 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
6826 VR128X:$src1, VK1WM:$mask,
6827 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6828 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6830 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6831 (X86selects VK1WM:$mask,
6832 (Op _.FRC:$src2, _.FRC:$src3,
6833 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6834 (_.EltVT ZeroFP)))))),
6835 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
6836 VR128X:$src1, VK1WM:$mask,
6837 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6838 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6840 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6841 (X86selects VK1WM:$mask,
6843 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6844 (_.ScalarLdFrag addr:$src3)),
6845 (_.EltVT ZeroFP)))))),
6846 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
6847 VR128X:$src1, VK1WM:$mask,
6848 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6850 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6851 (X86selects VK1WM:$mask,
6852 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6853 _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
6854 (_.EltVT ZeroFP)))))),
6855 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
6856 VR128X:$src1, VK1WM:$mask,
6857 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6859 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6860 (X86selects VK1WM:$mask,
6861 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6862 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6863 (_.EltVT ZeroFP)))))),
6864 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
6865 VR128X:$src1, VK1WM:$mask,
6866 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
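
  // Note: the FMA form digits name where each operand sits in the multiply
  // and add (213: dst = src2 * dst + src3; 231: dst = src2 * src3 + dst;
  // 132: dst = dst * src3 + src2), so each pattern above picks the form
  // whose operand placement matches where the accumulator (element 0 of
  // $src1) and any loaded operand land.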

  // Patterns with rounding mode.
  def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
              (RndOp _.FRC:$src2,
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                     _.FRC:$src3, (i32 timm:$rc)))))),
            (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
             VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
             (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

  def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
              (RndOp _.FRC:$src2, _.FRC:$src3,
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                     (i32 timm:$rc)))))),
            (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
             VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
             (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

  def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
              (X86selects VK1WM:$mask,
               (RndOp _.FRC:$src2,
                      (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                      _.FRC:$src3, (i32 timm:$rc)),
               (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
            (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
             VR128X:$src1, VK1WM:$mask,
             (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
             (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

  def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
              (X86selects VK1WM:$mask,
               (RndOp _.FRC:$src2, _.FRC:$src3,
                      (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                      (i32 timm:$rc)),
               (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
            (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
             VR128X:$src1, VK1WM:$mask,
             (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
             (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

  def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
              (X86selects VK1WM:$mask,
               (RndOp _.FRC:$src2,
                      (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                      _.FRC:$src3, (i32 timm:$rc)),
               (_.EltVT ZeroFP)))))),
            (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
             VR128X:$src1, VK1WM:$mask,
             (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
             (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

  def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
              (X86selects VK1WM:$mask,
               (RndOp _.FRC:$src2, _.FRC:$src3,
                      (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                      (i32 timm:$rc)),
               (_.EltVT ZeroFP)))))),
            (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
             VR128X:$src1, VK1WM:$mask,
             (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
             (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
}

defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;

defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;

//===----------------------------------------------------------------------===//
// AVX-512 IFMA - Packed Multiply of Unsigned 52-bit Integers and Add the
// Low 52-bit Products
//===----------------------------------------------------------------------===//
let Constraints = "$src1 = $dst" in {
multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: The SDNode has the multiply operands first with the add last.
  // This enables commuted load patterns to be autogenerated by tablegen.
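  // For example, with this operand order tablegen's commuting can fold a
  // load of either multiply operand into $src3 (swapping it with $src2),
  // so no hand-written commuted-load patterns are needed here.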
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
           !strconcat("$src2, ${src3}", _.BroadcastStr ),
           (OpNode _.RC:$src2,
                   (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
                   _.RC:$src1)>,
           AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
} // Constraints = "$src1 = $dst"

multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasIFMA] in {
    defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasIFMA] in {
    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                   VEX_W;
defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                   VEX_W;
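
// For reference, a few of the forms this expands to (AT&T syntax):
//   vpmadd52luq %zmm3, %zmm2, %zmm1 {%k1} {z}
//   vpmadd52luq (%rax){1to8}, %zmm2, %zmm1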

//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from signed integer to float/double
//===----------------------------------------------------------------------===//

multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
                         RegisterClass SrcRC, X86VectorVTInfo DstVT,
                         X86MemOperand x86memop, PatFrag ld_frag, string asm,
                         string mem> {
  let hasSideEffects = 0, isCodeGenOnly = 1 in {
    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
                (ins DstVT.FRC:$src1, SrcRC:$src),
                !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
                EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;

    def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
                (ins DstVT.FRC:$src1, x86memop:$src),
                asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
                EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // hasSideEffects = 0
  def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                  (ins DstVT.RC:$src1, SrcRC:$src2),
                  !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set DstVT.RC:$dst,
                        (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
                  EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;

  def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
                  (ins DstVT.RC:$src1, x86memop:$src2),
                  asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set DstVT.RC:$dst,
                        (OpNode (DstVT.VT DstVT.RC:$src1),
                                (ld_frag addr:$src2)))]>,
                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
                   DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
}

multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
                               X86VectorVTInfo DstVT, string asm,
                               string mem> {
  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                   (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
                   !strconcat(asm,
                       "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
                   [(set DstVT.RC:$dst,
                         (OpNode (DstVT.VT DstVT.RC:$src1),
                                 SrcRC:$src2,
                                 (i32 timm:$rc)))]>,
                   EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
  def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
                  (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
                   DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
}

multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
                                X86FoldableSchedWrite sched,
                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
                                X86MemOperand x86memop, PatFrag ld_frag,
                                string asm, string mem> {
  defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
                            ld_frag, asm, mem>, VEX_LIG;
}

let Predicates = [HasAVX512] in {
defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                 WriteCvtI2SS, GR32,
                                 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
                                 XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                 WriteCvtI2SS, GR64,
                                 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
                                 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l">,
                                 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                 WriteCvtI2SD, GR64,
                                 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
                                 XD, VEX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (sint_to_fp GR32:$src)),
          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (sint_to_fp GR64:$src)),
          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (sint_to_fp GR32:$src)),
          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (sint_to_fp GR64:$src)),
          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
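
// Note: the tied first source of the rr/rm forms exists only to supply the
// upper vector elements that the instruction merges through; a bare scalar
// sint_to_fp does not care about those bits, so the patterns above feed it
// an IMPLICIT_DEF.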

defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                  WriteCvtI2SS, GR32,
                                  v4f32x_info, i32mem, loadi32,
                                  "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                  WriteCvtI2SS, GR64,
                                  v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
                                  XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
                                  i32mem, loadi32, "cvtusi2sd", "l">,
                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                  WriteCvtI2SD, GR64,
                                  v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
                                  XD, VEX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (uint_to_fp GR32:$src)),
          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (uint_to_fp GR64:$src)),
          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (uint_to_fp GR32:$src)),
          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (uint_to_fp GR64:$src)),
          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
} // Predicates = [HasAVX512]

//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from float/double to integer
//===----------------------------------------------------------------------===//

multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                                  X86VectorVTInfo DstVT, SDNode OpNode,
                                  SDNode OpNodeRnd,
                                  X86FoldableSchedWrite sched, string asm,
                                  string aliasStr> {
  let Predicates = [HasAVX512] in {
    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
                    !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                    [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
                    EVEX, VEX_LIG, Sched<[sched]>;
    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
                     !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
                     [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
                     EVEX, VEX_LIG, EVEX_B, EVEX_RC,
                     Sched<[sched]>;
    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
                    !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                    [(set DstVT.RC:$dst, (OpNode
                          (SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>,
                    EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // Predicates = [HasAVX512]

  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
  def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
          (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
           SrcVT.IntScalarMemOp:$src), 0, "att">;
}

// Convert float/double to signed/unsigned int 32/64
defm VCVTSS2SIZ:    avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
                                   XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z:  avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ:   avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
                                   XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ:    avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
                                   XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z:  avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
                                   XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;

// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
// which produce unnecessary vmovs{s,d} instructions
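// For example, _mm_cvtsi32_ss(a, b) lowers to
//   (v4f32 (X86Movss a, (scalar_to_vector (sint_to_fp b))))
// and the patterns below select that whole dag to VCVTSI2SSZrr_Int, so no
// separate VMOVSS is emitted to merge the result into element 0.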
let Predicates = [HasAVX512] in {
def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (uint_to_fp GR64:$src)))))),
          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (uint_to_fp GR32:$src)))))),
          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (uint_to_fp GR64:$src)))))),
          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (uint_to_fp GR32:$src)))))),
          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasAVX512]

// Convert float/double to signed/unsigned int 32/64 with truncation
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                            X86VectorVTInfo _DstRC, SDNode OpNode,
                            SDNode OpNodeInt, SDNode OpNodeSAE,
                            X86FoldableSchedWrite sched, string aliasStr>{
let Predicates = [HasAVX512] in {
  let isCodeGenOnly = 1 in {
  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
              EVEX, VEX_LIG, Sched<[sched]>;
  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // isCodeGenOnly = 1

  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
            [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
            EVEX, VEX_LIG, Sched<[sched]>;
  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
            !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
            [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
            EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
              (ins _SrcRC.IntScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst,
                (OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>,
              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // Predicates = [HasAVX512]

  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
          (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
           _SrcRC.IntScalarMemOp:$src), 0, "att">;
}

defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
                        fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
                        fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                        "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
                        fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
                        fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                        "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;

defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
                        fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
                        fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                        "{q}">, XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
                        fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
                        fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
                        "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;

//===----------------------------------------------------------------------===//
// AVX-512 Convert from float to double and back
//===----------------------------------------------------------------------===//

multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode,
                                X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                       (_Src.VT _Src.RC:$src2)))>,
                         EVEX_4V, VEX_LIG, Sched<[sched]>;
  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                       (_Src.VT _Src.ScalarIntMemCPat:$src2)))>,
                         EVEX_4V, VEX_LIG,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;

  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.FRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched]>;

    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Scalar conversion with SAE - suppress all exceptions
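// For example, "vcvtss2sd {sae}, %xmm2, %xmm1, %xmm0" converts without
// raising FP exceptions; on register forms SAE is encoded via the EVEX.b
// bit (EVEX_B below).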
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                    X86VectorVTInfo _Src, SDNode OpNodeSAE,
                                    X86FoldableSchedWrite sched> {
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                        (_.VT (OpNodeSAE (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2)))>,
                        EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
}

// Scalar conversion with rounding control (RC)
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
                                   X86FoldableSchedWrite sched> {
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src2, $src1", "$src1, $src2, $rc",
                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
                        EVEX_4V, VEX_LIG, Sched<[sched]>,
                        EVEX_B, EVEX_RC;
}
multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeRnd,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
                                     OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
  }
}

multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeSAE,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
             EVEX_CD8<32, CD8VT1>, XS;
  }
}
defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86frounds,
                                         X86froundsRnd, WriteCvtSD2SS, f64x_info,
                                         f32x_info>;
defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts,
                                         X86fpextsSAE, WriteCvtSS2SD, f32x_info,
                                         f64x_info>;

def : Pat<(f64 (fpextend FR32X:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
          Requires<[HasAVX512]>;
def : Pat<(f64 (fpextend (loadf32 addr:$src))),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX512, OptForSize]>;

def : Pat<(f32 (fpround FR64X:$src)),
          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector
                          (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector
                          (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;
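
// Note the asymmetry above: vcvtsd2ss narrows and can round, so it gets the
// rounding-control (RC) variants, while vcvtss2sd widens exactly and only
// needs the exception-suppressing (SAE) variant.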

//===----------------------------------------------------------------------===//
// AVX-512 Vector convert from signed/unsigned integer to float/double
//         and from float/double to signed/unsigned integer
//===----------------------------------------------------------------------===//

multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86VectorVTInfo _Src, SDNode OpNode,
                          X86FoldableSchedWrite sched,
                          string Broadcast = _.BroadcastStr,
                          string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                          RegisterClass MaskRC = _.KRCWM,
                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src))))> {

  defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _Src.RC:$src),
                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
                         (ins MaskRC:$mask, _Src.RC:$src),
                         OpcodeStr, "$src", "$src",
                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
                         (vselect MaskRC:$mask,
                                  (_.VT (OpNode (_Src.VT _Src.RC:$src))),
                                  _.RC:$src0),
                         vselect, "$src0 = $dst">,
                         EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins MemOp:$src),
                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
                         (ins MaskRC:$mask, MemOp:$src),
                         OpcodeStr#Alias, "$src", "$src",
                         LdDAG,
                         (vselect MaskRC:$mask, LdDAG, _.RC:$src0),
                         vselect, "$src0 = $dst">,
                         EVEX, Sched<[sched.Folded]>;

  defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _Src.ScalarMemOp:$src),
                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
                         OpcodeStr,
                         "${src}"##Broadcast, "${src}"##Broadcast,
                         (_.VT (OpNode (_Src.VT
                                        (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
                                        )),
                         (vselect MaskRC:$mask,
                                  (_.VT
                                   (OpNode
                                    (_Src.VT
                                     (X86VBroadcast
                                      (_Src.ScalarLdFrag addr:$src))))),
                                  _.RC:$src0),
                         vselect, "$src0 = $dst">,
                         EVEX, EVEX_B, Sched<[sched.Folded]>;
}
// Conversion with SAE - suppress all exceptions
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                              X86VectorVTInfo _Src, SDNode OpNodeSAE,
                              X86FoldableSchedWrite sched> {
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _Src.RC:$src), OpcodeStr,
                        "{sae}, $src", "$src, {sae}",
                        (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
                        EVEX, EVEX_B, Sched<[sched]>;
}

// Conversion with rounding control (RC)
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                             X86VectorVTInfo _Src, SDNode OpNodeRnd,
                             X86FoldableSchedWrite sched> {
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src", "$src, $rc",
                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
                        EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}
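
// On the full-width register forms the EVEX.b bit selects static rounding,
// e.g. "vcvtdq2ps {rz-sae}, %zmm1, %zmm0" rounds toward zero regardless of
// the rounding mode in MXCSR.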

// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                string Broadcast = _.BroadcastStr,
                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                                RegisterClass MaskRC = _.KRCWM>
  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, sched, Broadcast, Alias,
                   MemOp, MaskRC,
                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
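// An fpextend fed by a load is represented in the DAG as a floating-point
// extload, so this LdDAG lets the memory form match that combined node
// directly rather than a separate load plus convert.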

// Extend Float to Double
multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
                                  fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
                                     X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
                                     sched.YMM>, EVEX_V256;
  }
}

// Truncate Double to Float
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, X86vfpround, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, X86vfpround,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}

defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
                                 VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
                                 PS, EVEX_CD8<32, CD8VH>;

let Predicates = [HasAVX512] in {
  def : Pat<(v8f32 (fpround (v8f64 VR512:$src))),
            (VCVTPD2PSZrr VR512:$src)>;
  def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
            (v8f32 VR256X:$src0)),
            (VCVTPD2PSZrrk VR256X:$src0, VK8WM:$mask, VR512:$src)>;
  def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
            v8f32x_info.ImmAllZerosV),
            (VCVTPD2PSZrrkz VK8WM:$mask, VR512:$src)>;

  def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
            (VCVTPD2PSZrm addr:$src)>;
  def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
            (v8f32 VR256X:$src0)),
            (VCVTPD2PSZrmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
            v8f32x_info.ImmAllZerosV),
            (VCVTPD2PSZrmkz VK8WM:$mask, addr:$src)>;

  def : Pat<(v8f32 (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src))))),
            (VCVTPD2PSZrmb addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
            (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))),
            (v8f32 VR256X:$src0)),
            (VCVTPD2PSZrmbk VR256X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
            (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))),
            v8f32x_info.ImmAllZerosV),
            (VCVTPD2PSZrmbkz VK8WM:$mask, addr:$src)>;
}

let Predicates = [HasVLX] in {
  def : Pat<(v4f32 (fpround (v4f64 VR256X:$src))),
            (VCVTPD2PSZ256rr VR256X:$src)>;
  def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
            (v4f32 VR128X:$src0)),
            (VCVTPD2PSZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
  def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
            v4f32x_info.ImmAllZerosV),
            (VCVTPD2PSZ256rrkz VK4WM:$mask, VR256X:$src)>;

  def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
            (VCVTPD2PSZ256rm addr:$src)>;
  def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
            (v4f32 VR128X:$src0)),
            (VCVTPD2PSZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
            v4f32x_info.ImmAllZerosV),
            (VCVTPD2PSZ256rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
            (VCVTPD2PSZ256rmb addr:$src)>;
  def : Pat<(vselect VK4WM:$mask,
            (v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
            (v4f32 VR128X:$src0)),
            (VCVTPD2PSZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(vselect VK4WM:$mask,
            (v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
            v4f32x_info.ImmAllZerosV),
            (VCVTPD2PSZ256rmbkz VK4WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86vmfpround for masking. Instruction
  // patterns have been disabled with null_frag.
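  // X86vmfpround carries the passthru and mask as explicit operands, which
  // models the v2f64 -> v4f32 case (upper result elements zeroed) that a
  // plain vselect-based masking pattern cannot express.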
  def : Pat<(X86vfpround (v2f64 VR128X:$src)),
            (VCVTPD2PSZ128rr VR128X:$src)>;
  def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(X86vfpround (loadv2f64 addr:$src)),
            (VCVTPD2PSZ128rm addr:$src)>;
  def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(X86vfpround (v2f64 (X86VBroadcast (loadf64 addr:$src)))),
            (VCVTPD2PSZ128rmb addr:$src)>;
  def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                          (v4f32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                          v4f32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
}

// Convert Signed/Unsigned Doubleword to Double
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNode128, X86SchedWriteWidths sched> {
  // No rounding in this op
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
                            sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
                               OpNode128, sched.XMM, "{1to2}", "", i64mem, VK2WM,
                               (v2f64 (OpNode128 (bc_v4i32
                                (v2i64
                                 (scalar_to_vector (loadi64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Doubleword to Float
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Doubleword
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Double to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
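    // For example, in AT&T syntax "vcvttpd2dqx (%rax), %xmm0" reads 128 bits
    // of memory and "vcvttpd2dqy (%rax), %xmm0" reads 256 bits; both write an
    // %xmm register, so without the suffix the memory width is ambiguous.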
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}

// Convert Double to Signed/Unsigned Doubleword
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}

// Convert Double to Signed/Unsigned Quadword
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Double to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Quadword to Double
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
                               sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
                               sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
  }
}

// Convert Float to Signed/Unsigned Quadword
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Quadword to Float
multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag,
                               sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
                               EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256,
                               NotEVEX2VEXConvertible;
  }

  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
}

defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
                                 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;

defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
                                 X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
                                 PS, EVEX_CD8<32, CD8VF>;

defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86cvttp2si,
                                   X86cvttp2siSAE, SchedWriteCvtPS2DQ>,
                                   XS, EVEX_CD8<32, CD8VF>;

defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86cvttp2si,
                                   X86cvttp2siSAE, SchedWriteCvtPD2DQ>,
                                   PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86cvttp2ui,
                                    X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PS,
                                    EVEX_CD8<32, CD8VF>;

defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui,
                                    X86cvttp2uiSAE, SchedWriteCvtPD2DQ>,
                                    PS, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
                                  X86VUintToFP, SchedWriteCvtDQ2PD>, XS,
                                  EVEX_CD8<32, CD8VH>;

defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
                                  X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD,
                                  EVEX_CD8<32, CD8VF>;

defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VF>;

defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
                                 VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
                                  PS, EVEX_CD8<32, CD8VF>;

defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                  PS, EVEX_CD8<64, CD8VF>;

defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                  PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
                                  EVEX_CD8<32, CD8VH>;

defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86cvttp2si,
                                   X86cvttp2siSAE, SchedWriteCvtPD2DQ>, VEX_W,
                                   PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86cvttp2si,
                                   X86cvttp2siSAE, SchedWriteCvtPS2DQ>, PD,
                                   EVEX_CD8<32, CD8VH>;

defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86cvttp2ui,
                                    X86cvttp2uiSAE, SchedWriteCvtPD2DQ>, VEX_W,
                                    PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui,
                                    X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PD,
                                    EVEX_CD8<32, CD8VH>;

defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
                                 X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
                                 EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
                                  X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
                                  EVEX_CD8<64, CD8VF>;

defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp,
                                 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS,
                                 EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp,
                                  X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD,
                                  EVEX_CD8<64, CD8VF>;

let Predicates = [HasVLX] in {
  // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
  // patterns have been disabled with null_frag.
8187 def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8188 (VCVTPD2DQZ128rr VR128X:$src)>;
8189 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8191 (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8192 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8194 (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8196 def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8197 (VCVTPD2DQZ128rm addr:$src)>;
8198 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8200 (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8201 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8203 (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8205 def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
8206 (VCVTPD2DQZ128rmb addr:$src)>;
8207 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8208 (v4i32 VR128X:$src0), VK2WM:$mask),
8209 (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8210 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8211 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8212 (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8214 // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8215 // patterns have been disabled with null_frag.
8216 def : Pat<(v4i32 (X86cvttp2si (v2f64 VR128X:$src))),
8217 (VCVTTPD2DQZ128rr VR128X:$src)>;
8218 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8220 (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8221 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8223 (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8225 def : Pat<(v4i32 (X86cvttp2si (loadv2f64 addr:$src))),
8226 (VCVTTPD2DQZ128rm addr:$src)>;
8227 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8229 (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8230 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8232 (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8234 def : Pat<(v4i32 (X86cvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
8235 (VCVTTPD2DQZ128rmb addr:$src)>;
8236 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8237 (v4i32 VR128X:$src0), VK2WM:$mask),
8238 (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8239 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8240 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8241 (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8243 // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8244 // patterns have been disabled with null_frag.
8245 def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8246 (VCVTPD2UDQZ128rr VR128X:$src)>;
8247 def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8249 (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8250 def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8252 (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8254 def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8255 (VCVTPD2UDQZ128rm addr:$src)>;
8256 def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8258 (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8259 def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8261 (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8263 def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
8264 (VCVTPD2UDQZ128rmb addr:$src)>;
8265 def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8266 (v4i32 VR128X:$src0), VK2WM:$mask),
8267 (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8268 def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8269 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8270 (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8272 // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8273 // patterns have been disabled with null_frag.
8274 def : Pat<(v4i32 (X86cvttp2ui (v2f64 VR128X:$src))),
8275 (VCVTTPD2UDQZ128rr VR128X:$src)>;
8276 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8278 (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8279 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8281 (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8283 def : Pat<(v4i32 (X86cvttp2ui (loadv2f64 addr:$src))),
8284 (VCVTTPD2UDQZ128rm addr:$src)>;
8285 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8287 (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8288 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8290 (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8292 def : Pat<(v4i32 (X86cvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
8293 (VCVTTPD2UDQZ128rmb addr:$src)>;
8294 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8295 (v4i32 VR128X:$src0), VK2WM:$mask),
8296 (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8297 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8298 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8299 (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8302 let Predicates = [HasDQI, HasVLX] in {
8303 def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8304 (VCVTPS2QQZ128rm addr:$src)>;
8305 def : Pat<(v2i64 (vselect VK2WM:$mask,
8306 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8308 (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8309 def : Pat<(v2i64 (vselect VK2WM:$mask,
8310 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8311 v2i64x_info.ImmAllZerosV)),
8312 (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8314 def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8315 (VCVTPS2UQQZ128rm addr:$src)>;
8316 def : Pat<(v2i64 (vselect VK2WM:$mask,
8317 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8319 (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8320 def : Pat<(v2i64 (vselect VK2WM:$mask,
8321 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8322 v2i64x_info.ImmAllZerosV)),
8323 (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8325 def : Pat<(v2i64 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8326 (VCVTTPS2QQZ128rm addr:$src)>;
8327 def : Pat<(v2i64 (vselect VK2WM:$mask,
8328 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8330 (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8331 def : Pat<(v2i64 (vselect VK2WM:$mask,
8332 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8333 v2i64x_info.ImmAllZerosV)),
8334 (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8336 def : Pat<(v2i64 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8337 (VCVTTPS2UQQZ128rm addr:$src)>;
8338 def : Pat<(v2i64 (vselect VK2WM:$mask,
8339 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8341 (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8342 def : Pat<(v2i64 (vselect VK2WM:$mask,
8343 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8344 v2i64x_info.ImmAllZerosV)),
8345 (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8348 let Predicates = [HasAVX512, NoVLX] in {
8349 def : Pat<(v8i32 (X86cvttp2ui (v8f32 VR256X:$src1))),
8350 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
8351 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
8352 VR256X:$src1, sub_ymm)))), sub_ymm)>;
8354 def : Pat<(v4i32 (X86cvttp2ui (v4f32 VR128X:$src1))),
8355 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
8356 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
8357 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8359 def : Pat<(v4i32 (X86cvttp2ui (v4f64 VR256X:$src1))),
8360 (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
8361 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8362 VR256X:$src1, sub_ymm)))), sub_xmm)>;
8364 def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
8365 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
8366 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
8367 VR256X:$src1, sub_ymm)))), sub_ymm)>;
8369 def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
8370 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
8371 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
8372 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8374 def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
8375 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
8376 (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
8377 VR128X:$src1, sub_xmm)))), sub_ymm)>;
8379 def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
8380 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
8381 (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
8382 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8385 let Predicates = [HasVLX] in {
8386 def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8387 (VCVTDQ2PDZ128rm addr:$src)>;
8388 def : Pat<(v2f64 (vselect VK2WM:$mask,
8389 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8391 (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8392 def : Pat<(v2f64 (vselect VK2WM:$mask,
8393 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8394 v2f64x_info.ImmAllZerosV)),
8395 (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8397 def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8398 (VCVTUDQ2PDZ128rm addr:$src)>;
8399 def : Pat<(v2f64 (vselect VK2WM:$mask,
8400 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8402 (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8403 def : Pat<(v2f64 (vselect VK2WM:$mask,
8404 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8405 v2f64x_info.ImmAllZerosV)),
8406 (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8409 let Predicates = [HasDQI, HasVLX] in {
8410 // Special patterns to allow use of X86VMSintToFP for masking. Instruction
8411 // patterns have been disabled with null_frag.
8412 def : Pat<(v4f32 (X86VSintToFP (v2i64 VR128X:$src))),
8413 (VCVTQQ2PSZ128rr VR128X:$src)>;
8414 def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
8416 (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8417 def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
8419 (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
8421 def : Pat<(v4f32 (X86VSintToFP (loadv2i64 addr:$src))),
8422 (VCVTQQ2PSZ128rm addr:$src)>;
8423 def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
8425 (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8426 def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
8428 (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
8430 def : Pat<(v4f32 (X86VSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))))),
8431 (VCVTQQ2PSZ128rmb addr:$src)>;
8432 def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
8433 (v4f32 VR128X:$src0), VK2WM:$mask),
8434 (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8435 def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
8436 v4f32x_info.ImmAllZerosV, VK2WM:$mask),
8437 (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
8439 // Special patterns to allow use of X86VMUintToFP for masking. Instruction
8440 // patterns have been disabled with null_frag.
8441 def : Pat<(v4f32 (X86VUintToFP (v2i64 VR128X:$src))),
8442 (VCVTUQQ2PSZ128rr VR128X:$src)>;
8443 def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
8445 (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8446 def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
8448 (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
8450 def : Pat<(v4f32 (X86VUintToFP (loadv2i64 addr:$src))),
8451 (VCVTUQQ2PSZ128rm addr:$src)>;
8452 def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
8454 (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8455 def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
8457 (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
8459 def : Pat<(v4f32 (X86VUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))))),
8460 (VCVTUQQ2PSZ128rmb addr:$src)>;
8461 def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
8462 (v4f32 VR128X:$src0), VK2WM:$mask),
8463 (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8464 def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
8465 v4f32x_info.ImmAllZerosV, VK2WM:$mask),
8466 (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
8469 let Predicates = [HasDQI, NoVLX] in {
8470 def : Pat<(v2i64 (X86cvttp2si (v2f64 VR128X:$src1))),
8471 (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
8472 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8473 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8475 def : Pat<(v4i64 (X86cvttp2si (v4f32 VR128X:$src1))),
8476 (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
8477 (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
8478 VR128X:$src1, sub_xmm)))), sub_ymm)>;
8480 def : Pat<(v4i64 (X86cvttp2si (v4f64 VR256X:$src1))),
8481 (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
8482 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8483 VR256X:$src1, sub_ymm)))), sub_ymm)>;
8485 def : Pat<(v2i64 (X86cvttp2ui (v2f64 VR128X:$src1))),
8486 (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
8487 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8488 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8490 def : Pat<(v4i64 (X86cvttp2ui (v4f32 VR128X:$src1))),
8491 (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
8492 (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
8493 VR128X:$src1, sub_xmm)))), sub_ymm)>;
8495 def : Pat<(v4i64 (X86cvttp2ui (v4f64 VR256X:$src1))),
8496 (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
8497 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8498 VR256X:$src1, sub_ymm)))), sub_ymm)>;
8500 def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
8501 (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
8502 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8503 VR256X:$src1, sub_ymm)))), sub_xmm)>;
8505 def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
8506 (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
8507 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8508 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8510 def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
8511 (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
8512 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8513 VR256X:$src1, sub_ymm)))), sub_ymm)>;
8515 def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
8516 (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
8517 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8518 VR256X:$src1, sub_ymm)))), sub_xmm)>;
8520 def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
8521 (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
8522 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8523 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8525 def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
8526 (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
8527 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8528 VR256X:$src1, sub_ymm)))), sub_ymm)>;
8531 //===----------------------------------------------------------------------===//
8532 // Half precision conversion instructions
8533 //===----------------------------------------------------------------------===//
8535 multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8536 X86MemOperand x86memop, PatFrag ld_frag,
8537 X86FoldableSchedWrite sched> {
8538 defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
8539 (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
8540 (X86cvtph2ps (_src.VT _src.RC:$src))>,
8541 T8PD, Sched<[sched]>;
8542 defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
8543 (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
8544 (X86cvtph2ps (_src.VT
8545 (ld_frag addr:$src)))>,
8546 T8PD, Sched<[sched.Folded]>;
8549 multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8550 X86FoldableSchedWrite sched> {
8551 defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
8552 (ins _src.RC:$src), "vcvtph2ps",
8553 "{sae}, $src", "$src, {sae}",
8554 (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
8555 T8PD, EVEX_B, Sched<[sched]>;
8558 let Predicates = [HasAVX512] in
8559 defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, load,
8561 avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
8562 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8564 let Predicates = [HasVLX] in {
8565 defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
8566 load, WriteCvtPH2PSY>, EVEX, EVEX_V256,
8567 EVEX_CD8<32, CD8VH>;
8568 defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
8569 load, WriteCvtPH2PS>, EVEX, EVEX_V128,
8570 EVEX_CD8<32, CD8VH>;
8572 // Pattern match vcvtph2ps of a scalar i64 load.
8573 def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
8574 (VCVTPH2PSZ128rm addr:$src)>;
8575 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
8576 (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
8577 (VCVTPH2PSZ128rm addr:$src)>;
8580 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8581 X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
8582 let ExeDomain = GenericDomain in {
8583 def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8584 (ins _src.RC:$src1, i32u8imm:$src2),
8585 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8586 [(set _dest.RC:$dst,
8587 (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2)))]>,
8589 let Constraints = "$src0 = $dst" in
8590 def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8591 (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8592 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
8593 [(set _dest.RC:$dst,
8594 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
8595 _dest.RC:$src0, _src.KRCWM:$mask))]>,
8596 Sched<[RR]>, EVEX_K;
8597 def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8598 (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8599 "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
8600 [(set _dest.RC:$dst,
8601 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
8602 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
8603 Sched<[RR]>, EVEX_KZ;
8604 let hasSideEffects = 0, mayStore = 1 in {
8605 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
8606 (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
8607 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8609 def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
8610 (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8611 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
8612 EVEX_K, Sched<[MR]>, NotMemoryFoldable;
8617 multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8619 let hasSideEffects = 0 in
8620 defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
8621 (outs _dest.RC:$dst),
8622 (ins _src.RC:$src1, i32u8imm:$src2),
8623 "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
8624 EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
8627 let Predicates = [HasAVX512] in {
8628 defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
8629 WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
8630 avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
8631 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8632 let Predicates = [HasVLX] in {
8633 defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
8634 WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
8635 EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
8636 defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
8637 WriteCvtPS2PH, WriteCvtPS2PHSt>,
8638 EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
8641 def : Pat<(store (f64 (extractelt
8642 (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
8643 (iPTR 0))), addr:$dst),
8644 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
8645 def : Pat<(store (i64 (extractelt
8646 (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
8647 (iPTR 0))), addr:$dst),
8648 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
8649 def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
8650 (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
8651 def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),
8652 (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
8655 // Patterns for matching conversions from float to half-float and vice versa.
8656 let Predicates = [HasVLX] in {
8657 // Use MXCSR.RC for rounding instead of explicitly specifying the default
8658 // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
8659 // configurations we support (the default). However, falling back to MXCSR is
8660 // more consistent with other instructions, which are always controlled by it.
8661 // It's encoded as 0b100.
8662 def : Pat<(fp_to_f16 FR32X:$src),
8663 (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (v8i16 (VCVTPS2PHZ128rr
8664 (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4))), sub_16bit))>;
8666 def : Pat<(f16_to_fp GR16:$src),
8667 (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
8668 (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)))), FR32X)) >;
8670 def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
8671 (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
8672 (v8i16 (VCVTPS2PHZ128rr
8673 (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4)))), FR32X)) >;
8676 // Unordered/Ordered scalar fp compare with Sae and set EFLAGS
8677 multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
8678 string OpcodeStr, X86FoldableSchedWrite sched> {
8679 let hasSideEffects = 0 in
8680 def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
8681 !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
8682 EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
8685 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
8686 defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>,
8687 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
8688 defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>,
8689 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
8690 defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>,
8691 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
8692 defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>,
8693 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
8696 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
8697 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
8698 "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
8699 EVEX_CD8<32, CD8VT1>;
8700 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
8701 "ucomisd", WriteFCom>, PD, EVEX,
8702 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8703 let Pattern = []<dag> in {
8704 defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
8705 "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
8706 EVEX_CD8<32, CD8VT1>;
8707 defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
8708 "comisd", WriteFCom>, PD, EVEX,
8709 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8711 let isCodeGenOnly = 1 in {
8712 defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
8713 sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
8714 EVEX_CD8<32, CD8VT1>;
8715 defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
8716 sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX,
8717 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8719 defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
8720 sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
8721 EVEX_CD8<32, CD8VT1>;
8722 defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
8723 sse_load_f64, "comisd", WriteFCom>, PD, EVEX,
8724 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8728 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
8729 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8730 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8731 let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
8732 defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8733 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8734 "$src2, $src1", "$src1, $src2",
8735 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8736 EVEX_4V, VEX_LIG, Sched<[sched]>;
8737 defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8738 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8739 "$src2, $src1", "$src1, $src2",
8740 (OpNode (_.VT _.RC:$src1),
8741 _.ScalarIntMemCPat:$src2)>, EVEX_4V, VEX_LIG,
8742 Sched<[sched.Folded, sched.ReadAfterFold]>;
8746 defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
8747 f32x_info>, EVEX_CD8<32, CD8VT1>,
8749 defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
8750 f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
8752 defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
8753 SchedWriteFRsqrt.Scl, f32x_info>,
8754 EVEX_CD8<32, CD8VT1>, T8PD;
8755 defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
8756 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
8757 EVEX_CD8<64, CD8VT1>, T8PD;
8759 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
8760 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
8761 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8762 let ExeDomain = _.ExeDomain in {
8763 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8764 (ins _.RC:$src), OpcodeStr, "$src", "$src",
8765 (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
8767 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8768 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8770 (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
8771 Sched<[sched.Folded, sched.ReadAfterFold]>;
8772 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8773 (ins _.ScalarMemOp:$src), OpcodeStr,
8774 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8776 (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
8777 EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
8781 multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
8782 X86SchedWriteWidths sched> {
8783 defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
8784 v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
8785 defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM,
8786 v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
8788 // Define only if AVX512VL feature is present.
8789 let Predicates = [HasVLX] in {
8790 defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8791 OpNode, sched.XMM, v4f32x_info>,
8792 EVEX_V128, EVEX_CD8<32, CD8VF>;
8793 defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8794 OpNode, sched.YMM, v8f32x_info>,
8795 EVEX_V256, EVEX_CD8<32, CD8VF>;
8796 defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8797 OpNode, sched.XMM, v2f64x_info>,
8798 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
8799 defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8800 OpNode, sched.YMM, v4f64x_info>,
8801 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
8805 defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
8806 defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
8808 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
8809 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
8810 SDNode OpNode, SDNode OpNodeSAE,
8811 X86FoldableSchedWrite sched> {
8812 let ExeDomain = _.ExeDomain in {
8813 defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8814 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8815 "$src2, $src1", "$src1, $src2",
8816 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8819 defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8820 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8821 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
8822 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8823 EVEX_B, Sched<[sched]>;
8825 defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8826 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8827 "$src2, $src1", "$src1, $src2",
8828 (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2)>,
8829 Sched<[sched.Folded, sched.ReadAfterFold]>;
8833 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8834 SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
8835 defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
8836 sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG;
8837 defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
8838 sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
8841 let Predicates = [HasERI] in {
8842 defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
8843 SchedWriteFRcp.Scl>, T8PD, EVEX_4V;
8844 defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
8845 SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
8848 defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
8849 SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
8850 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
8852 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8853 SDNode OpNode, X86FoldableSchedWrite sched> {
8854 let ExeDomain = _.ExeDomain in {
8855 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8856 (ins _.RC:$src), OpcodeStr, "$src", "$src",
8857 (OpNode (_.VT _.RC:$src))>,
8860 defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8861 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8863 (bitconvert (_.LdFrag addr:$src))))>,
8864 Sched<[sched.Folded, sched.ReadAfterFold]>;
8866 defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8867 (ins _.ScalarMemOp:$src), OpcodeStr,
8868 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8870 (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
8871 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
8874 multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8875 SDNode OpNode, X86FoldableSchedWrite sched> {
8876 let ExeDomain = _.ExeDomain in
8877 defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8878 (ins _.RC:$src), OpcodeStr,
8879 "{sae}, $src", "$src, {sae}",
8880 (OpNode (_.VT _.RC:$src))>,
8881 EVEX_B, Sched<[sched]>;
8884 multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
8885 SDNode OpNodeSAE, X86SchedWriteWidths sched> {
8886 defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
8887 avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
8888 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
8889 defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
8890 avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
8891 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
8894 multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
8895 SDNode OpNode, X86SchedWriteWidths sched> {
8896 // Define only if AVX512VL feature is present.
8897 let Predicates = [HasVLX] in {
8898 defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
8900 EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
8901 defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
8903 EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
8904 defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
8906 EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
8907 defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
8909 EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
8913 let Predicates = [HasERI] in {
8914 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
8915 SchedWriteFRsqrt>, EVEX;
8916 defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
8917 SchedWriteFRcp>, EVEX;
8918 defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
8919 SchedWriteFAdd>, EVEX;
8921 defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
8923 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
8924 SchedWriteFRnd>, EVEX;
8926 multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
8927 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8928 let ExeDomain = _.ExeDomain in
8929 defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8930 (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
8931 (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
8932 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
8935 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
8936 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8937 let ExeDomain = _.ExeDomain in {
8938 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8939 (ins _.RC:$src), OpcodeStr, "$src", "$src",
8940 (_.VT (fsqrt _.RC:$src))>, EVEX,
8942 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8943 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8945 (bitconvert (_.LdFrag addr:$src))))>, EVEX,
8946 Sched<[sched.Folded, sched.ReadAfterFold]>;
8947 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8948 (ins _.ScalarMemOp:$src), OpcodeStr,
8949 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8951 (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
8952 EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
8956 multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
8957 X86SchedWriteSizes sched> {
8958 defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8959 sched.PS.ZMM, v16f32_info>,
8960 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8961 defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8962 sched.PD.ZMM, v8f64_info>,
8963 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8964 // Define only if AVX512VL feature is present.
8965 let Predicates = [HasVLX] in {
8966 defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8967 sched.PS.XMM, v4f32x_info>,
8968 EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
8969 defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8970 sched.PS.YMM, v8f32x_info>,
8971 EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
8972 defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8973 sched.PD.XMM, v2f64x_info>,
8974 EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8975 defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8976 sched.PD.YMM, v4f64x_info>,
8977 EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8981 multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
8982 X86SchedWriteSizes sched> {
8983 defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
8984 sched.PS.ZMM, v16f32_info>,
8985 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8986 defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
8987 sched.PD.ZMM, v8f64_info>,
8988 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8991 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
8992 X86VectorVTInfo _, string Name> {
8993 let ExeDomain = _.ExeDomain in {
8994 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8995 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8996 "$src2, $src1", "$src1, $src2",
8997 (X86fsqrts (_.VT _.RC:$src1),
8998 (_.VT _.RC:$src2))>,
9000 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9001 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9002 "$src2, $src1", "$src1, $src2",
9003 (X86fsqrts (_.VT _.RC:$src1),
9004 _.ScalarIntMemCPat:$src2)>,
9005 Sched<[sched.Folded, sched.ReadAfterFold]>;
9006 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9007 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
9008 "$rc, $src2, $src1", "$src1, $src2, $rc",
9009 (X86fsqrtRnds (_.VT _.RC:$src1),
9012 EVEX_B, EVEX_RC, Sched<[sched]>;
9014 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
9015 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9016 (ins _.FRC:$src1, _.FRC:$src2),
9017 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9020 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9021 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
9022 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9023 Sched<[sched.Folded, sched.ReadAfterFold]>;
9027 let Predicates = [HasAVX512] in {
9028 def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
9029 (!cast<Instruction>(Name#Zr)
9030 (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
9033 let Predicates = [HasAVX512, OptForSize] in {
9034 def : Pat<(_.EltVT (fsqrt (load addr:$src))),
9035 (!cast<Instruction>(Name#Zm)
9036 (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
9040 multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
9041 X86SchedWriteSizes sched> {
9042 defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
9043 EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
9044 defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
9045 EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
9048 defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
9049 avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
9051 defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
9053 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
9054 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9055 let ExeDomain = _.ExeDomain in {
9056 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9057 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9058 "$src3, $src2, $src1", "$src1, $src2, $src3",
9059 (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9063 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9064 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9065 "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
9066 (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9067 (i32 imm:$src3)))>, EVEX_B,
9070 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9071 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
9073 "$src3, $src2, $src1", "$src1, $src2, $src3",
9074 (_.VT (X86RndScales _.RC:$src1,
9075 _.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>,
9076 Sched<[sched.Folded, sched.ReadAfterFold]>;
9078 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
9079 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9080 (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
9081 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9082 []>, Sched<[sched]>;
9085 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9086 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9087 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9088 []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
9092 let Predicates = [HasAVX512] in {
9093 def : Pat<(X86VRndScale _.FRC:$src1, imm:$src2),
9094 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
9095 _.FRC:$src1, imm:$src2))>;
9098 let Predicates = [HasAVX512, OptForSize] in {
9099 def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), imm:$src2),
9100 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
9101 addr:$src1, imm:$src2))>;
9105 defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
9106 SchedWriteFRnd.Scl, f32x_info>,
9107 AVX512AIi8Base, EVEX_4V, VEX_LIG,
9108 EVEX_CD8<32, CD8VT1>;
9110 defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
9111 SchedWriteFRnd.Scl, f64x_info>,
9112 VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
9113 EVEX_CD8<64, CD8VT1>;
9115 multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
9116 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
9117 dag OutMask, Predicate BasePredicate> {
9118 let Predicates = [BasePredicate] in {
9119 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
9120 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9121 (extractelt _.VT:$dst, (iPTR 0))))),
9122 (!cast<Instruction>("V"#OpcPrefix#r_Intk)
9123 _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
9125 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
9126 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9128 (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
9129 OutMask, _.VT:$src2, _.VT:$src1)>;
9133 defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
9134 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
9135 fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
9136 defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
9137 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
9138 fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
9141 //-------------------------------------------------
9142 // Integer truncate and extend operations
9143 //-------------------------------------------------
9145 // PatFrags that contain a select and a truncate op. The take operands in the
9146 // same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
9147 // either to the multiclasses.
9148 def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
9149 (vselect node:$mask,
9150 (trunc node:$src), node:$src0)>;
9151 def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
9152 (vselect node:$mask,
9153 (X86vtruncs node:$src), node:$src0)>;
9154 def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
9155 (vselect node:$mask,
9156 (X86vtruncus node:$src), node:$src0)>;
9158 multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9159 SDPatternOperator MaskNode,
9160 X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
9161 X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
9162 let ExeDomain = DestInfo.ExeDomain in {
9163 def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9164 (ins SrcInfo.RC:$src),
9165 OpcodeStr # "\t{$src, $dst|$dst, $src}",
9166 [(set DestInfo.RC:$dst,
9167 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
9168 EVEX, Sched<[sched]>;
9169 let Constraints = "$src0 = $dst" in
9170 def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9171 (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9172 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9173 [(set DestInfo.RC:$dst,
9174 (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9175 (DestInfo.VT DestInfo.RC:$src0),
9176 SrcInfo.KRCWM:$mask))]>,
9177 EVEX, EVEX_K, Sched<[sched]>;
9178 def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9179 (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9180 OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
9181 [(set DestInfo.RC:$dst,
9182 (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9183 DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
9184 EVEX, EVEX_KZ, Sched<[sched]>;
9187 let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
9188 def mr : AVX512XS8I<opc, MRMDestMem, (outs),
9189 (ins x86memop:$dst, SrcInfo.RC:$src),
9190 OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
9191 EVEX, Sched<[sched.Folded]>;
9193 def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
9194 (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9195 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
9196 EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
9197 }//mayStore = 1, hasSideEffects = 0
9200 multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9201 X86VectorVTInfo DestInfo,
9202 PatFrag truncFrag, PatFrag mtruncFrag,
9205 def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9206 (!cast<Instruction>(Name#SrcInfo.ZSuffix##mr)
9207 addr:$dst, SrcInfo.RC:$src)>;
9209 def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
9210 SrcInfo.KRCWM:$mask),
9211 (!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk)
9212 addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
9215 multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9216 SDNode OpNode256, SDNode OpNode512,
9217 SDPatternOperator MaskNode128,
9218 SDPatternOperator MaskNode256,
9219 SDPatternOperator MaskNode512,
9220 X86FoldableSchedWrite sched,
9221 AVX512VLVectorVTInfo VTSrcInfo,
9222 X86VectorVTInfo DestInfoZ128,
9223 X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9224 X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9225 X86MemOperand x86memopZ, PatFrag truncFrag,
9226 PatFrag mtruncFrag, Predicate prd = HasAVX512>{
9228 let Predicates = [HasVLX, prd] in {
9229 defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
9230 VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9231 avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
9232 truncFrag, mtruncFrag, NAME>, EVEX_V128;
9234 defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
9235 VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9236 avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
9237 truncFrag, mtruncFrag, NAME>, EVEX_V256;
9239 let Predicates = [prd] in
9240 defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
9241 VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9242 avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
9243 truncFrag, mtruncFrag, NAME>, EVEX_V512;
9246 multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9247 SDPatternOperator MaskNode,
9248 X86FoldableSchedWrite sched, PatFrag StoreNode,
9249 PatFrag MaskedStoreNode, SDNode InVecNode,
9250 SDPatternOperator InVecMaskNode> {
9251 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
9252 InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
9253 avx512vl_i64_info, v16i8x_info, v16i8x_info,
9254 v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9255 MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
9258 multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9259 SDPatternOperator MaskNode,
9260 X86FoldableSchedWrite sched, PatFrag StoreNode,
9261 PatFrag MaskedStoreNode, SDNode InVecNode,
9262 SDPatternOperator InVecMaskNode> {
9263 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9264 InVecMaskNode, InVecMaskNode, MaskNode, sched,
9265 avx512vl_i64_info, v8i16x_info, v8i16x_info,
9266 v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9267 MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
9270 multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9271 SDPatternOperator MaskNode,
9272 X86FoldableSchedWrite sched, PatFrag StoreNode,
9273 PatFrag MaskedStoreNode, SDNode InVecNode,
9274 SDPatternOperator InVecMaskNode> {
9275 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9276 InVecMaskNode, MaskNode, MaskNode, sched,
9277 avx512vl_i64_info, v4i32x_info, v4i32x_info,
9278 v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9279 MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
9282 multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
9283 SDPatternOperator MaskNode,
9284 X86FoldableSchedWrite sched, PatFrag StoreNode,
9285 PatFrag MaskedStoreNode, SDNode InVecNode,
9286 SDPatternOperator InVecMaskNode> {
9287 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9288 InVecMaskNode, InVecMaskNode, MaskNode, sched,
9289 avx512vl_i32_info, v16i8x_info, v16i8x_info,
9290 v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
9291 MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
9294 multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9295 SDPatternOperator MaskNode,
9296 X86FoldableSchedWrite sched, PatFrag StoreNode,
9297 PatFrag MaskedStoreNode, SDNode InVecNode,
9298 SDPatternOperator InVecMaskNode> {
9299 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9300 InVecMaskNode, MaskNode, MaskNode, sched,
9301 avx512vl_i32_info, v8i16x_info, v8i16x_info,
9302 v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
9303 MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
9306 multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9307 SDPatternOperator MaskNode,
9308 X86FoldableSchedWrite sched, PatFrag StoreNode,
9309 PatFrag MaskedStoreNode, SDNode InVecNode,
9310 SDPatternOperator InVecMaskNode> {
9311 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9312 InVecMaskNode, MaskNode, MaskNode, sched,
9313 avx512vl_i16_info, v16i8x_info, v16i8x_info,
9314 v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
9315 MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
9318 defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, select_trunc,
9319 WriteShuffle256, truncstorevi8,
9320 masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9321 defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, select_truncs,
9322 WriteShuffle256, truncstore_s_vi8,
9323 masked_truncstore_s_vi8, X86vtruncs,
9325 defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
9326 select_truncus, WriteShuffle256,
9327 truncstore_us_vi8, masked_truncstore_us_vi8,
9328 X86vtruncus, X86vmtruncus>;
9330 defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
9331 WriteShuffle256, truncstorevi16,
9332 masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9333 defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs,
9334 WriteShuffle256, truncstore_s_vi16,
9335 masked_truncstore_s_vi16, X86vtruncs,
9337 defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
9338 select_truncus, WriteShuffle256,
9339 truncstore_us_vi16, masked_truncstore_us_vi16,
9340 X86vtruncus, X86vmtruncus>;
9342 defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
9343 WriteShuffle256, truncstorevi32,
9344 masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
9345 defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs,
9346 WriteShuffle256, truncstore_s_vi32,
9347 masked_truncstore_s_vi32, X86vtruncs,
9349 defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
9350 select_truncus, WriteShuffle256,
9351 truncstore_us_vi32, masked_truncstore_us_vi32,
9352 X86vtruncus, X86vmtruncus>;
9354 defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
9355 WriteShuffle256, truncstorevi8,
9356 masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9357 defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
9358 WriteShuffle256, truncstore_s_vi8,
9359 masked_truncstore_s_vi8, X86vtruncs,
9361 defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
9362 select_truncus, WriteShuffle256,
9363 truncstore_us_vi8, masked_truncstore_us_vi8,
9364 X86vtruncus, X86vmtruncus>;
9366 defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
9367 WriteShuffle256, truncstorevi16,
9368 masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9369 defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
9370 WriteShuffle256, truncstore_s_vi16,
9371 masked_truncstore_s_vi16, X86vtruncs,
9373 defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
9374 select_truncus, WriteShuffle256,
9375 truncstore_us_vi16, masked_truncstore_us_vi16,
9376 X86vtruncus, X86vmtruncus>;
9378 defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
9379 WriteShuffle256, truncstorevi8,
9380 masked_truncstorevi8, X86vtrunc,
9382 defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
9383 WriteShuffle256, truncstore_s_vi8,
9384 masked_truncstore_s_vi8, X86vtruncs,
9386 defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
9387 select_truncus, WriteShuffle256,
9388 truncstore_us_vi8, masked_truncstore_us_vi8,
9389 X86vtruncus, X86vmtruncus>;
9391 let Predicates = [HasAVX512, NoVLX] in {
9392 def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
9393 (v8i16 (EXTRACT_SUBREG
9394 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
9395 VR256X:$src, sub_ymm)))), sub_xmm))>;
9396 def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
9397 (v4i32 (EXTRACT_SUBREG
9398 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
9399 VR256X:$src, sub_ymm)))), sub_xmm))>;
9402 let Predicates = [HasBWI, NoVLX] in {
9403 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9404 (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
9405 VR256X:$src, sub_ymm))), sub_xmm))>;
9408 // Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
9409 multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
9410 X86VectorVTInfo DestInfo,
9411 X86VectorVTInfo SrcInfo> {
9412 def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9414 SrcInfo.KRCWM:$mask)),
9415 (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
9416 SrcInfo.KRCWM:$mask,
9419 def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9420 DestInfo.ImmAllZerosV,
9421 SrcInfo.KRCWM:$mask)),
9422 (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
9426 let Predicates = [HasVLX] in {
9427 defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
9428 defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
9429 defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
9432 let Predicates = [HasAVX512] in {
9433 defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
9434 defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
9435 defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
9437 defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
9438 defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
9439 defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
9441 defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
9442 defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
9443 defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
9446 multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9447 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
9448 X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
9449 let ExeDomain = DestInfo.ExeDomain in {
9450 defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
9451 (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
9452 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
9453 EVEX, Sched<[sched]>;
9455 defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
9456 (ins x86memop:$src), OpcodeStr ,"$src", "$src",
9457 (DestInfo.VT (LdFrag addr:$src))>,
9458 EVEX, Sched<[sched.Folded]>;
9462 multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
9463 SDNode OpNode, SDNode InVecNode, string ExtTy,
9464 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9465 let Predicates = [HasVLX, HasBWI] in {
9466 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
9467 v16i8x_info, i64mem, LdFrag, InVecNode>,
9468 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
9470 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
9471 v16i8x_info, i128mem, LdFrag, OpNode>,
9472 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
9474 let Predicates = [HasBWI] in {
9475 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
9476 v32i8x_info, i256mem, LdFrag, OpNode>,
9477 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
9481 multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
9482 SDNode OpNode, SDNode InVecNode, string ExtTy,
9483 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9484 let Predicates = [HasVLX, HasAVX512] in {
9485 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
9486 v16i8x_info, i32mem, LdFrag, InVecNode>,
9487 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
9489 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
9490 v16i8x_info, i64mem, LdFrag, InVecNode>,
9491 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
9493 let Predicates = [HasAVX512] in {
9494 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
9495 v16i8x_info, i128mem, LdFrag, OpNode>,
9496 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
9500 multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
9501 SDNode OpNode, SDNode InVecNode, string ExtTy,
9502 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9503 let Predicates = [HasVLX, HasAVX512] in {
9504 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9505 v16i8x_info, i16mem, LdFrag, InVecNode>,
9506 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
9508 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9509 v16i8x_info, i32mem, LdFrag, InVecNode>,
9510 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
9512 let Predicates = [HasAVX512] in {
9513 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9514 v16i8x_info, i64mem, LdFrag, InVecNode>,
9515 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
9519 multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
9520 SDNode OpNode, SDNode InVecNode, string ExtTy,
9521 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9522 let Predicates = [HasVLX, HasAVX512] in {
9523 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
9524 v8i16x_info, i64mem, LdFrag, InVecNode>,
9525 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
9527 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
9528 v8i16x_info, i128mem, LdFrag, OpNode>,
9529 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
9531 let Predicates = [HasAVX512] in {
9532 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
9533 v16i16x_info, i256mem, LdFrag, OpNode>,
9534 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
9538 multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
9539 SDNode OpNode, SDNode InVecNode, string ExtTy,
9540 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9541 let Predicates = [HasVLX, HasAVX512] in {
9542 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9543 v8i16x_info, i32mem, LdFrag, InVecNode>,
9544 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
9546 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9547 v8i16x_info, i64mem, LdFrag, InVecNode>,
9548 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
9550 let Predicates = [HasAVX512] in {
9551 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9552 v8i16x_info, i128mem, LdFrag, OpNode>,
9553 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
9557 multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
9558 SDNode OpNode, SDNode InVecNode, string ExtTy,
9559 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
9561 let Predicates = [HasVLX, HasAVX512] in {
9562 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9563 v4i32x_info, i64mem, LdFrag, InVecNode>,
9564 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
9566 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9567 v4i32x_info, i128mem, LdFrag, OpNode>,
9568 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
9570 let Predicates = [HasAVX512] in {
9571 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9572 v8i32x_info, i256mem, LdFrag, OpNode>,
9573 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
9577 defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
9578 defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
9579 defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>;
9580 defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
9581 defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
9582 defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;
9584 defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
9585 defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
9586 defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>;
9587 defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
9588 defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
9589 defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;
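// Mnemonic naming: the two letters after vpmovzx/vpmovsx are the source and
// destination element widths (b = 8, w = 16, d = 32, q = 64 bits), e.g.
// vpmovzxbw zero-extends bytes to words and vpmovsxdq sign-extends
// doublewords to quadwords.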
9592 // Patterns for which we also need any-extend versions. aext_vector_inreg
9593 // is currently legalized to zext_vector_inreg.
9594 multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
9596 let Predicates = [HasVLX, HasBWI] in {
9597 def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
9598 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
9601 let Predicates = [HasVLX] in {
9602 def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
9603 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
9605 def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
9606 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
9610 let Predicates = [HasBWI] in {
9611 def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
9612 (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
9614 let Predicates = [HasAVX512] in {
9615 def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
9616 (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
9617 def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
9618 (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
9620 def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
9621 (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
9623 def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
9624 (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
9628 multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
9630 AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
9632 let Predicates = [HasVLX, HasBWI] in {
9633 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9634 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9635 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9636 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9637 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
9638 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9640 let Predicates = [HasVLX] in {
9641 def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9642 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9643 def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
9644 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9646 def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
9647 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
9649 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9650 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9651 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9652 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9653 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
9654 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9656 def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9657 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9658 def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
9659 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9661 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9662 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9663 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9664 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9665 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
9666 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9668 let Predicates = [HasVLX] in {
9669 def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9670 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9671 def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
9672 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9674 def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9675 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9676 def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
9677 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9679 def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9680 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
9681 def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
9682 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
9685 let Predicates = [HasAVX512] in {
9686 def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9687 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
9691 defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
9692 defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
9694 // Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
9695 // ext+trunc aggressively, making it impossible to legalize the DAG to this
9696 // pattern directly.
9697 let Predicates = [HasAVX512, NoBWI] in {
9698 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9699 (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
9700 def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
9701 (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
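// Roughly, the register form above selects the sequence:
//   vpmovzxwd %ymm0, %zmm0  # widen v16i16 -> v16i32
//   vpmovdb   %zmm0, %xmm0  # truncate v16i32 -> v16i8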
9704 //===----------------------------------------------------------------------===//
9705 // GATHER - SCATTER Operations
9707 // FIXME: Improve scheduling of gather/scatter instructions.
9708 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9709 X86MemOperand memop, PatFrag GatherNode,
9710 RegisterClass MaskRC = _.KRCWM> {
9711 let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
9712 ExeDomain = _.ExeDomain in
9713 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
9714 (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
9715 !strconcat(OpcodeStr#_.Suffix,
9716 "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
9717 [(set _.RC:$dst, MaskRC:$mask_wb,
9718 (GatherNode (_.VT _.RC:$src1), MaskRC:$mask,
9719 vectoraddr:$src2))]>, EVEX, EVEX_K,
9720 EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
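// Note the constraints above: the gather both reads and writes the mask (each
// element's mask bit is cleared once that element has been loaded), and the
// early-clobber on $dst reflects that the destination may not overlap the
// index or mask registers.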
9723 multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
9724 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9725 defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
9726 vy512xmem, mgatherv8i32>, EVEX_V512, VEX_W;
9727 defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
9728 vz512mem, mgatherv8i64>, EVEX_V512, VEX_W;
9729 let Predicates = [HasVLX] in {
9730 defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
9731 vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
9732 defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
9733 vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
9734 defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
9735 vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
9736 defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
9737 vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
9741 multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
9742 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9743 defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
9744 mgatherv16i32>, EVEX_V512;
9745 defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem,
9746 mgatherv8i64>, EVEX_V512;
9747 let Predicates = [HasVLX] in {
9748 defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
9749 vy256xmem, mgatherv8i32>, EVEX_V256;
9750 defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
9751 vy128xmem, mgatherv4i64>, EVEX_V256;
9752 defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
9753 vx128xmem, mgatherv4i32>, EVEX_V128;
9754 defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
9755 vx64xmem, mgatherv2i64, VK2WM>,
9761 defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
9762 avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
9764 defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
9765 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
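// The letter following the base mnemonic is the index width (d = 32-bit,
// q = 64-bit indices). With 64-bit indices and 32-bit data (e.g. vgatherqps)
// only half a vector of data is produced, which is why the q-forms above pair
// a full-width index operand with the next-smaller data VTInfo.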
9767 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9768 X86MemOperand memop, PatFrag ScatterNode,
9769 RegisterClass MaskRC = _.KRCWM> {
9771 let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in
9773 def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
9774 (ins memop:$dst, MaskRC:$mask, _.RC:$src),
9775 !strconcat(OpcodeStr#_.Suffix,
9776 "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
9777 [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
9778 MaskRC:$mask, vectoraddr:$dst))]>,
9779 EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
9780 Sched<[WriteStore]>;
9783 multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
9784 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9785 defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
9786 vy512xmem, mscatterv8i32>, EVEX_V512, VEX_W;
9787 defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
9788 vz512mem, mscatterv8i64>, EVEX_V512, VEX_W;
9789 let Predicates = [HasVLX] in {
9790 defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
9791 vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
9792 defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
9793 vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
9794 defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
9795 vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
9796 defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
9797 vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
9801 multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
9802 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9803 defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
9804 mscatterv16i32>, EVEX_V512;
9805 defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem,
9806 mscatterv8i64>, EVEX_V512;
9807 let Predicates = [HasVLX] in {
9808 defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
9809 vy256xmem, mscatterv8i32>, EVEX_V256;
9810 defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
9811 vy128xmem, mscatterv4i64>, EVEX_V256;
9812 defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
9813 vx128xmem, mscatterv4i32>, EVEX_V128;
9814 defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
9815 vx64xmem, mscatterv2i64, VK2WM>,
9820 defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
9821 avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
9823 defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
9824 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
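// Scatters mirror the gathers above: the mask is consumed as elements are
// stored, and the same d/q index-width naming applies, e.g. vpscatterqd
// stores 32-bit elements through 64-bit indices.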
9827 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
9828 RegisterClass KRC, X86MemOperand memop> {
9829 let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
9830 def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
9831 !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
9832 EVEX, EVEX_K, Sched<[WriteLoad]>;
9835 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
9836 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9838 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
9839 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9841 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
9842 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9844 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
9845 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9847 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
9848 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9850 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
9851 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9853 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
9854 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9856 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
9857 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9859 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
9860 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9862 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
9863 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9865 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
9866 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9868 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
9869 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9871 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
9872 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9874 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
9875 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9877 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
9878 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9880 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
9881 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
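// The pf0/pf1 digit selects the locality hint: pf0 prefetches with a T0 hint
// and pf1 with a T1 hint. These PFI instructions read only a mask and an
// address; no vector register is read or written.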
9883 multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
9884 def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
9885 !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
9886 [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
9887 EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
9889 // Also need a pattern for anyextend.
9890 def : Pat<(Vec.VT (anyext Vec.KRC:$src)),
9891 (!cast<Instruction>(NAME#"rr") Vec.KRC:$src)>;
9894 multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
9895 string OpcodeStr, Predicate prd> {
9896 let Predicates = [prd] in
9897 defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
9899 let Predicates = [prd, HasVLX] in {
9900 defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
9901 defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
9905 defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
9906 defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
9907 defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
9908 defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
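// vpmovm2* materializes a mask register as a vector: every set mask bit
// becomes an all-ones element (the sext of i1) and every clear bit an
// all-zeros element, e.g. vpmovm2b %k1, %zmm0.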
9910 multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
9911 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
9912 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
9913 [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
9914 EVEX, Sched<[WriteMove]>;
9917 // Use the 512-bit version to implement 128/256-bit when VLX is unavailable (NoVLX).
9918 multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
9922 def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
9923 (_.KVT (COPY_TO_REGCLASS
9924 (!cast<Instruction>(Name#"Zrr")
9925 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
9926 _.RC:$src, _.SubRegIdx)),
9930 multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
9931 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
9932 let Predicates = [prd] in
9933 defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
9936 let Predicates = [prd, HasVLX] in {
9937 defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
9939 defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
9942 let Predicates = [prd, NoVLX] in {
9943 defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
9944 defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
9948 defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
9949 avx512vl_i8_info, HasBWI>;
9950 defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
9951 avx512vl_i16_info, HasBWI>, VEX_W;
9952 defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
9953 avx512vl_i32_info, HasDQI>;
9954 defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
9955 avx512vl_i64_info, HasDQI>, VEX_W;
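// The inverse direction: vpmov*2m copies the sign bit of each vector element
// into the corresponding mask bit, which is why the pattern above is phrased
// as (X86pcmpgtm 0, x), i.e. 0 >s x.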
9957 // Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
9958 // is available, but BWI is not. We can't handle this in lowering because
9959 // a target-independent DAG combine likes to combine sext and trunc.
9960 let Predicates = [HasDQI, NoBWI] in {
9961 def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
9962 (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9963 def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
9964 (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9966 def : Pat<(v16i8 (anyext (v16i1 VK16:$src))),
9967 (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9968 def : Pat<(v16i16 (anyext (v16i1 VK16:$src))),
9969 (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9972 let Predicates = [HasDQI, NoBWI, HasVLX] in {
9973 def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
9974 (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
9976 def : Pat<(v8i16 (anyext (v8i1 VK8:$src))),
9977 (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
9980 //===----------------------------------------------------------------------===//
9981 // AVX-512 - COMPRESS and EXPAND
9984 multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
9985 string OpcodeStr, X86FoldableSchedWrite sched> {
9986 defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
9987 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
9988 (null_frag)>, AVX5128IBase,
9991 let mayStore = 1, hasSideEffects = 0 in
9992 def mr : AVX5128I<opc, MRMDestMem, (outs),
9993 (ins _.MemOp:$dst, _.RC:$src),
9994 OpcodeStr # "\t{$src, $dst|$dst, $src}",
9995 []>, EVEX_CD8<_.EltSize, CD8VT1>,
9996 Sched<[sched.Folded]>;
9998 def mrk : AVX5128I<opc, MRMDestMem, (outs),
9999 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
10000 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
10002 EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10003 Sched<[sched.Folded]>;
10006 multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10007 def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
10008 (!cast<Instruction>(Name#_.ZSuffix##mrk)
10009 addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
10011 def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10012 (!cast<Instruction>(Name#_.ZSuffix##rrk)
10013 _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10014 def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10015 (!cast<Instruction>(Name#_.ZSuffix##rrkz)
10016 _.KRCWM:$mask, _.RC:$src)>;
10019 multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
10020 X86FoldableSchedWrite sched,
10021 AVX512VLVectorVTInfo VTInfo,
10022 Predicate Pred = HasAVX512> {
10023 let Predicates = [Pred] in
10024 defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
10025 compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10027 let Predicates = [Pred, HasVLX] in {
10028 defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
10029 compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10030 defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
10031 compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10035 // FIXME: Is there a better scheduler class for VPCOMPRESS?
10036 defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
10037 avx512vl_i32_info>, EVEX, NotMemoryFoldable;
10038 defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
10039 avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
10040 defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
10041 avx512vl_f32_info>, EVEX, NotMemoryFoldable;
10042 defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
10043 avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
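// Compress packs the elements selected by the write-mask into contiguous low
// positions of the destination (or into contiguous memory for the store
// form); e.g. with k1 = 0b0101, source elements 0 and 2 land in lanes 0 and 1.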
10046 multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
10047 string OpcodeStr, X86FoldableSchedWrite sched> {
10048 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10049 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10050 (null_frag)>, AVX5128IBase,
10053 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10054 (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
10056 AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
10057 Sched<[sched.Folded, sched.ReadAfterFold]>;
10060 multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10062 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
10063 (!cast<Instruction>(Name#_.ZSuffix##rmkz)
10064 _.KRCWM:$mask, addr:$src)>;
10066 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
10067 (!cast<Instruction>(Name#_.ZSuffix##rmkz)
10068 _.KRCWM:$mask, addr:$src)>;
10070 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
10071 (_.VT _.RC:$src0))),
10072 (!cast<Instruction>(Name#_.ZSuffix##rmk)
10073 _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
10075 def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10076 (!cast<Instruction>(Name#_.ZSuffix##rrk)
10077 _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10078 def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10079 (!cast<Instruction>(Name#_.ZSuffix##rrkz)
10080 _.KRCWM:$mask, _.RC:$src)>;
10083 multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
10084 X86FoldableSchedWrite sched,
10085 AVX512VLVectorVTInfo VTInfo,
10086 Predicate Pred = HasAVX512> {
10087 let Predicates = [Pred] in
10088 defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
10089 expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10091 let Predicates = [Pred, HasVLX] in {
10092 defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
10093 expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10094 defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
10095 expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10099 // FIXME: Is there a better scheduler class for VPEXPAND?
10100 defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
10101 avx512vl_i32_info>, EVEX;
10102 defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
10103 avx512vl_i64_info>, EVEX, VEX_W;
10104 defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
10105 avx512vl_f32_info>, EVEX;
10106 defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
10107 avx512vl_f64_info>, EVEX, VEX_W;
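// Expand is the inverse of compress: consecutive source elements (or a
// consecutive run of memory for the load form) are distributed to the element
// positions selected by the write-mask.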
10109 // Handle instructions of the form reg_vec1 = op(reg_vec, imm),
10110 //                                  op(mem_vec, imm), or
10111 //                                  op(broadcast(eltVT), imm).
10112 // All instructions are created with FROUND_CURRENT.
10113 multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10114 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10115 let ExeDomain = _.ExeDomain in {
10116 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10117 (ins _.RC:$src1, i32u8imm:$src2),
10118 OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
10119 (OpNode (_.VT _.RC:$src1),
10120 (i32 imm:$src2))>, Sched<[sched]>;
10121 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10122 (ins _.MemOp:$src1, i32u8imm:$src2),
10123 OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
10124 (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10126 Sched<[sched.Folded, sched.ReadAfterFold]>;
10127 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10128 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
10129 OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
10130 "${src1}"##_.BroadcastStr##", $src2",
10131 (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
10132 (i32 imm:$src2))>, EVEX_B,
10133 Sched<[sched.Folded, sched.ReadAfterFold]>;
10137 // Handle instructions of the form reg_vec1 = op(reg_vec, imm), {sae}.
10138 multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10139 SDNode OpNode, X86FoldableSchedWrite sched,
10140 X86VectorVTInfo _> {
10141 let ExeDomain = _.ExeDomain in
10142 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10143 (ins _.RC:$src1, i32u8imm:$src2),
10144 OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
10145 "$src1, {sae}, $src2",
10146 (OpNode (_.VT _.RC:$src1),
10148 EVEX_B, Sched<[sched]>;
10151 multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
10152 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10153 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10154 let Predicates = [prd] in {
10155 defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
10157 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
10158 sched.ZMM, _.info512>, EVEX_V512;
10160 let Predicates = [prd, HasVLX] in {
10161 defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM,
10162 _.info128>, EVEX_V128;
10163 defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM,
10164 _.info256>, EVEX_V256;
10168 // Handle instructions of the form reg_vec1 = op(reg_vec2, reg_vec3, imm),
10169 //                                  op(reg_vec2, mem_vec, imm), or
10170 //                                  op(reg_vec2, broadcast(eltVT), imm).
10171 // All instructions are created with FROUND_CURRENT.
10172 multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10173 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10174 let ExeDomain = _.ExeDomain in {
10175 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10176 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10177 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10178 (OpNode (_.VT _.RC:$src1),
10182 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10183 (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
10184 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10185 (OpNode (_.VT _.RC:$src1),
10186 (_.VT (bitconvert (_.LdFrag addr:$src2))),
10188 Sched<[sched.Folded, sched.ReadAfterFold]>;
10189 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10190 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10191 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10192 "$src1, ${src2}"##_.BroadcastStr##", $src3",
10193 (OpNode (_.VT _.RC:$src1),
10194 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
10195 (i32 imm:$src3))>, EVEX_B,
10196 Sched<[sched.Folded, sched.ReadAfterFold]>;
10200 // Handle instructions of the form reg_vec1 = op(reg_vec2, reg_vec3, imm) or
10201 //                                  op(reg_vec2, mem_vec, imm).
10202 multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10203 X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
10204 X86VectorVTInfo SrcInfo>{
10205 let ExeDomain = DestInfo.ExeDomain in {
10206 defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10207 (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
10208 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10209 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10210 (SrcInfo.VT SrcInfo.RC:$src2),
10213 defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10214 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
10215 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10216 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10217 (SrcInfo.VT (bitconvert
10218 (SrcInfo.LdFrag addr:$src2))),
10220 Sched<[sched.Folded, sched.ReadAfterFold]>;
10224 // Handle instructions of the form reg_vec1 = op(reg_vec2, reg_vec3, imm),
10225 //                                  op(reg_vec2, mem_vec, imm), or
10226 //                                  op(reg_vec2, broadcast(eltVT), imm).
10227 multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10228 X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10229 avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
10231 let ExeDomain = _.ExeDomain in
10232 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10233 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10234 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10235 "$src1, ${src2}"##_.BroadcastStr##", $src3",
10236 (OpNode (_.VT _.RC:$src1),
10237 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
10238 (i8 imm:$src3))>, EVEX_B,
10239 Sched<[sched.Folded, sched.ReadAfterFold]>;
10242 // Handle scalar instructions of the form reg_vec1 = op(reg_vec2, reg_vec3, imm) or
10243 //                                         op(reg_vec2, mem_scalar, imm).
10244 multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10245 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10246 let ExeDomain = _.ExeDomain in {
10247 defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10248 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10249 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10250 (OpNode (_.VT _.RC:$src1),
10254 defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10255 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10256 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10257 (OpNode (_.VT _.RC:$src1),
10258 (_.VT (scalar_to_vector
10259 (_.ScalarLdFrag addr:$src2))),
10261 Sched<[sched.Folded, sched.ReadAfterFold]>;
10265 // Handle instructions of the form reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}.
10266 multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10267 SDNode OpNode, X86FoldableSchedWrite sched,
10268 X86VectorVTInfo _> {
10269 let ExeDomain = _.ExeDomain in
10270 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10271 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10272 OpcodeStr, "$src3, {sae}, $src2, $src1",
10273 "$src1, $src2, {sae}, $src3",
10274 (OpNode (_.VT _.RC:$src1),
10277 EVEX_B, Sched<[sched]>;
10280 // Handle scalar instructions of the form reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}.
10281 multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10282 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10283 let ExeDomain = _.ExeDomain in
10284 defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10285 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10286 OpcodeStr, "$src3, {sae}, $src2, $src1",
10287 "$src1, $src2, {sae}, $src3",
10288 (OpNode (_.VT _.RC:$src1),
10291 EVEX_B, Sched<[sched]>;
10294 multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
10295 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10296 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10297 let Predicates = [prd] in {
10298 defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10299 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
10303 let Predicates = [prd, HasVLX] in {
10304 defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10306 defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10311 multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
10312 X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
10313 AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
10314 let Predicates = [Pred] in {
10315 defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
10316 SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
10318 let Predicates = [Pred, HasVLX] in {
10319 defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
10320 SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
10321 defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
10322 SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
10326 multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
10327 bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
10328 Predicate Pred = HasAVX512> {
10329 let Predicates = [Pred] in {
10330 defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10333 let Predicates = [Pred, HasVLX] in {
10334 defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10336 defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10341 multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
10342 X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
10343 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
10344 let Predicates = [prd] in {
10345 defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
10346 avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
10350 multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
10351 bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
10352 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10353 defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
10354 opcPs, OpNode, OpNodeSAE, sched, prd>,
10355 EVEX_CD8<32, CD8VF>;
10356 defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
10357 opcPd, OpNode, OpNodeSAE, sched, prd>,
10358 EVEX_CD8<64, CD8VF>, VEX_W;
10361 defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
10362 X86VReduce, X86VReduceSAE, SchedWriteFRnd, HasDQI>,
10363 AVX512AIi8Base, EVEX;
10364 defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
10365 X86VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>,
10366 AVX512AIi8Base, EVEX;
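// For vrndscale the immediate carries two fields: imm[3:0] selects the
// rounding behavior (mode, MXCSR override, precision-exception suppression)
// and imm[7:4] is the scale M, so the result is rounded to a multiple of
// 2^-M.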
10367 defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
10368 X86VGetMant, X86VGetMantSAE, SchedWriteFRnd, HasAVX512>,
10369 AVX512AIi8Base, EVEX;
10371 defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
10372 0x50, X86VRange, X86VRangeSAE,
10373 SchedWriteFAdd, HasDQI>,
10374 AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10375 defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
10376 0x50, X86VRange, X86VRangeSAE,
10377 SchedWriteFAdd, HasDQI>,
10378 AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10380 defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
10381 f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10382 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10383 defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
10384 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10385 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10387 defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
10388 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10389 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10390 defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
10391 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10392 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10394 defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
10395 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10396 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10397 defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
10398 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10399 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10401 multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
10402 X86FoldableSchedWrite sched,
10404 X86VectorVTInfo CastInfo,
10405 string EVEX2VEXOvrd> {
10406 let ExeDomain = _.ExeDomain in {
10407 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10408 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10409 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10411 (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
10412 (i8 imm:$src3)))))>,
10413 Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
10414 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10415 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10416 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10419 (CastInfo.VT (X86Shuf128 _.RC:$src1,
10420 (CastInfo.LdFrag addr:$src2),
10421 (i8 imm:$src3)))))>,
10422 Sched<[sched.Folded, sched.ReadAfterFold]>,
10423 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
10424 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10425 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10426 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10427 "$src1, ${src2}"##_.BroadcastStr##", $src3",
10431 (X86Shuf128 _.RC:$src1,
10432 (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
10433 (i8 imm:$src3)))))>, EVEX_B,
10434 Sched<[sched.Folded, sched.ReadAfterFold]>;
10438 multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
10439 AVX512VLVectorVTInfo _,
10440 AVX512VLVectorVTInfo CastInfo, bits<8> opc,
10441 string EVEX2VEXOvrd>{
10442 let Predicates = [HasAVX512] in
10443 defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10444 _.info512, CastInfo.info512, "">, EVEX_V512;
10446 let Predicates = [HasAVX512, HasVLX] in
10447 defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10448 _.info256, CastInfo.info256,
10449 EVEX2VEXOvrd>, EVEX_V256;
10452 defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
10453 avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10454 defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
10455 avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10456 defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
10457 avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10458 defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
10459 avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
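// These shuffles move whole 128-bit blocks: each immediate field picks one
// 128-bit block from the concatenation of the two sources. With both sources
// the same register and a zero immediate, the low 128 bits are replicated to
// every position, which is how the SubVBroadcast fallbacks below use them.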
10461 let Predicates = [HasAVX512] in {
10462 // Provide a fallback in case the load node that is used in the broadcast
10463 // patterns above is used by additional users, which prevents the pattern
10464 // from being reused.
10465 def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
10466 (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10467 (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10469 def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
10470 (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10471 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10474 def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
10475 (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10476 (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10478 def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
10479 (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10480 (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10483 def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
10484 (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10485 (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10488 def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
10489 (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10490 (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10494 multiclass avx512_valign<bits<8> opc, string OpcodeStr,
10495 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10496 // NOTE: EVEX2VEXOverride is changed back to Unset for the 256-bit variant at
10497 // the instantiation of this class.
10498 let ExeDomain = _.ExeDomain in {
10499 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10500 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10501 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10502 (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$src3)))>,
10503 Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
10504 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10505 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10506 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10507 (_.VT (X86VAlign _.RC:$src1,
10508 (bitconvert (_.LdFrag addr:$src2)),
10510 Sched<[sched.Folded, sched.ReadAfterFold]>,
10511 EVEX2VEXOverride<"VPALIGNRrmi">;
10513 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10514 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10515 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10516 "$src1, ${src2}"##_.BroadcastStr##", $src3",
10517 (X86VAlign _.RC:$src1,
10518 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
10519 (i8 imm:$src3))>, EVEX_B,
10520 Sched<[sched.Folded, sched.ReadAfterFold]>;
10524 multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
10525 AVX512VLVectorVTInfo _> {
10526 let Predicates = [HasAVX512] in {
10527 defm Z : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
10528 AVX512AIi8Base, EVEX_4V, EVEX_V512;
10530 let Predicates = [HasAVX512, HasVLX] in {
10531 defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
10532 AVX512AIi8Base, EVEX_4V, EVEX_V128;
10533 // We can't really override the 256-bit version, so change it back to unset.
10534 let EVEX2VEXOverride = ? in
10535 defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
10536 AVX512AIi8Base, EVEX_4V, EVEX_V256;
10540 defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
10541 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
10542 defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
10543 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
10546 defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
10547 SchedWriteShuffle, avx512vl_i8_info,
10548 avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
10550 // Fragments to help convert valignq into masked valignd. Or valignq/valignd
10551 // into vpalignr.
10552 def ValignqImm32XForm : SDNodeXForm<imm, [{
10553 return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
10555 def ValignqImm8XForm : SDNodeXForm<imm, [{
10556 return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
10558 def ValigndImm8XForm : SDNodeXForm<imm, [{
10559 return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
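// The scale factors above are just element-width ratios: one valignq element
// is two valignd elements (x2) or eight bytes (x8), and one valignd element
// is four bytes (x4); e.g. valignq $1 on v8i64 is valignd $2 on v16i32.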
10562 multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
10563 X86VectorVTInfo From, X86VectorVTInfo To,
10564 SDNodeXForm ImmXForm> {
10565 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10567 (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10570 (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
10571 To.RC:$src1, To.RC:$src2,
10572 (ImmXForm imm:$src3))>;
10574 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10576 (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10579 (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
10580 To.RC:$src1, To.RC:$src2,
10581 (ImmXForm imm:$src3))>;
10583 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10585 (From.VT (OpNode From.RC:$src1,
10586 (From.LdFrag addr:$src2),
10589 (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
10590 To.RC:$src1, addr:$src2,
10591 (ImmXForm imm:$src3))>;
10593 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10595 (From.VT (OpNode From.RC:$src1,
10596 (From.LdFrag addr:$src2),
10599 (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
10600 To.RC:$src1, addr:$src2,
10601 (ImmXForm imm:$src3))>;
10604 multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
10605 X86VectorVTInfo From,
10606 X86VectorVTInfo To,
10607 SDNodeXForm ImmXForm> :
10608 avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
10609 def : Pat<(From.VT (OpNode From.RC:$src1,
10610 (bitconvert (To.VT (X86VBroadcast
10611 (To.ScalarLdFrag addr:$src2)))),
10613 (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
10614 (ImmXForm imm:$src3))>;
10616 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10618 (From.VT (OpNode From.RC:$src1,
10620 (To.VT (X86VBroadcast
10621 (To.ScalarLdFrag addr:$src2)))),
10624 (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
10625 To.RC:$src1, addr:$src2,
10626 (ImmXForm imm:$src3))>;
10628 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10630 (From.VT (OpNode From.RC:$src1,
10632 (To.VT (X86VBroadcast
10633 (To.ScalarLdFrag addr:$src2)))),
10636 (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
10637 To.RC:$src1, addr:$src2,
10638 (ImmXForm imm:$src3))>;
10641 let Predicates = [HasAVX512] in {
10642 // For 512-bit we lower to the widest element type we can. So we only need
10643 // to handle converting valignq to valignd.
10644 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
10645 v16i32_info, ValignqImm32XForm>;
10648 let Predicates = [HasVLX] in {
10649 // For 128-bit we lower to the widest element type we can. So we only need
10650 // to handle converting valignq to valignd.
10651 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
10652 v4i32x_info, ValignqImm32XForm>;
10653 // For 256-bit we lower to the widest element type we can. So we only need
10654 // to handle converting valignq to valignd.
10655 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
10656 v8i32x_info, ValignqImm32XForm>;
10659 let Predicates = [HasVLX, HasBWI] in {
10660 // We can turn 128- and 256-bit VALIGND/VALIGNQ into VPALIGNR.
10661 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
10662 v16i8x_info, ValignqImm8XForm>;
10663 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
10664 v16i8x_info, ValigndImm8XForm>;
10667 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
10668 SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
10669 EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
10671 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10672 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10673 let ExeDomain = _.ExeDomain in {
10674 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10675 (ins _.RC:$src1), OpcodeStr,
10677 (_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase,
10680 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10681 (ins _.MemOp:$src1), OpcodeStr,
10683 (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
10684 EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
10685 Sched<[sched.Folded]>;
10689 multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
10690 X86FoldableSchedWrite sched, X86VectorVTInfo _> :
10691 avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
10692 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10693 (ins _.ScalarMemOp:$src1), OpcodeStr,
10694 "${src1}"##_.BroadcastStr,
10695 "${src1}"##_.BroadcastStr,
10696 (_.VT (OpNode (X86VBroadcast
10697 (_.ScalarLdFrag addr:$src1))))>,
10698 EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
10699 Sched<[sched.Folded]>;
10702 multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
10703 X86SchedWriteWidths sched,
10704 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10705 let Predicates = [prd] in
10706 defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
10709 let Predicates = [prd, HasVLX] in {
10710 defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
10712 defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
10717 multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
10718 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
10720 let Predicates = [prd] in
10721 defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
10724 let Predicates = [prd, HasVLX] in {
10725 defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
10727 defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
10732 multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
10733 SDNode OpNode, X86SchedWriteWidths sched,
10735 defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
10736 avx512vl_i64_info, prd>, VEX_W;
10737 defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
10738 avx512vl_i32_info, prd>;
10741 multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
10742 SDNode OpNode, X86SchedWriteWidths sched,
10744 defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
10745 avx512vl_i16_info, prd>, VEX_WIG;
10746 defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
10747 avx512vl_i8_info, prd>, VEX_WIG;
10750 multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
10751 bits<8> opc_d, bits<8> opc_q,
10752 string OpcodeStr, SDNode OpNode,
10753 X86SchedWriteWidths sched> {
10754 defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
10756 avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
10760 defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
10763 // VPABS: use the 512-bit version to implement 128/256-bit when VLX is unavailable (NoVLX).
10764 let Predicates = [HasAVX512, NoVLX] in {
10765 def : Pat<(v4i64 (abs VR256X:$src)),
10768 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
10770 def : Pat<(v2i64 (abs VR128X:$src)),
10773 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
10777 // Use the 512-bit version to implement 128/256-bit.
10778 multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
10779 AVX512VLVectorVTInfo _, Predicate prd> {
10780 let Predicates = [prd, NoVLX] in {
10781 def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
10783 (!cast<Instruction>(InstrStr # "Zrr")
10784 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
10785 _.info256.RC:$src1,
10786 _.info256.SubRegIdx)),
10787 _.info256.SubRegIdx)>;
10789 def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
10791 (!cast<Instruction>(InstrStr # "Zrr")
10792 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
10793 _.info128.RC:$src1,
10794 _.info128.SubRegIdx)),
10795 _.info128.SubRegIdx)>;
10799 defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
10800 SchedWriteVecIMul, HasCDI>;
10802 // FIXME: Is there a better scheduler class for VPCONFLICT?
10803 defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
10804 SchedWriteVecALU, HasCDI>;
10806 // VPLZCNT: use the 512-bit version to implement 128/256-bit when VLX is unavailable (NoVLX).
10807 defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
10808 defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
10810 //===---------------------------------------------------------------------===//
10811 // Counts number of ones - VPOPCNTD and VPOPCNTQ
10812 //===---------------------------------------------------------------------===//
10814 // FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
10815 defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
10816 SchedWriteVecALU, HasVPOPCNTDQ>;
10818 defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
10819 defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
10821 //===---------------------------------------------------------------------===//
10822 // Replicate Single FP - MOVSHDUP and MOVSLDUP
10823 //===---------------------------------------------------------------------===//
10825 multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
10826 X86SchedWriteWidths sched> {
10827 defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
10828 avx512vl_f32_info, HasAVX512>, XS;
10831 defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
10832 SchedWriteFShuffle>;
10833 defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
10834 SchedWriteFShuffle>;
10836 //===----------------------------------------------------------------------===//
10837 // AVX-512 - MOVDDUP
10838 //===----------------------------------------------------------------------===//
10840 multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
10841 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10842 let ExeDomain = _.ExeDomain in {
10843 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10844 (ins _.RC:$src), OpcodeStr, "$src", "$src",
10845 (_.VT (OpNode (_.VT _.RC:$src)))>, EVEX,
10847 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10848 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
10849 (_.VT (OpNode (_.VT (scalar_to_vector
10850 (_.ScalarLdFrag addr:$src)))))>,
10851 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
10852 Sched<[sched.Folded]>;
10856 multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
10857 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
10858 defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
10859 VTInfo.info512>, EVEX_V512;
10861 let Predicates = [HasAVX512, HasVLX] in {
10862 defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
10863 VTInfo.info256>, EVEX_V256;
10864 defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, sched.XMM,
10865 VTInfo.info128>, EVEX_V128;
10866 }
10867 }
10869 multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
10870 X86SchedWriteWidths sched> {
10871 defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
10872 avx512vl_f64_info>, XD, VEX_W;
10873 }
10875 defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;
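// MOVDDUP duplicates the low f64 element, which is exactly a v2f64 broadcast;
// a minimal C++ model of the 128-bit form (illustrative only):
//   void movddup128(double dst[2], const double src[2]) {
//     dst[0] = src[0];
//     dst[1] = src[0];  // element 1 is a copy of element 0
//   }
// This is why the X86VBroadcast patterns below select VMOVDDUPZ128*.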
10877 let Predicates = [HasVLX] in {
10878 def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
10879 (VMOVDDUPZ128rm addr:$src)>;
10880 def : Pat<(v2f64 (X86VBroadcast f64:$src)),
10881 (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10882 def : Pat<(v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
10883 (VMOVDDUPZ128rm addr:$src)>;
10884 def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
10885 (VMOVDDUPZ128rm addr:$src)>;
10887 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
10888 (v2f64 VR128X:$src0)),
10889 (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
10890 (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10891 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
10892 (v2f64 immAllZerosV)),
10893 (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10895 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
10896 (v2f64 VR128X:$src0)),
10897 (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
10898 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
10899 (v2f64 immAllZerosV)),
10900 (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
10902 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
10903 (v2f64 VR128X:$src0)),
10904 (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
10905 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
10906 (v2f64 immAllZerosV)),
10907 (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
10908 }
10910 //===----------------------------------------------------------------------===//
10911 // AVX-512 - Unpack Instructions
10912 //===----------------------------------------------------------------------===//
10914 defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
10915 SchedWriteFShuffleSizes, 0, 1>;
10916 defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
10917 SchedWriteFShuffleSizes>;
10919 defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
10920 SchedWriteShuffle, HasBWI>;
10921 defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
10922 SchedWriteShuffle, HasBWI>;
10923 defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
10924 SchedWriteShuffle, HasBWI>;
10925 defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
10926 SchedWriteShuffle, HasBWI>;
10928 defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
10929 SchedWriteShuffle, HasAVX512>;
10930 defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
10931 SchedWriteShuffle, HasAVX512>;
10932 defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
10933 SchedWriteShuffle, HasAVX512>;
10934 defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
10935 SchedWriteShuffle, HasAVX512>;
10937 //===----------------------------------------------------------------------===//
10938 // AVX-512 - Extract & Insert Integer Instructions
10939 //===----------------------------------------------------------------------===//
10941 multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
10942 X86VectorVTInfo _> {
10943 def mr : AVX512Ii8<opc, MRMDestMem, (outs),
10944 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
10945 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10946 [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
10947 addr:$dst)]>,
10948 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
10949 }
10951 multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
10952 let Predicates = [HasBWI] in {
10953 def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
10954 (ins _.RC:$src1, u8imm:$src2),
10955 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10956 [(set GR32orGR64:$dst,
10957 (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
10958 EVEX, TAPD, Sched<[WriteVecExtract]>;
10960 defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
10961 }
10962 }
10964 multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
10965 let Predicates = [HasBWI] in {
10966 def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
10967 (ins _.RC:$src1, u8imm:$src2),
10968 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10969 [(set GR32orGR64:$dst,
10970 (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
10971 EVEX, PD, Sched<[WriteVecExtract]>;
10973 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
10974 def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
10975 (ins _.RC:$src1, u8imm:$src2),
10976 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
10977 EVEX, TAPD, FoldGenData<NAME#rr>,
10978 Sched<[WriteVecExtract]>;
10980 defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
10981 }
10982 }
10984 multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
10985 RegisterClass GRC> {
10986 let Predicates = [HasDQI] in {
10987 def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
10988 (ins _.RC:$src1, u8imm:$src2),
10989 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10990 [(set GRC:$dst,
10991 (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
10992 EVEX, TAPD, Sched<[WriteVecExtract]>;
10994 def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
10995 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
10996 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10997 [(store (extractelt (_.VT _.RC:$src1),
10998 imm:$src2),addr:$dst)]>,
10999 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
11000 Sched<[WriteVecExtractSt]>;
11001 }
11002 }
11004 defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
11005 defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
11006 defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
11007 defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
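// For reference, these map to the element-extract intrinsics; with AVX-512BW
// enabled a compiler may select the EVEX forms above (illustrative C++):
//   #include <immintrin.h>
//   int third_byte(__m128i v) {
//     return _mm_extract_epi8(v, 3);   // e.g. "vpextrb $3, %xmm0, %eax"
//   }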
11009 multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11010 X86VectorVTInfo _, PatFrag LdFrag> {
11011 def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
11012 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
11013 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11014 [(set _.RC:$dst,
11015 (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
11016 EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
11017 }
11019 multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
11020 X86VectorVTInfo _, PatFrag LdFrag> {
11021 let Predicates = [HasBWI] in {
11022 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11023 (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
11024 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11025 [(set _.RC:$dst,
11026 (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
11027 Sched<[WriteVecInsert]>;
11029 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
11030 }
11031 }
11033 multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
11034 X86VectorVTInfo _, RegisterClass GRC> {
11035 let Predicates = [HasDQI] in {
11036 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11037 (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
11038 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11039 [(set _.RC:$dst,
11040 (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
11041 EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
11043 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
11044 _.ScalarLdFrag>, TAPD;
11045 }
11046 }
11048 defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
11049 extloadi8>, TAPD, VEX_WIG;
11050 defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
11051 extloadi16>, PD, VEX_WIG;
11052 defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
11053 defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
11055 //===----------------------------------------------------------------------===//
11056 // VSHUFPS - VSHUFPD Operations
11057 //===----------------------------------------------------------------------===//
11059 multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
11060 AVX512VLVectorVTInfo VTInfo_FP>{
11061 defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
11062 SchedWriteFShuffle>,
11063 EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
11064 AVX512AIi8Base, EVEX_4V;
11065 }
11067 defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
11068 defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
11070 //===----------------------------------------------------------------------===//
11071 // AVX-512 - Byte shift Left/Right
11072 //===----------------------------------------------------------------------===//
11074 // FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well?
11075 multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
11076 Format MRMm, string OpcodeStr,
11077 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11078 def rr : AVX512<opc, MRMr,
11079 (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
11080 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11081 [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>,
11082 Sched<[sched]>;
11083 def rm : AVX512<opc, MRMm,
11084 (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
11085 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11086 [(set _.RC:$dst,(_.VT (OpNode
11087 (_.VT (bitconvert (_.LdFrag addr:$src1))),
11088 (i8 imm:$src2))))]>,
11089 Sched<[sched.Folded, sched.ReadAfterFold]>;
11090 }
11092 multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
11093 Format MRMm, string OpcodeStr,
11094 X86SchedWriteWidths sched, Predicate prd>{
11095 let Predicates = [prd] in
11096 defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11097 sched.ZMM, v64i8_info>, EVEX_V512;
11098 let Predicates = [prd, HasVLX] in {
11099 defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11100 sched.YMM, v32i8x_info>, EVEX_V256;
11101 defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11102 sched.XMM, v16i8x_info>, EVEX_V128;
11103 }
11104 }
11105 defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
11106 SchedWriteShuffle, HasBWI>,
11107 AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11108 defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
11109 SchedWriteShuffle, HasBWI>,
11110 AVX512PDIi8Base, EVEX_4V, VEX_WIG;
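// VPSLLDQ/VPSRLDQ shift whole bytes within each 128-bit lane, zero-filling;
// a minimal C++ model of one lane of the left shift (illustrative only):
//   void pslldq_lane(unsigned char b[16], int n) {
//     for (int i = 15; i >= 0; --i)
//       b[i] = (i >= n) ? b[i - n] : 0;   // bytes move toward higher indices
//   }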
11112 multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
11113 string OpcodeStr, X86FoldableSchedWrite sched,
11114 X86VectorVTInfo _dst, X86VectorVTInfo _src> {
11115 def rr : AVX512BI<opc, MRMSrcReg,
11116 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
11117 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11118 [(set _dst.RC:$dst,(_dst.VT
11119 (OpNode (_src.VT _src.RC:$src1),
11120 (_src.VT _src.RC:$src2))))]>,
11121 Sched<[sched]>;
11122 def rm : AVX512BI<opc, MRMSrcMem,
11123 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
11124 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11125 [(set _dst.RC:$dst,(_dst.VT
11126 (OpNode (_src.VT _src.RC:$src1),
11127 (_src.VT (bitconvert
11128 (_src.LdFrag addr:$src2))))))]>,
11129 Sched<[sched.Folded, sched.ReadAfterFold]>;
11130 }
11132 multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
11133 string OpcodeStr, X86SchedWriteWidths sched,
11134 Predicate prd> {
11135 let Predicates = [prd] in
11136 defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
11137 v8i64_info, v64i8_info>, EVEX_V512;
11138 let Predicates = [prd, HasVLX] in {
11139 defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
11140 v4i64x_info, v32i8x_info>, EVEX_V256;
11141 defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
11142 v2i64x_info, v16i8x_info>, EVEX_V128;
11143 }
11144 }
11146 defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
11147 SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
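// VPSADBW sums absolute byte differences into one 64-bit result per group of
// eight bytes; a C++ model of one group (illustrative only):
//   unsigned long long psadbw_group(const unsigned char a[8],
//                                   const unsigned char b[8]) {
//     unsigned long long sum = 0;
//     for (int i = 0; i < 8; ++i)
//       sum += (a[i] > b[i]) ? a[i] - b[i] : b[i] - a[i];
//     return sum;   // zero-extended into the 64-bit destination lane
//   }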
11149 // Transforms to swizzle an immediate to enable better matching when
11150 // memory operand isn't in the right place.
11151 def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
11152 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
11153 uint8_t Imm = N->getZExtValue();
11154 // Swap bits 1/4 and 3/6.
11155 uint8_t NewImm = Imm & 0xa5;
11156 if (Imm & 0x02) NewImm |= 0x10;
11157 if (Imm & 0x10) NewImm |= 0x02;
11158 if (Imm & 0x08) NewImm |= 0x40;
11159 if (Imm & 0x40) NewImm |= 0x08;
11160 return getI8Imm(NewImm, SDLoc(N));
11161 }]>;
11162 def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
11163 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
11164 uint8_t Imm = N->getZExtValue();
11165 // Swap bits 2/4 and 3/5.
11166 uint8_t NewImm = Imm & 0xc3;
11167 if (Imm & 0x04) NewImm |= 0x10;
11168 if (Imm & 0x10) NewImm |= 0x04;
11169 if (Imm & 0x08) NewImm |= 0x20;
11170 if (Imm & 0x20) NewImm |= 0x08;
11171 return getI8Imm(NewImm, SDLoc(N));
11172 }]>;
11173 def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
11174 // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
11175 uint8_t Imm = N->getZExtValue();
11176 // Swap bits 1/2 and 5/6.
11177 uint8_t NewImm = Imm & 0x99;
11178 if (Imm & 0x02) NewImm |= 0x04;
11179 if (Imm & 0x04) NewImm |= 0x02;
11180 if (Imm & 0x20) NewImm |= 0x40;
11181 if (Imm & 0x40) NewImm |= 0x20;
11182 return getI8Imm(NewImm, SDLoc(N));
11183 }]>;
11184 def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
11185 // Convert a VPTERNLOG immediate by moving operand 0 to the end.
11186 uint8_t Imm = N->getZExtValue();
11187 // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
11188 uint8_t NewImm = Imm & 0x81;
11189 if (Imm & 0x02) NewImm |= 0x04;
11190 if (Imm & 0x04) NewImm |= 0x10;
11191 if (Imm & 0x08) NewImm |= 0x40;
11192 if (Imm & 0x10) NewImm |= 0x02;
11193 if (Imm & 0x20) NewImm |= 0x08;
11194 if (Imm & 0x40) NewImm |= 0x20;
11195 return getI8Imm(NewImm, SDLoc(N));
11196 }]>;
11197 def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
11198 // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
11199 uint8_t Imm = N->getZExtValue();
11200 // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
11201 uint8_t NewImm = Imm & 0x81;
11202 if (Imm & 0x02) NewImm |= 0x10;
11203 if (Imm & 0x04) NewImm |= 0x02;
11204 if (Imm & 0x08) NewImm |= 0x20;
11205 if (Imm & 0x10) NewImm |= 0x04;
11206 if (Imm & 0x20) NewImm |= 0x40;
11207 if (Imm & 0x40) NewImm |= 0x08;
11208 return getI8Imm(NewImm, SDLoc(N));
11209 }]>;
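// All five transforms are instances of one remap: permuting the operands of a
// ternary function permutes the 3-bit truth-table index of its immediate.
// A generic C++ sketch that the unrolled versions above specialize
// (illustrative; perm[k] names which old operand supplies bit position k of
// the new index, e.g. {2, 1, 0} for the 321 swizzle and {1, 2, 0} for 231):
//   unsigned char remapTernlogImm(unsigned char Imm, const int perm[3]) {
//     unsigned char New = 0;
//     for (int Idx = 0; Idx < 8; ++Idx) {
//       int Bit[3] = {(Idx >> 2) & 1, (Idx >> 1) & 1, Idx & 1};
//       int NewIdx = (Bit[perm[0]] << 2) | (Bit[perm[1]] << 1) | Bit[perm[2]];
//       if (Imm & (1 << Idx))
//         New |= 1 << NewIdx;
//     }
//     return New;
//   }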
11211 multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
11212 X86FoldableSchedWrite sched, X86VectorVTInfo _,
11213 string Name> {
11214 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11215 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11216 (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
11217 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11218 (OpNode (_.VT _.RC:$src1),
11219 (_.VT _.RC:$src2),
11220 (_.VT _.RC:$src3),
11221 (i8 imm:$src4)), 1, 1>,
11222 AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
11223 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11224 (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
11225 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11226 (OpNode (_.VT _.RC:$src1),
11227 (_.VT _.RC:$src2),
11228 (_.VT (bitconvert (_.LdFrag addr:$src3))),
11229 (i8 imm:$src4)), 1, 0>,
11230 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11231 Sched<[sched.Folded, sched.ReadAfterFold]>;
11232 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11233 (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
11234 OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
11235 "$src2, ${src3}"##_.BroadcastStr##", $src4",
11236 (OpNode (_.VT _.RC:$src1),
11237 (_.VT _.RC:$src2),
11238 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
11239 (i8 imm:$src4)), 1, 0>, EVEX_B,
11240 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11241 Sched<[sched.Folded, sched.ReadAfterFold]>;
11242 }// Constraints = "$src1 = $dst"
11244 // Additional patterns for matching passthru operand in other positions.
11245 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11246 (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
11247 _.RC:$src1)),
11248 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11249 _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11250 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11251 (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
11252 _.RC:$src1)),
11253 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11254 _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
11256 // Additional patterns for matching loads in other positions.
11257 def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
11258 _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
11259 (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
11260 addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11261 def : Pat<(_.VT (OpNode _.RC:$src1,
11262 (bitconvert (_.LdFrag addr:$src3)),
11263 _.RC:$src2, (i8 imm:$src4))),
11264 (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
11265 addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
11267 // Additional patterns for matching zero masking with loads in other
11268 // positions.
11269 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11270 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11271 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
11272 _.ImmAllZerosV)),
11273 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11274 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11275 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11276 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11277 _.RC:$src2, (i8 imm:$src4)),
11278 _.ImmAllZerosV)),
11279 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11280 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
11282 // Additional patterns for matching masked loads with different
11283 // operand orders.
11284 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11285 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11286 _.RC:$src2, (i8 imm:$src4)),
11287 _.RC:$src1)),
11288 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11289 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
11290 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11291 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11292 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
11293 _.RC:$src1)),
11294 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11295 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11296 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11297 (OpNode _.RC:$src2, _.RC:$src1,
11298 (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
11299 _.RC:$src1)),
11300 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11301 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
11302 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11303 (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
11304 _.RC:$src1, (i8 imm:$src4)),
11305 _.RC:$src1)),
11306 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11307 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
11308 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11309 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11310 _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
11311 _.RC:$src1)),
11312 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11313 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
11315 // Additional patterns for matching broadcasts in other positions.
11316 def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11317 _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
11318 (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
11319 addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11320 def : Pat<(_.VT (OpNode _.RC:$src1,
11321 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11322 _.RC:$src2, (i8 imm:$src4))),
11323 (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
11324 addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
11326 // Additional patterns for matching zero masking with broadcasts in other
11327 // positions.
11328 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11329 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11330 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
11331 _.ImmAllZerosV)),
11332 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11333 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11334 (VPTERNLOG321_imm8 imm:$src4))>;
11335 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11336 (OpNode _.RC:$src1,
11337 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11338 _.RC:$src2, (i8 imm:$src4)),
11339 _.ImmAllZerosV)),
11340 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11341 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11342 (VPTERNLOG132_imm8 imm:$src4))>;
11344 // Additional patterns for matching masked broadcasts with different
11345 // operand orders.
11346 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11347 (OpNode _.RC:$src1,
11348 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11349 _.RC:$src2, (i8 imm:$src4)),
11350 _.RC:$src1)),
11351 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11352 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
11353 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11354 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11355 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
11356 _.RC:$src1)),
11357 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11358 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11359 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11360 (OpNode _.RC:$src2, _.RC:$src1,
11361 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11362 (i8 imm:$src4)), _.RC:$src1)),
11363 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11364 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
11365 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11366 (OpNode _.RC:$src2,
11367 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11368 _.RC:$src1, (i8 imm:$src4)),
11369 _.RC:$src1)),
11370 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11371 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
11372 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11373 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11374 _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
11375 _.RC:$src1)),
11376 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11377 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
11378 }
11380 multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
11381 AVX512VLVectorVTInfo _> {
11382 let Predicates = [HasAVX512] in
11383 defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
11384 _.info512, NAME>, EVEX_V512;
11385 let Predicates = [HasAVX512, HasVLX] in {
11386 defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
11387 _.info128, NAME>, EVEX_V128;
11388 defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
11389 _.info256, NAME>, EVEX_V256;
11390 }
11391 }
11393 defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
11394 avx512vl_i32_info>;
11395 defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
11396 avx512vl_i64_info>, VEX_W;
11398 // Patterns to implement vnot using vpternlog instead of creating all ones
11399 // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
11400 // so that the result is only dependent on src0. But we use the same source
11401 // for all operands to prevent a false dependency.
11402 // TODO: We should maybe have a more generalized algorithm for folding to
11403 // vpternlog.
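// The truth table is indexed by (bit0 << 2) | (bit1 << 1) | bit2 over the
// three sources, so immediate 15 (0b00001111) is 1 exactly when src0's bit is
// 0 -- a pure NOT of src0. A one-line C++ check (illustrative):
//   bool ternlog15(bool s0, bool s1, bool s2) { return !s0; }  // s1, s2 unused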
11404 let Predicates = [HasAVX512] in {
11405 def : Pat<(xor VR512:$src, (v64i8 immAllOnesV)),
11406 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11407 def : Pat<(xor VR512:$src, (v32i16 immAllOnesV)),
11408 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11409 def : Pat<(xor VR512:$src, (v16i32 immAllOnesV)),
11410 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11411 def : Pat<(xor VR512:$src, (v8i64 immAllOnesV)),
11412 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11413 }
11415 let Predicates = [HasAVX512, NoVLX] in {
11416 def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
11417 (EXTRACT_SUBREG
11418 (VPTERNLOGQZrri
11419 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11420 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11421 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11422 (i8 15)), sub_xmm)>;
11423 def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
11424 (EXTRACT_SUBREG
11425 (VPTERNLOGQZrri
11426 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11427 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11428 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11429 (i8 15)), sub_xmm)>;
11430 def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
11431 (EXTRACT_SUBREG
11432 (VPTERNLOGQZrri
11433 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11434 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11435 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11436 (i8 15)), sub_xmm)>;
11437 def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
11438 (EXTRACT_SUBREG
11439 (VPTERNLOGQZrri
11440 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11441 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11442 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11443 (i8 15)), sub_xmm)>;
11445 def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
11446 (EXTRACT_SUBREG
11447 (VPTERNLOGQZrri
11448 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11449 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11450 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11451 (i8 15)), sub_ymm)>;
11452 def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
11453 (EXTRACT_SUBREG
11454 (VPTERNLOGQZrri
11455 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11456 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11457 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11458 (i8 15)), sub_ymm)>;
11459 def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
11460 (EXTRACT_SUBREG
11461 (VPTERNLOGQZrri
11462 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11463 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11464 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11465 (i8 15)), sub_ymm)>;
11466 def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
11467 (EXTRACT_SUBREG
11468 (VPTERNLOGQZrri
11469 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11470 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11471 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11472 (i8 15)), sub_ymm)>;
11473 }
11475 let Predicates = [HasVLX] in {
11476 def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
11477 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11478 def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
11479 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11480 def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
11481 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11482 def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
11483 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11485 def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
11486 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11487 def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
11488 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11489 def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
11490 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11491 def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
11492 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11493 }
11495 //===----------------------------------------------------------------------===//
11496 // AVX-512 - FixupImm
11497 //===----------------------------------------------------------------------===//
11499 multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
11500 X86FoldableSchedWrite sched, X86VectorVTInfo _,
11501 X86VectorVTInfo TblVT>{
11502 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11503 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11504 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11505 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11506 (X86VFixupimm (_.VT _.RC:$src1),
11507 (_.VT _.RC:$src2),
11508 (TblVT.VT _.RC:$src3),
11509 (i32 imm:$src4))>, Sched<[sched]>;
11510 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11511 (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
11512 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11513 (X86VFixupimm (_.VT _.RC:$src1),
11514 (_.VT _.RC:$src2),
11515 (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
11516 (i32 imm:$src4))>,
11517 Sched<[sched.Folded, sched.ReadAfterFold]>;
11518 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11519 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11520 OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
11521 "$src2, ${src3}"##_.BroadcastStr##", $src4",
11522 (X86VFixupimm (_.VT _.RC:$src1),
11523 (_.VT _.RC:$src2),
11524 (TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))),
11525 (i32 imm:$src4))>,
11526 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
11527 } // Constraints = "$src1 = $dst"
11528 }
11530 multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
11531 X86FoldableSchedWrite sched,
11532 X86VectorVTInfo _, X86VectorVTInfo TblVT>
11533 : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
11534 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11535 defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11536 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11537 OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
11538 "$src2, $src3, {sae}, $src4",
11539 (X86VFixupimmSAE (_.VT _.RC:$src1),
11540 (_.VT _.RC:$src2),
11541 (TblVT.VT _.RC:$src3),
11542 (i32 imm:$src4))>,
11543 EVEX_B, Sched<[sched]>;
11544 }
11545 }
11547 multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
11548 X86FoldableSchedWrite sched, X86VectorVTInfo _,
11549 X86VectorVTInfo _src3VT> {
11550 let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
11551 ExeDomain = _.ExeDomain in {
11552 defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11553 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11554 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11555 (X86VFixupimms (_.VT _.RC:$src1),
11556 (_.VT _.RC:$src2),
11557 (_src3VT.VT _src3VT.RC:$src3),
11558 (i32 imm:$src4))>, Sched<[sched]>;
11559 defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11560 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11561 OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
11562 "$src2, $src3, {sae}, $src4",
11563 (X86VFixupimmSAEs (_.VT _.RC:$src1),
11564 (_.VT _.RC:$src2),
11565 (_src3VT.VT _src3VT.RC:$src3),
11566 (i32 imm:$src4))>,
11567 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
11568 defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
11569 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11570 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11571 (X86VFixupimms (_.VT _.RC:$src1),
11572 (_.VT _.RC:$src2),
11573 (_src3VT.VT (scalar_to_vector
11574 (_src3VT.ScalarLdFrag addr:$src3))),
11575 (i32 imm:$src4))>,
11576 Sched<[sched.Folded, sched.ReadAfterFold]>;
11577 }
11578 }
11580 multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
11581 AVX512VLVectorVTInfo _Vec,
11582 AVX512VLVectorVTInfo _Tbl> {
11583 let Predicates = [HasAVX512] in
11584 defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
11585 _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
11586 EVEX_4V, EVEX_V512;
11587 let Predicates = [HasAVX512, HasVLX] in {
11588 defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
11589 _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
11590 EVEX_4V, EVEX_V128;
11591 defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
11592 _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
11593 EVEX_4V, EVEX_V256;
11594 }
11595 }
11597 defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11598 SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
11599 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11600 defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11601 SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
11602 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
11603 defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
11604 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11605 defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
11606 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
11608 // Patterns used to select SSE scalar fp arithmetic instructions from
11609 // either:
11610 //
11611 // (1) a scalar fp operation followed by a blend
11612 //
11613 // The effect is that the backend no longer emits unnecessary vector
11614 // insert instructions immediately after SSE scalar fp instructions
11615 // like addss or mulss.
11616 //
11617 // For example, given the following code:
11618 // __m128 foo(__m128 A, __m128 B) {
11619 // A[0] += B[0];
11620 // return A;
11621 // }
11622 //
11623 // Previously we generated:
11624 // addss %xmm0, %xmm1
11625 // movss %xmm1, %xmm0
11626 //
11627 // We now generate:
11628 // addss %xmm1, %xmm0
11629 //
11630 // (2) a vector packed single/double fp operation followed by a vector insert
11631 //
11632 // The effect is that the backend converts the packed fp instruction
11633 // followed by a vector insert into a single SSE scalar fp instruction.
11634 //
11635 // For example, given the following code:
11636 // __m128 foo(__m128 A, __m128 B) {
11637 // __m128 C = A + B;
11638 // return (__m128) {C[0], A[1], A[2], A[3]};
11639 // }
11640 //
11641 // Previously we generated:
11642 // addps %xmm0, %xmm1
11643 // movss %xmm1, %xmm0
11644 //
11645 // We now generate:
11646 // addss %xmm1, %xmm0
11647 //
11648 // TODO: Some canonicalization in lowering would simplify the number of
11649 // patterns we have to try to match.
11650 multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode,
11651 X86VectorVTInfo _, PatLeaf ZeroFP> {
11652 let Predicates = [HasAVX512] in {
11653 // extracted scalar math op with insert via movss
11654 def : Pat<(MoveNode
11655 (_.VT VR128X:$dst),
11656 (_.VT (scalar_to_vector
11657 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
11658 _.FRC:$src)))),
11659 (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
11660 (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
11661 def : Pat<(MoveNode
11662 (_.VT VR128X:$dst),
11663 (_.VT (scalar_to_vector
11664 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
11665 (_.ScalarLdFrag addr:$src))))),
11666 (!cast<Instruction>("V"#OpcPrefix#Zrm_Int) _.VT:$dst, addr:$src)>;
11668 // extracted masked scalar math op with insert via movss
11669 def : Pat<(MoveNode (_.VT VR128X:$src1),
11670 (scalar_to_vector
11671 (X86selects VK1WM:$mask,
11672 (Op (_.EltVT
11673 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11674 _.FRC:$src2),
11675 _.FRC:$src0))),
11676 (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk)
11677 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
11678 VK1WM:$mask, _.VT:$src1,
11679 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
11680 def : Pat<(MoveNode (_.VT VR128X:$src1),
11681 (scalar_to_vector
11682 (X86selects VK1WM:$mask,
11683 (Op (_.EltVT
11684 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11685 (_.ScalarLdFrag addr:$src2)),
11686 _.FRC:$src0))),
11687 (!cast<Instruction>("V"#OpcPrefix#Zrm_Intk)
11688 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
11689 VK1WM:$mask, _.VT:$src1, addr:$src2)>;
11691 // extracted masked scalar math op with insert via movss (zero masking)
11692 def : Pat<(MoveNode (_.VT VR128X:$src1),
11693 (scalar_to_vector
11694 (X86selects VK1WM:$mask,
11695 (Op (_.EltVT
11696 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11697 _.FRC:$src2), (_.EltVT ZeroFP)))),
11698 (!cast<I>("V"#OpcPrefix#Zrr_Intkz)
11699 VK1WM:$mask, _.VT:$src1,
11700 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
11701 def : Pat<(MoveNode (_.VT VR128X:$src1),
11702 (scalar_to_vector
11703 (X86selects VK1WM:$mask,
11704 (Op (_.EltVT
11705 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11706 (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
11707 (!cast<I>("V"#OpcPrefix#Zrm_Intkz) VK1WM:$mask, _.VT:$src1, addr:$src2)>;
11708 }
11709 }
11711 defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
11712 defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
11713 defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
11714 defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
11716 defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
11717 defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
11718 defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
11719 defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
11721 multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
11722 SDNode Move, X86VectorVTInfo _> {
11723 let Predicates = [HasAVX512] in {
11724 def : Pat<(_.VT (Move _.VT:$dst,
11725 (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
11726 (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src)>;
11727 }
11728 }
11730 defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
11731 defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
11733 //===----------------------------------------------------------------------===//
11734 // AES instructions
11735 //===----------------------------------------------------------------------===//
11737 multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
11738 let Predicates = [HasVLX, HasVAES] in {
11739 defm Z128 : AESI_binop_rm_int<Op, OpStr,
11740 !cast<Intrinsic>(IntPrefix),
11741 loadv2i64, 0, VR128X, i128mem>,
11742 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
11743 defm Z256 : AESI_binop_rm_int<Op, OpStr,
11744 !cast<Intrinsic>(IntPrefix##"_256"),
11745 loadv4i64, 0, VR256X, i256mem>,
11746 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
11747 }
11748 let Predicates = [HasAVX512, HasVAES] in
11749 defm Z : AESI_binop_rm_int<Op, OpStr,
11750 !cast<Intrinsic>(IntPrefix##"_512"),
11751 loadv8i64, 0, VR512, i512mem>,
11752 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
11753 }
11755 defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
11756 defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
11757 defm VAESDEC : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
11758 defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
11760 //===----------------------------------------------------------------------===//
11761 // PCLMUL instructions - Carry less multiplication
11762 //===----------------------------------------------------------------------===//
11764 let Predicates = [HasAVX512, HasVPCLMULQDQ] in
11765 defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
11766 EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;
11768 let Predicates = [HasVLX, HasVPCLMULQDQ] in {
11769 defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
11770 EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;
11772 defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
11773 int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
11774 EVEX_CD8<64, CD8VF>, VEX_WIG;
11775 }
11777 // Aliases
11778 defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
11779 defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
11780 defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
11782 //===----------------------------------------------------------------------===//
11783 // VBMI2
11784 //===----------------------------------------------------------------------===//
11786 multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
11787 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
11788 let Constraints = "$src1 = $dst",
11789 ExeDomain = VTI.ExeDomain in {
11790 defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
11791 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
11792 "$src3, $src2", "$src2, $src3",
11793 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
11794 AVX512FMA3Base, Sched<[sched]>;
11795 defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11796 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
11797 "$src3, $src2", "$src2, $src3",
11798 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
11799 (VTI.VT (VTI.LdFrag addr:$src3))))>,
11800 AVX512FMA3Base,
11801 Sched<[sched.Folded, sched.ReadAfterFold]>;
11802 }
11803 }
11805 multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
11806 X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
11807 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
11808 let Constraints = "$src1 = $dst",
11809 ExeDomain = VTI.ExeDomain in
11810 defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11811 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
11812 "${src3}"##VTI.BroadcastStr##", $src2",
11813 "$src2, ${src3}"##VTI.BroadcastStr,
11814 (OpNode VTI.RC:$src1, VTI.RC:$src2,
11815 (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>,
11816 AVX512FMA3Base, EVEX_B,
11817 Sched<[sched.Folded, sched.ReadAfterFold]>;
11818 }
11820 multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
11821 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
11822 let Predicates = [HasVBMI2] in
11823 defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
11824 EVEX_V512;
11825 let Predicates = [HasVBMI2, HasVLX] in {
11826 defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
11827 EVEX_V256;
11828 defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
11829 EVEX_V128;
11830 }
11831 }
11833 multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
11834 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
11835 let Predicates = [HasVBMI2] in
11836 defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
11837 EVEX_V512;
11838 let Predicates = [HasVBMI2, HasVLX] in {
11839 defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
11840 EVEX_V256;
11841 defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
11842 EVEX_V128;
11843 }
11844 }
11845 multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
11846 SDNode OpNode, X86SchedWriteWidths sched> {
11847 defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, sched,
11848 avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
11849 defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, sched,
11850 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11851 defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, sched,
11852 avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
11853 }
11855 multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
11856 SDNode OpNode, X86SchedWriteWidths sched> {
11857 defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", sched,
11858 avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
11859 VEX_W, EVEX_CD8<16, CD8VF>;
11860 defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
11861 OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11862 defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
11863 sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
11864 }
11867 defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
11868 defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
11869 defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
11870 defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
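// VPSHLD concatenates two elements and keeps the high half of the shifted
// double-width value; a C++ model of the 16-bit immediate form (illustrative;
// the "v" variable forms take the shift count from a third vector instead):
//   unsigned short vpshldw(unsigned short a, unsigned short b, int imm) {
//     int s = imm & 15;
//     return s ? (unsigned short)((a << s) | (b >> (16 - s))) : a;
//   }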
11873 defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
11874 avx512vl_i8_info, HasVBMI2>, EVEX,
11875 NotMemoryFoldable;
11876 defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
11877 avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
11878 NotMemoryFoldable;
11880 defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
11881 avx512vl_i8_info, HasVBMI2>, EVEX;
11882 defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
11883 avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
11885 //===----------------------------------------------------------------------===//
11886 // VNNI
11887 //===----------------------------------------------------------------------===//
11889 let Constraints = "$src1 = $dst" in
11890 multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
11891 X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
11892 bit IsCommutable> {
11893 defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
11894 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
11895 "$src3, $src2", "$src2, $src3",
11896 (VTI.VT (OpNode VTI.RC:$src1,
11897 VTI.RC:$src2, VTI.RC:$src3)),
11898 IsCommutable, IsCommutable>,
11899 EVEX_4V, T8PD, Sched<[sched]>;
11900 defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11901 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
11902 "$src3, $src2", "$src2, $src3",
11903 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
11904 (VTI.VT (VTI.LdFrag addr:$src3))))>,
11905 EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
11906 Sched<[sched.Folded, sched.ReadAfterFold]>;
11907 defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11908 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
11909 OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
11910 "$src2, ${src3}"##VTI.BroadcastStr,
11911 (OpNode VTI.RC:$src1, VTI.RC:$src2,
11912 (VTI.VT (X86VBroadcast
11913 (VTI.ScalarLdFrag addr:$src3))))>,
11914 EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
11915 T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
11916 }
11918 multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
11919 X86SchedWriteWidths sched, bit IsCommutable> {
11920 let Predicates = [HasVNNI] in
11921 defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
11922 IsCommutable>, EVEX_V512;
11923 let Predicates = [HasVNNI, HasVLX] in {
11924 defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
11925 IsCommutable>, EVEX_V256;
11926 defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
11927 IsCommutable>, EVEX_V128;
11928 }
11929 }
11931 // FIXME: Is there a better scheduler class for VPDP?
11932 defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
11933 defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
11934 defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
11935 defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
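// VPDPBUSD multiplies unsigned bytes of one source with signed bytes of the
// other and accumulates four products into each i32 lane; a C++ model of one
// lane (illustrative; the *S forms saturate the final add instead of wrapping):
//   int vpdpbusd_lane(int acc, const unsigned char a[4],
//                     const signed char b[4]) {
//     for (int i = 0; i < 4; ++i)
//       acc += (int)a[i] * (int)b[i];
//     return acc;
//   }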
11937 //===----------------------------------------------------------------------===//
11938 // Bit Algorithms
11939 //===----------------------------------------------------------------------===//
11941 // FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
11942 defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
11943 avx512vl_i8_info, HasBITALG>;
11944 defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
11945 avx512vl_i16_info, HasBITALG>, VEX_W;
11947 defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
11948 defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
11950 def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
11951 (X86Vpshufbitqmb node:$src1, node:$src2), [{
11952 return N->hasOneUse();
11953 }]>;
11955 multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
11956 defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
11957 (ins VTI.RC:$src1, VTI.RC:$src2),
11958 "vpshufbitqmb",
11959 "$src2, $src1", "$src1, $src2",
11960 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
11961 (VTI.VT VTI.RC:$src2)),
11962 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
11963 (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
11964 Sched<[sched]>;
11965 defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
11966 (ins VTI.RC:$src1, VTI.MemOp:$src2),
11967 "vpshufbitqmb",
11968 "$src2, $src1", "$src1, $src2",
11969 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
11970 (VTI.VT (VTI.LdFrag addr:$src2))),
11971 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
11972 (VTI.VT (VTI.LdFrag addr:$src2)))>,
11973 EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
11974 Sched<[sched.Folded, sched.ReadAfterFold]>;
11975 }
11977 multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
11978 let Predicates = [HasBITALG] in
11979 defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
11980 let Predicates = [HasBITALG, HasVLX] in {
11981 defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
11982 defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
11983 }
11984 }
11986 // FIXME: Is there a better scheduler class for VPSHUFBITQMB?
11987 defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
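// VPSHUFBITQMB produces one mask bit per control byte: each byte of the
// second source selects, by its low six bits, a single bit out of the
// corresponding 64-bit lane of the first source. A C++ model of one lane, as
// we understand the semantics (illustrative only):
//   void shufbitqmb_lane(bool out[8], unsigned long long src,
//                        const unsigned char ctrl[8]) {
//     for (int i = 0; i < 8; ++i)
//       out[i] = (src >> (ctrl[i] & 63)) & 1;
//   }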
11989 //===----------------------------------------------------------------------===//
11990 // GFNI
11991 //===----------------------------------------------------------------------===//
11993 multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
11994 X86SchedWriteWidths sched> {
11995 let Predicates = [HasGFNI, HasAVX512, HasBWI] in
11996 defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
11997 EVEX_V512;
11998 let Predicates = [HasGFNI, HasVLX, HasBWI] in {
11999 defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
12000 EVEX_V256;
12001 defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
12002 EVEX_V128;
12003 }
12004 }
12006 defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
12007 SchedWriteVecALU>,
12008 EVEX_CD8<8, CD8VF>, T8PD;
12010 multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
12011 X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12012 X86VectorVTInfo BcstVTI>
12013 : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
12014 let ExeDomain = VTI.ExeDomain in
12015 defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12016 (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
12017 OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
12018 "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
12019 (OpNode (VTI.VT VTI.RC:$src1),
12020 (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
12021 (i8 imm:$src3))>, EVEX_B,
12022 Sched<[sched.Folded, sched.ReadAfterFold]>;
12023 }
12025 multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12026 X86SchedWriteWidths sched> {
12027 let Predicates = [HasGFNI, HasAVX512, HasBWI] in
12028 defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
12029 v64i8_info, v8i64_info>, EVEX_V512;
12030 let Predicates = [HasGFNI, HasVLX, HasBWI] in {
12031 defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
12032 v32i8x_info, v4i64x_info>, EVEX_V256;
12033 defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
12034 v16i8x_info, v2i64x_info>, EVEX_V128;
12035 }
12036 }
12038 defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
12039 X86GF2P8affineinvqb, SchedWriteVecIMul>,
12040 EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12041 defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
12042 X86GF2P8affineqb, SchedWriteVecIMul>,
12043 EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
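// The affine ops multiply each source byte, viewed as a GF(2) bit-vector, by
// an 8x8 bit matrix taken from the matching qword of the other source, then
// XOR in the immediate byte. A C++ sketch using the GCC/Clang parity builtin
// (illustrative; the exact row/bit ordering should be checked against the
// SDM before relying on it):
//   unsigned char gf2p8affine_byte(unsigned long long A, unsigned char x,
//                                  unsigned char imm) {
//     unsigned char r = 0;
//     for (int i = 0; i < 8; ++i) {
//       unsigned char row = (unsigned char)(A >> (56 - 8 * i));
//       r |= (unsigned char)(__builtin_parity(row & x) << (7 - i));
//     }
//     return r ^ imm;
//   }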
12046 //===----------------------------------------------------------------------===//
12047 // AVX5124FMAPS
12048 //===----------------------------------------------------------------------===//
12050 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
12051 Constraints = "$src1 = $dst" in {
12052 defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
12053 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12054 "v4fmaddps", "$src3, $src2", "$src2, $src3",
12055 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12056 Sched<[SchedWriteFMA.ZMM.Folded]>;
12058 defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
12059 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12060 "v4fnmaddps", "$src3, $src2", "$src2, $src3",
12061 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12062 Sched<[SchedWriteFMA.ZMM.Folded]>;
12064 defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
12065 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12066 "v4fmaddss", "$src3, $src2", "$src2, $src3",
12067 []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12068 Sched<[SchedWriteFMA.Scl.Folded]>;
12070 defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
12071 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12072 "v4fnmaddss", "$src3, $src2", "$src2, $src3",
12073 []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12074 Sched<[SchedWriteFMA.Scl.Folded]>;
12075 }
12077 //===----------------------------------------------------------------------===//
12078 // AVX5124VNNIW
12079 //===----------------------------------------------------------------------===//
12081 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
12082 Constraints = "$src1 = $dst" in {
12083 defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
12084 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12085 "vp4dpwssd", "$src3, $src2", "$src2, $src3",
12086 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12087 Sched<[SchedWriteFMA.ZMM.Folded]>;
12089 defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
12090 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12091 "vp4dpwssds", "$src3, $src2", "$src2, $src3",
12092 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12093 Sched<[SchedWriteFMA.ZMM.Folded]>;
12094 }
12096 let hasSideEffects = 0 in {
12097 let mayStore = 1 in
12098 def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
12099 let mayLoad = 1 in
12100 def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
12101 }
12103 //===----------------------------------------------------------------------===//
12104 // VP2INTERSECT
12105 //===----------------------------------------------------------------------===//
12107 multiclass avx512_vp2intersect_modes<X86VectorVTInfo _> {
12108 def rr : I<0x68, MRMSrcReg,
12109 (outs _.KRPC:$dst),
12110 (ins _.RC:$src1, _.RC:$src2),
12111 !strconcat("vp2intersect", _.Suffix,
12112 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12113 [(set _.KRPC:$dst, (X86vp2intersect
12114 _.RC:$src1, (_.VT _.RC:$src2)))]>,
12115 EVEX_4V, T8XD;
12117 def rm : I<0x68, MRMSrcMem,
12118 (outs _.KRPC:$dst),
12119 (ins _.RC:$src1, _.MemOp:$src2),
12120 !strconcat("vp2intersect", _.Suffix,
12121 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12122 [(set _.KRPC:$dst, (X86vp2intersect
12123 _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12124 EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>;
12126 def rmb : I<0x68, MRMSrcMem,
12127 (outs _.KRPC:$dst),
12128 (ins _.RC:$src1, _.ScalarMemOp:$src2),
12129 !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12130 ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12131 [(set _.KRPC:$dst, (X86vp2intersect
12132 _.RC:$src1, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2)))))]>,
12133 EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
12134 }
12136 multiclass avx512_vp2intersect<AVX512VLVectorVTInfo _> {
12137 let Predicates = [HasAVX512, HasVP2INTERSECT] in
12138 defm Z : avx512_vp2intersect_modes<_.info512>, EVEX_V512;
12140 let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
12141 defm Z256 : avx512_vp2intersect_modes<_.info256>, EVEX_V256;
12142 defm Z128 : avx512_vp2intersect_modes<_.info128>, EVEX_V128;
12143 }
12144 }
12146 defm VP2INTERSECTD : avx512_vp2intersect<avx512vl_i32_info>;
12147 defm VP2INTERSECTQ : avx512_vp2intersect<avx512vl_i64_info>, VEX_W;
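// VP2INTERSECT writes a pair of masks: bit i of the first is set if a[i]
// matches any element of b, and bit j of the second if b[j] matches any
// element of a. A C++ model (illustrative; N is the element count):
//   void vp2intersect(bool k1[], bool k2[], const int a[], const int b[],
//                     int N) {
//     for (int i = 0; i < N; ++i) { k1[i] = false; k2[i] = false; }
//     for (int i = 0; i < N; ++i)
//       for (int j = 0; j < N; ++j)
//         if (a[i] == b[j]) { k1[i] = true; k2[j] = true; }
//   }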
12149 multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
12150 X86SchedWriteWidths sched,
12151 AVX512VLVectorVTInfo _SrcVTInfo,
12152 AVX512VLVectorVTInfo _DstVTInfo,
12153 SDNode OpNode, Predicate prd,
12154 bit IsCommutable = 0> {
12155 let Predicates = [prd] in
12156 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
12157 _SrcVTInfo.info512, _DstVTInfo.info512,
12158 _SrcVTInfo.info512, IsCommutable>,
12159 EVEX_V512, EVEX_CD8<32, CD8VF>;
12160 let Predicates = [HasVLX, prd] in {
12161 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
12162 _SrcVTInfo.info256, _DstVTInfo.info256,
12163 _SrcVTInfo.info256, IsCommutable>,
12164 EVEX_V256, EVEX_CD8<32, CD8VF>;
12165 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
12166 _SrcVTInfo.info128, _DstVTInfo.info128,
12167 _SrcVTInfo.info128, IsCommutable>,
12168 EVEX_V128, EVEX_CD8<32, CD8VF>;
12169 }
12170 }
12172 defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
12173 SchedWriteCvtPD2PS, // FIXME: Should be SchedWriteCvtPS2BF.
12174 avx512vl_f32_info, avx512vl_i16_info,
12175 X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
12177 // Truncate Float to BFloat16
12178 multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
12179 X86SchedWriteWidths sched> {
12180 let Predicates = [HasBF16] in {
12181 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
12182 X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
12183 }
12184 let Predicates = [HasBF16, HasVLX] in {
12185 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
12186 null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
12187 VK4WM>, EVEX_V128;
12188 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
12189 X86cvtneps2bf16,
12190 sched.YMM, "{1to8}", "{y}">, EVEX_V256;
12192 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
12193 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
12194 VR128X:$src), 0>;
12195 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
12196 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
12197 f128mem:$src), 0, "intel">;
12198 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
12199 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
12201 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
12202 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
12203 f256mem:$src), 0, "intel">;

defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
                                       SchedWriteCvtPD2PS>, T8XS,
                                       EVEX_CD8<32, CD8VF>;
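
// VCVTNEPS2BF16 narrows a single single-precision vector to BF16, so the ZMM
// form produces only a YMM-sized result. A rough C-level sketch with the
// matching intrinsic (assuming <immintrin.h> and an AVX512_BF16 target;
// illustration only):
//   __m256bh half_width = _mm512_cvtneps_pbh(src_ps);  // 16 f32 -> 16 bf16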

let Predicates = [HasBF16, HasVLX] in {
  // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;

  def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
            (VCVTNEPS2BF16Z128rm addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
                                     (X86VBroadcast (loadf32 addr:$src))))),
            (VCVTNEPS2BF16Z128rmb addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))),
                              (v8i16 VR128X:$src0), VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))),
                              v8i16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
}

let Constraints = "$src1 = $dst" in {
multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86VectorVTInfo _, X86VectorVTInfo src_v> {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src2, _.RC:$src3),
                           OpcodeStr, "$src3, $src2", "$src2, $src3",
                           (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
                           EVEX_4V;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src2, _.MemOp:$src3),
                  OpcodeStr, "$src3, $src2", "$src2, $src3",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                  (src_v.VT (bitconvert
                             (src_v.LdFrag addr:$src3)))))>, EVEX_4V;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src2, _.ScalarMemOp:$src3),
                  OpcodeStr,
                  !strconcat("${src3}", _.BroadcastStr, ", $src2"),
                  !strconcat("$src2, ${src3}", _.BroadcastStr),
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                  (src_v.VT (X86VBroadcast(src_v.ScalarLdFrag addr:$src3)))))>,
                  EVEX_B, EVEX_4V;
}
} // Constraints = "$src1 = $dst"

multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _,
                                 AVX512VLVectorVTInfo src_v, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info512,
                                src_v.info512>, EVEX_V512;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info256,
                                   src_v.info256>, EVEX_V256;
    defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info128,
                                   src_v.info128>, EVEX_V128;
  }
}

defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps,
                                       avx512vl_f32_info, avx512vl_i32_info,
                                       HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
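
// VDPBF16PS multiplies adjacent BF16 pairs and accumulates the products into
// f32 lanes; the "$src1 = $dst" constraint above ties the accumulator to the
// destination. A rough C-level sketch with the matching intrinsic (assuming
// <immintrin.h> and an AVX512_BF16 target; illustration only):
//   acc = _mm512_dpbf16_ps(acc, a_bf16, b_bf16);
//   // per lane i: acc[i] += a[2i]*b[2i] + a[2i+1]*b[2i+1]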