1 //===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file describes the X86 AVX512 instruction set, defining the
10 // instructions, and properties of the instructions which are needed for code
11 // generation, machine code emission, and analysis.
13 //===----------------------------------------------------------------------===//
15 // Group template arguments that can be derived from the vector type (EltNum x
16 // EltVT). These are things like the register class for the writemask, etc.
17 // The idea is to pass one of these as the template argument rather than the
18 // individual arguments.
19 // The template is also used for scalar types, in this case numelts is 1.
20 class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
22 RegisterClass RC = rc;
23 ValueType EltVT = eltvt;
24 int NumElts = numelts;
26 // Corresponding mask register class.
27 RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
29 // Corresponding mask register pair class.
30 RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
31 !cast<RegisterOperand>("VK" # NumElts # "Pair"));
33 // Corresponding write-mask register class.
34 RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
37 ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
39 // Suffix used in the instruction mnemonic.
40 string Suffix = suffix;
42 // VTName is a string name for vector VT. For vector types it will be
43 // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
44 // It is a little bit complex for scalar types, where NumElts = 1.
45 // In this case we build v4f32 or v2f64
46 string VTName = "v" # !if (!eq (NumElts, 1),
47 !if (!eq (EltVT.Size, 32), 4,
48 !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;
51 ValueType VT = !cast<ValueType>(VTName);
53 string EltTypeName = !cast<string>(EltVT);
54 // Size of the element type in bits, e.g. 32 for v16i32.
55 string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
56 int EltSize = EltVT.Size;
58 // "i" for integer types and "f" for floating-point types
59 string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
61 // Size of RC in bits, e.g. 512 for VR512.
64 // The corresponding memory operand, e.g. i512mem for VR512.
65 X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
66 X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
67 // FP scalar memory operand for intrinsics - ssmem/sdmem.
68 Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
69 !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));
72 PatFrag LdFrag = !cast<PatFrag>("load" # VTName);
74 PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);
76 PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
78 ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
79 !cast<ComplexPattern>("sse_load_f32"),
80 !if (!eq (EltTypeName, "f64"),
81 !cast<ComplexPattern>("sse_load_f64"),
84 // The string to specify embedded broadcast in assembly.
85 string BroadcastStr = "{1to" # NumElts # "}";
87 // 8-bit compressed displacement tuple/subvector format. This is only
88 // defined for NumElts <= 8.
89 CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
90 !cast<CD8VForm>("CD8VT" # NumElts), ?);
92 SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
93 !if (!eq (Size, 256), sub_ymm, ?));
95 Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
96 !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
99 RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
101 dag ImmAllZerosV = (VT immAllZerosV);
103 string ZSuffix = !if (!eq (Size, 128), "Z128",
104 !if (!eq (Size, 256), "Z256", "Z"));
// 512-bit vector types (RC = VR512). The template arguments are the element
// count, the element type, the register class and the suffix used in the
// instruction mnemonic.
107 def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
108 def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
109 def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
110 def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
111 def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
112 def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;
// 256-bit vector types (RC = VR256X).
114 // "x" in v32i8x_info means RC = VR256X
115 def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
116 def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
117 def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
118 def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
119 def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
120 def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;
// 128-bit vector types (RC = VR128X).
122 def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
123 def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
124 def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
125 def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
126 def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
127 def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
129 // We map scalar types to the smallest (128-bit) vector type
130 // with the appropriate element type. This allows the same masking logic to be used.
// All of these pass NumElts = 1. Only the f32/f64 infos use VR128X as the
// register class; the integer infos use GR32/GR64.
131 def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
132 def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
133 def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
134 def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
136 class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
137 X86VectorVTInfo i128> {
138 X86VectorVTInfo info512 = i512;
139 X86VectorVTInfo info256 = i256;
140 X86VectorVTInfo info128 = i128;
143 def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
145 def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
147 def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
149 def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
151 def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
153 def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
156 class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
158 RegisterClass KRC = _krc;
159 RegisterClass KRCWM = _krcwm;
// Mask vector infos, one per vNi1 type. The arguments are the mask register
// class (KRC), the write-mask register class (KRCWM) and the vNi1 type.
163 def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
164 def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
165 def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
166 def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
167 def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
168 def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
169 def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
171 // This multiclass generates the masking variants from the non-masking
172 // variant. It only provides the assembly pieces for the masking variants.
173 // It assumes custom ISel patterns for masking which can be provided as
174 // template arguments.
175 multiclass AVX512_maskable_custom<bits<8> O, Format F,
177 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
179 string AttSrcAsm, string IntelSrcAsm,
181 list<dag> MaskingPattern,
182 list<dag> ZeroMaskingPattern,
183 string MaskingConstraint = "",
184 bit IsCommutable = 0,
185 bit IsKCommutable = 0,
186 bit IsKZCommutable = IsCommutable> {
187 let isCommutable = IsCommutable in
188 def NAME: AVX512<O, F, Outs, Ins,
189 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
190 "$dst, "#IntelSrcAsm#"}",
193 // Prefer over VMOV*rrk Pat<>
194 let isCommutable = IsKCommutable in
195 def NAME#k: AVX512<O, F, Outs, MaskingIns,
196 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
197 "$dst {${mask}}, "#IntelSrcAsm#"}",
200 // In case of the 3src subclass this is overridden with a let.
201 string Constraints = MaskingConstraint;
204 // Zero masking does not add any restrictions on commuting the operands,
205 // so it is OK to use IsCommutable instead of IsKCommutable.
206 let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
207 def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
208 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
209 "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
215 // Common base class of AVX512_maskable and AVX512_maskable_3src.
216 multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
218 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
220 string AttSrcAsm, string IntelSrcAsm,
221 dag RHS, dag MaskingRHS,
222 SDNode Select = vselect,
223 string MaskingConstraint = "",
224 bit IsCommutable = 0,
225 bit IsKCommutable = 0,
226 bit IsKZCommutable = IsCommutable> :
227 AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
228 AttSrcAsm, IntelSrcAsm,
229 [(set _.RC:$dst, RHS)],
230 [(set _.RC:$dst, MaskingRHS)],
232 (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
233 MaskingConstraint, IsCommutable,
234 IsKCommutable, IsKZCommutable>;
236 // This multiclass generates the unconditional/non-masking, the masking and
237 // the zero-masking variant of the vector instruction. In the masking case, the
238 // preserved vector elements come from a new dummy input operand tied to $dst.
239 // This version uses a separate dag for non-masking and masking.
240 multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
241 dag Outs, dag Ins, string OpcodeStr,
242 string AttSrcAsm, string IntelSrcAsm,
243 dag RHS, dag MaskRHS,
244 bit IsCommutable = 0, bit IsKCommutable = 0,
245 SDNode Select = vselect> :
246 AVX512_maskable_custom<O, F, Outs, Ins,
247 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
248 !con((ins _.KRCWM:$mask), Ins),
249 OpcodeStr, AttSrcAsm, IntelSrcAsm,
250 [(set _.RC:$dst, RHS)],
252 (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
254 (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
255 "$src0 = $dst", IsCommutable, IsKCommutable>;
257 // This multiclass generates the unconditional/non-masking, the masking and
258 // the zero-masking variant of the vector instruction. In the masking case, the
259 // preserved vector elements come from a new dummy input operand tied to $dst.
260 multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
261 dag Outs, dag Ins, string OpcodeStr,
262 string AttSrcAsm, string IntelSrcAsm,
264 bit IsCommutable = 0, bit IsKCommutable = 0,
265 bit IsKZCommutable = IsCommutable,
266 SDNode Select = vselect> :
267 AVX512_maskable_common<O, F, _, Outs, Ins,
268 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
269 !con((ins _.KRCWM:$mask), Ins),
270 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
271 (Select _.KRCWM:$mask, RHS, _.RC:$src0),
272 Select, "$src0 = $dst", IsCommutable, IsKCommutable,
275 // This multiclass generates the unconditional/non-masking, the masking and
276 // the zero-masking variant of the scalar instruction.
277 multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
278 dag Outs, dag Ins, string OpcodeStr,
279 string AttSrcAsm, string IntelSrcAsm,
281 AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
282 RHS, 0, 0, 0, X86selects>;
284 // Similar to AVX512_maskable but in this case one of the source operands
285 // ($src1) is already tied to $dst so we just use that for the preserved
286 // vector elements. NOTE that the NonTiedIns (the ins dag) should exclude $src1.
288 multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
289 dag Outs, dag NonTiedIns, string OpcodeStr,
290 string AttSrcAsm, string IntelSrcAsm,
292 bit IsCommutable = 0,
293 bit IsKCommutable = 0,
294 SDNode Select = vselect,
296 AVX512_maskable_common<O, F, _, Outs,
297 !con((ins _.RC:$src1), NonTiedIns),
298 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
299 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
300 OpcodeStr, AttSrcAsm, IntelSrcAsm,
301 !if(MaskOnly, (null_frag), RHS),
302 (Select _.KRCWM:$mask, RHS, _.RC:$src1),
303 Select, "", IsCommutable, IsKCommutable>;
305 // Similar to AVX512_maskable_3src but in this case the input VT for the tied
306 // operand differs from the output VT. This requires a bitconvert on
307 // the preserved vector going into the vselect.
308 // NOTE: The unmasked pattern is disabled.
// The tied $src1 operand and the $mask operand are both taken from InVT
// (InVT.RC / InVT.KRCWM), while OutVT is the info forwarded to
// AVX512_maskable_common. The same ins dag is passed for the masking and the
// zero-masking variants, and the masking constraint string is left empty.
309 multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
310 X86VectorVTInfo InVT,
311 dag Outs, dag NonTiedIns, string OpcodeStr,
312 string AttSrcAsm, string IntelSrcAsm,
313 dag RHS, bit IsCommutable = 0> :
314 AVX512_maskable_common<O, F, OutVT, Outs,
315 !con((ins InVT.RC:$src1), NonTiedIns),
316 !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
317 !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
// null_frag is passed as the unmasked RHS, so no unmasked pattern is produced.
318 OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
// Masked RHS: elements not selected by $mask come from the bitconverted $src1.
319 (vselect InVT.KRCWM:$mask, RHS,
320 (bitconvert InVT.RC:$src1)),
321 vselect, "", IsCommutable>;
323 multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
324 dag Outs, dag NonTiedIns, string OpcodeStr,
325 string AttSrcAsm, string IntelSrcAsm,
327 bit IsCommutable = 0,
328 bit IsKCommutable = 0,
330 AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
331 IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
332 X86selects, MaskOnly>;
334 multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
337 string AttSrcAsm, string IntelSrcAsm,
339 AVX512_maskable_custom<O, F, Outs, Ins,
340 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
341 !con((ins _.KRCWM:$mask), Ins),
342 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
345 multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
346 dag Outs, dag NonTiedIns,
348 string AttSrcAsm, string IntelSrcAsm,
350 AVX512_maskable_custom<O, F, Outs,
351 !con((ins _.RC:$src1), NonTiedIns),
352 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
353 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
354 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
357 // Instruction with mask that puts result in mask register,
358 // like "compare" and "vptest"
359 multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
361 dag Ins, dag MaskingIns,
363 string AttSrcAsm, string IntelSrcAsm,
365 list<dag> MaskingPattern,
366 bit IsCommutable = 0> {
367 let isCommutable = IsCommutable in {
368 def NAME: AVX512<O, F, Outs, Ins,
369 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
370 "$dst, "#IntelSrcAsm#"}",
373 def NAME#k: AVX512<O, F, Outs, MaskingIns,
374 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
375 "$dst {${mask}}, "#IntelSrcAsm#"}",
376 MaskingPattern>, EVEX_K;
380 multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
382 dag Ins, dag MaskingIns,
384 string AttSrcAsm, string IntelSrcAsm,
385 dag RHS, dag MaskingRHS,
386 bit IsCommutable = 0> :
387 AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
388 AttSrcAsm, IntelSrcAsm,
389 [(set _.KRC:$dst, RHS)],
390 [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
// Convenience wrapper around AVX512_maskable_common_cmp. The masked ins dag is
// built by prepending _.KRCWM:$mask to Ins. The unmasked pattern uses RHS and
// the masked pattern ANDs the mask with RHS_su.
// NOTE(review): RHS_su is presumably a single-use variant of RHS - confirm at
// the call sites.
392 multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
393 dag Outs, dag Ins, string OpcodeStr,
394 string AttSrcAsm, string IntelSrcAsm,
395 dag RHS, dag RHS_su, bit IsCommutable = 0> :
396 AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
397 !con((ins _.KRCWM:$mask), Ins),
398 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
399 (and _.KRCWM:$mask, RHS_su), IsCommutable>;
402 // Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
403 // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
404 // swizzled by ExecutionDomainFix to pxor.
405 // We set canFoldAsLoad because this can be converted to a constant-pool
406 // load of an all-zeros value if folding it would be beneficial.
407 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
408 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
// 512-bit all-zeros and all-ones pseudos. Both take no inputs and both
// patterns are typed as v16i32.
409 def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
410 [(set VR512:$dst, (v16i32 immAllZerosV))]>;
411 def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
412 [(set VR512:$dst, (v16i32 immAllOnesV))]>;
415 // Alias instructions that allow VPTERNLOG to be used with a mask to create
416 // a mix of all ones and all zeros elements. This is done this way to force
417 // the same register to be used as input for all three sources.
418 let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
419 def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
420 (ins VK16WM:$mask), "",
421 [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
422 (v16i32 immAllOnesV),
423 (v16i32 immAllZerosV)))]>;
424 def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
425 (ins VK8WM:$mask), "",
426 [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
428 (v8i64 immAllZerosV)))]>;
431 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
432 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
// 128-bit and 256-bit all-zeros pseudos for the VR128X / VR256X register
// classes; the patterns are typed as v4i32 and v8i32 respectively.
433 def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
434 [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
435 def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
436 [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
439 // Alias instructions that map fld0 to xorps for sse or vxorps for avx.
440 // This is expanded by ExpandPostRAPseudos.
441 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
442 isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
// Scalar floating-point zero pseudos: FR32X matched against fp32imm0 and
// FR64X matched against fpimm0.
443 def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
444 [(set FR32X:$dst, fp32imm0)]>;
445 def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
446 [(set FR64X:$dst, fpimm0)]>;
449 //===----------------------------------------------------------------------===//
450 // AVX-512 - VECTOR INSERT
453 // Supports two different pattern operators for mask and unmasked ops. Allows
454 // null_frag to be passed for one.
455 multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
457 SDPatternOperator vinsert_insert,
458 SDPatternOperator vinsert_for_mask,
459 X86FoldableSchedWrite sched> {
460 let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
461 defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
462 (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
463 "vinsert" # From.EltTypeName # "x" # From.NumElts,
464 "$src3, $src2, $src1", "$src1, $src2, $src3",
465 (vinsert_insert:$src3 (To.VT To.RC:$src1),
466 (From.VT From.RC:$src2),
468 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
469 (From.VT From.RC:$src2),
471 AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
473 defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
474 (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
475 "vinsert" # From.EltTypeName # "x" # From.NumElts,
476 "$src3, $src2, $src1", "$src1, $src2, $src3",
477 (vinsert_insert:$src3 (To.VT To.RC:$src1),
478 (From.VT (From.LdFrag addr:$src2)),
480 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
481 (From.VT (From.LdFrag addr:$src2)),
482 (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
483 EVEX_CD8<From.EltSize, From.CD8TupleForm>,
484 Sched<[sched.Folded, sched.ReadAfterFold]>;
488 // Passes the same pattern operator for masked and unmasked ops.
489 multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
491 SDPatternOperator vinsert_insert,
492 X86FoldableSchedWrite sched> :
493 vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;
495 multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
496 X86VectorVTInfo To, PatFrag vinsert_insert,
497 SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
498 let Predicates = p in {
499 def : Pat<(vinsert_insert:$ins
500 (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
501 (To.VT (!cast<Instruction>(InstrStr#"rr")
502 To.RC:$src1, From.RC:$src2,
503 (INSERT_get_vinsert_imm To.RC:$ins)))>;
505 def : Pat<(vinsert_insert:$ins
507 (From.VT (From.LdFrag addr:$src2)),
509 (To.VT (!cast<Instruction>(InstrStr#"rm")
510 To.RC:$src1, addr:$src2,
511 (INSERT_get_vinsert_imm To.RC:$ins)))>;
515 multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
516 ValueType EltVT64, int Opcode256,
517 X86FoldableSchedWrite sched> {
519 let Predicates = [HasVLX] in
520 defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
521 X86VectorVTInfo< 4, EltVT32, VR128X>,
522 X86VectorVTInfo< 8, EltVT32, VR256X>,
523 vinsert128_insert, sched>, EVEX_V256;
525 defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
526 X86VectorVTInfo< 4, EltVT32, VR128X>,
527 X86VectorVTInfo<16, EltVT32, VR512>,
528 vinsert128_insert, sched>, EVEX_V512;
530 defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
531 X86VectorVTInfo< 4, EltVT64, VR256X>,
532 X86VectorVTInfo< 8, EltVT64, VR512>,
533 vinsert256_insert, sched>, VEX_W, EVEX_V512;
535 // Even with DQI we'd like to only use these instructions for masking.
536 let Predicates = [HasVLX, HasDQI] in
537 defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
538 X86VectorVTInfo< 2, EltVT64, VR128X>,
539 X86VectorVTInfo< 4, EltVT64, VR256X>,
540 null_frag, vinsert128_insert, sched>,
543 // Even with DQI we'd like to only use these instructions for masking.
544 let Predicates = [HasDQI] in {
545 defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
546 X86VectorVTInfo< 2, EltVT64, VR128X>,
547 X86VectorVTInfo< 8, EltVT64, VR512>,
548 null_frag, vinsert128_insert, sched>,
551 defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
552 X86VectorVTInfo< 8, EltVT32, VR256X>,
553 X86VectorVTInfo<16, EltVT32, VR512>,
554 null_frag, vinsert256_insert, sched>,
559 // FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
// Template arguments: 32-bit element type, opcode used for the 128-bit
// inserts, 64-bit element type, opcode used for the 256-bit inserts, and the
// scheduling class.
560 defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
561 defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
563 // Codegen patterns for the alternative element types.
564 // Even with AVX512DQ we'll still use these for unmasked operations.
// Each line maps an unmasked insert of the given From/To types onto the
// "rr"/"rm" forms of the named instruction, so 64-bit element inserts of
// 128 bits reuse the 32x4 instructions and 32-bit element inserts of 256 bits
// reuse the 64x4 instructions.
565 defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
566 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
567 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
568 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
570 defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
571 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
572 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
573 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
575 defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
576 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
577 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
578 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
// The i16 and i8 element types below are lowered onto the integer 32x4 and
// 64x4 insert instructions.
580 // Codegen pattern with the alternative types insert VEC128 into VEC256
581 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
582 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
583 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
584 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
585 // Codegen pattern with the alternative types insert VEC128 into VEC512
586 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
587 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
588 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
589 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
590 // Codegen pattern with the alternative types insert VEC256 into VEC512
591 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
592 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
593 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
594 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
597 multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
598 X86VectorVTInfo To, X86VectorVTInfo Cast,
599 PatFrag vinsert_insert,
600 SDNodeXForm INSERT_get_vinsert_imm,
602 let Predicates = p in {
604 (vselect Cast.KRCWM:$mask,
606 (vinsert_insert:$ins (To.VT To.RC:$src1),
607 (From.VT From.RC:$src2),
610 (!cast<Instruction>(InstrStr#"rrk")
611 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
612 (INSERT_get_vinsert_imm To.RC:$ins))>;
614 (vselect Cast.KRCWM:$mask,
616 (vinsert_insert:$ins (To.VT To.RC:$src1),
619 (From.LdFrag addr:$src2))),
622 (!cast<Instruction>(InstrStr#"rmk")
623 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
624 (INSERT_get_vinsert_imm To.RC:$ins))>;
627 (vselect Cast.KRCWM:$mask,
629 (vinsert_insert:$ins (To.VT To.RC:$src1),
630 (From.VT From.RC:$src2),
633 (!cast<Instruction>(InstrStr#"rrkz")
634 Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
635 (INSERT_get_vinsert_imm To.RC:$ins))>;
637 (vselect Cast.KRCWM:$mask,
639 (vinsert_insert:$ins (To.VT To.RC:$src1),
640 (From.VT (From.LdFrag addr:$src2)),
643 (!cast<Instruction>(InstrStr#"rmkz")
644 Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
645 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Masked inserts where the select is performed on a different vector type
// (Cast) than the insert itself (From into To). Arguments: instruction name
// prefix, From, To, Cast, insert fragment, immediate transform, predicates.
// This group covers 128-bit inserts into 256-bit vectors.
649 defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
650 v8f32x_info, vinsert128_insert,
651 INSERT_get_vinsert128_imm, [HasVLX]>;
652 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
653 v4f64x_info, vinsert128_insert,
654 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
// Integer inserts with the select done on v8i32.
656 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
657 v8i32x_info, vinsert128_insert,
658 INSERT_get_vinsert128_imm, [HasVLX]>;
659 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
660 v8i32x_info, vinsert128_insert,
661 INSERT_get_vinsert128_imm, [HasVLX]>;
662 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
663 v8i32x_info, vinsert128_insert,
664 INSERT_get_vinsert128_imm, [HasVLX]>;
// Integer 128-bit inserts into 256-bit vectors with the select done on v4i64.
// These use the integer VINSERTI64x2Z256 instruction (rather than the
// floating-point VINSERTF64x2Z256 form), matching the 512-bit VINSERTI64x2Z
// patterns for the same element types.
665 defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v4i32x_info, v8i32x_info,
666 v4i64x_info, vinsert128_insert,
667 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
668 defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v8i16x_info, v16i16x_info,
669 v4i64x_info, vinsert128_insert,
670 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
671 defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v16i8x_info, v32i8x_info,
672 v4i64x_info, vinsert128_insert,
673 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
// Masked 128-bit inserts into 512-bit vectors where the select type (Cast)
// differs from the insert type. The 64x2 forms are predicated on HasDQI.
675 defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
676 v16f32_info, vinsert128_insert,
677 INSERT_get_vinsert128_imm, [HasAVX512]>;
678 defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
679 v8f64_info, vinsert128_insert,
680 INSERT_get_vinsert128_imm, [HasDQI]>;
// Integer inserts with the select done on v16i32.
682 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
683 v16i32_info, vinsert128_insert,
684 INSERT_get_vinsert128_imm, [HasAVX512]>;
685 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
686 v16i32_info, vinsert128_insert,
687 INSERT_get_vinsert128_imm, [HasAVX512]>;
688 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
689 v16i32_info, vinsert128_insert,
690 INSERT_get_vinsert128_imm, [HasAVX512]>;
// Integer inserts with the select done on v8i64.
691 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
692 v8i64_info, vinsert128_insert,
693 INSERT_get_vinsert128_imm, [HasDQI]>;
694 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
695 v8i64_info, vinsert128_insert,
696 INSERT_get_vinsert128_imm, [HasDQI]>;
697 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
698 v8i64_info, vinsert128_insert,
699 INSERT_get_vinsert128_imm, [HasDQI]>;
// Masked 256-bit inserts into 512-bit vectors where the select type (Cast)
// differs from the insert type. The 32x8 forms are predicated on HasDQI.
701 defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
702 v16f32_info, vinsert256_insert,
703 INSERT_get_vinsert256_imm, [HasDQI]>;
704 defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
705 v8f64_info, vinsert256_insert,
706 INSERT_get_vinsert256_imm, [HasAVX512]>;
// Integer inserts with the select done on v16i32.
708 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
709 v16i32_info, vinsert256_insert,
710 INSERT_get_vinsert256_imm, [HasDQI]>;
711 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
712 v16i32_info, vinsert256_insert,
713 INSERT_get_vinsert256_imm, [HasDQI]>;
714 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
715 v16i32_info, vinsert256_insert,
716 INSERT_get_vinsert256_imm, [HasDQI]>;
// Integer inserts with the select done on v8i64.
717 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
718 v8i64_info, vinsert256_insert,
719 INSERT_get_vinsert256_imm, [HasAVX512]>;
720 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
721 v8i64_info, vinsert256_insert,
722 INSERT_get_vinsert256_imm, [HasAVX512]>;
723 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
724 v8i64_info, vinsert256_insert,
725 INSERT_get_vinsert256_imm, [HasAVX512]>;
727 // vinsertps - insert f32 to XMM
728 let ExeDomain = SSEPackedSingle in {
// Register-register form of vinsertps. The brace-less "let" applies
// isCommutable to this def only.
729 let isCommutable = 1 in
730 def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
731 (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
732 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
733 [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
734 EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
735 def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
736 (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
737 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
738 [(set VR128X:$dst, (X86insertps VR128X:$src1,
739 (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
741 EVEX_4V, EVEX_CD8<32, CD8VT1>,
742 Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
745 //===----------------------------------------------------------------------===//
746 // AVX-512 VECTOR EXTRACT
749 // Supports two different pattern operators for mask and unmasked ops. Allows
750 // null_frag to be passed for one.
751 multiclass vextract_for_size_split<int Opcode,
752 X86VectorVTInfo From, X86VectorVTInfo To,
753 SDPatternOperator vextract_extract,
754 SDPatternOperator vextract_for_mask,
755 SchedWrite SchedRR, SchedWrite SchedMR> {
757 let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
758 defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
759 (ins From.RC:$src1, u8imm:$idx),
760 "vextract" # To.EltTypeName # "x" # To.NumElts,
761 "$idx, $src1", "$src1, $idx",
762 (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
763 (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
764 AVX512AIi8Base, EVEX, Sched<[SchedRR]>;
766 def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
767 (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
768 "vextract" # To.EltTypeName # "x" # To.NumElts #
769 "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
770 [(store (To.VT (vextract_extract:$idx
771 (From.VT From.RC:$src1), (iPTR imm))),
775 let mayStore = 1, hasSideEffects = 0 in
776 def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
777 (ins To.MemOp:$dst, To.KRCWM:$mask,
778 From.RC:$src1, u8imm:$idx),
779 "vextract" # To.EltTypeName # "x" # To.NumElts #
780 "\t{$idx, $src1, $dst {${mask}}|"
781 "$dst {${mask}}, $src1, $idx}", []>,
782 EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
786 // Passes the same pattern operator for masked and unmasked ops.
787 multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
789 SDPatternOperator vextract_extract,
790 SchedWrite SchedRR, SchedWrite SchedMR> :
791 vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;
793 // Codegen pattern for the alternative types
794 multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
795 X86VectorVTInfo To, PatFrag vextract_extract,
796 SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
797 let Predicates = p in {
798 def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
799 (To.VT (!cast<Instruction>(InstrStr#"rr")
801 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
802 def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
803 (iPTR imm))), addr:$dst),
804 (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
805 (EXTRACT_get_vextract_imm To.RC:$ext))>;
// Instantiates the subvector-extract instruction families for one pair of
// element types (EltVT32 / EltVT64).
//   Opcode128 - opcode used by the variants whose result is in VR128X.
//   Opcode256 - opcode used by the variants whose result is in VR256X.
//   SchedRR / SchedMR - scheduling classes forwarded to the per-size
//                       multiclasses.
// Variants defined (record name suffix: source class -> result class):
//   [HasAVX512]       32x4Z    : VR512  -> VR128X
//                     64x4Z    : VR512  -> VR256X (VEX_W)
//   [HasVLX]          32x4Z256 : VR256X -> VR128X
//   [HasVLX, HasDQI]  64x2Z256 : VR256X -> VR128X (VEX_W1X)
//   [HasDQI]          64x2Z    : VR512  -> VR128X (VEX_W)
//                     32x8Z    : VR512  -> VR256X
// The DQI variants go through vextract_for_size_split with null_frag as the
// first operator, so only the second (masked) operator carries a real
// fragment; see the "only use these instructions for masking" comments below.
// NOTE(review): closing braces (embedded lines 823, 851-853) are not visible
// in this listing.
809 multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
810 ValueType EltVT64, int Opcode256,
811 SchedWrite SchedRR, SchedWrite SchedMR> {
812 let Predicates = [HasAVX512] in {
813 defm NAME # "32x4Z" : vextract_for_size<Opcode128,
814 X86VectorVTInfo<16, EltVT32, VR512>,
815 X86VectorVTInfo< 4, EltVT32, VR128X>,
816 vextract128_extract, SchedRR, SchedMR>,
817 EVEX_V512, EVEX_CD8<32, CD8VT4>;
818 defm NAME # "64x4Z" : vextract_for_size<Opcode256,
819 X86VectorVTInfo< 8, EltVT64, VR512>,
820 X86VectorVTInfo< 4, EltVT64, VR256X>,
821 vextract256_extract, SchedRR, SchedMR>,
822 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
824 let Predicates = [HasVLX] in
825 defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
826 X86VectorVTInfo< 8, EltVT32, VR256X>,
827 X86VectorVTInfo< 4, EltVT32, VR128X>,
828 vextract128_extract, SchedRR, SchedMR>,
829 EVEX_V256, EVEX_CD8<32, CD8VT4>;
831 // Even with DQI we'd like to only use these instructions for masking.
832 let Predicates = [HasVLX, HasDQI] in
833 defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
834 X86VectorVTInfo< 4, EltVT64, VR256X>,
835 X86VectorVTInfo< 2, EltVT64, VR128X>,
836 null_frag, vextract128_extract, SchedRR, SchedMR>,
837 VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;
839 // Even with DQI we'd like to only use these instructions for masking.
840 let Predicates = [HasDQI] in {
841 defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
842 X86VectorVTInfo< 8, EltVT64, VR512>,
843 X86VectorVTInfo< 2, EltVT64, VR128X>,
844 null_frag, vextract128_extract, SchedRR, SchedMR>,
845 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
846 defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
847 X86VectorVTInfo<16, EltVT32, VR512>,
848 X86VectorVTInfo< 8, EltVT32, VR256X>,
849 null_frag, vextract256_extract, SchedRR, SchedMR>,
850 EVEX_V512, EVEX_CD8<32, CD8VT8>;
854 // TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
// Instantiate the extract families: the floating-point family (f32/f64) uses
// opcodes 0x19 / 0x1b, the integer family (i32/i64) uses 0x39 / 0x3b. The
// first opcode of each pair is the 128-bit-result opcode, the second the
// 256-bit-result opcode (parameter order of vextract_for_type).
855 defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
856 defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
858 // extract_subvector codegen patterns with the alternative types.
859 // Even with AVX512DQ we'll still use these for unmasked operations.
// Each defm below maps an extract between vector types whose element width
// differs from the named instruction's (e.g. a v8f64 -> v2f64 extract onto
// VEXTRACTF32x4Z) by instantiating vextract_for_size_lowering with the
// instruction-name prefix, source info, result info, extract fragment,
// immediate transform and required predicate list.
// 64-bit element types, 128-bit result from a 512-bit source:
860 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
861 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
862 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
863 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// 32-bit element types, 256-bit result from a 512-bit source:
865 defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
866 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
867 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
868 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
// 64-bit element types, 128-bit result from a 256-bit source (needs VLX):
870 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
871 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
872 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
873 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
875 // Codegen pattern with the alternative types extract VEC128 from VEC256
876 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
877 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
878 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
879 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
881 // Codegen pattern with the alternative types extract VEC128 from VEC512
882 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
883 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
884 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
885 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
886 // Codegen pattern with the alternative types extract VEC256 from VEC512
887 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
888 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
889 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
890 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
893 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
894 // smaller extract to enable EVEX->VEX.
// NoVLX variant: each pattern matches extract_subvector from a VR512 source
// at the element index that corresponds to the second 128-bit chunk
// (2 x 64-bit, 4 x 32-bit, 8 x 16-bit, 16 x 8-bit) and selects
// VEXTRACTI128rr / VEXTRACTF128rr applied to the sub_ymm subregister of the
// source.
// NOTE(review): the final operand line of every Pat (embedded lines 899, 903,
// 907, 911, 915, 919) and the closing brace are not visible in this listing;
// the immediate passed to the instruction cannot be confirmed from here.
895 let Predicates = [NoVLX] in {
896 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
897 (v2i64 (VEXTRACTI128rr
898 (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
900 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
901 (v2f64 (VEXTRACTF128rr
902 (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
904 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
905 (v4i32 (VEXTRACTI128rr
906 (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
908 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
909 (v4f32 (VEXTRACTF128rr
910 (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
912 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
913 (v8i16 (VEXTRACTI128rr
914 (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
916 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
917 (v16i8 (VEXTRACTI128rr
918 (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
922 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
923 // smaller extract to enable EVEX->VEX.
// HasVLX variant of the same six patterns: the matched DAG is identical, but
// the selected instruction is VEXTRACTI32x4Z256rr / VEXTRACTF32x4Z256rr
// applied to the sub_ymm subregister of the VR512 source.
// NOTE(review): the final operand line of every Pat (embedded lines 928, 932,
// 936, 940, 944, 948) and the closing brace are not visible in this listing.
924 let Predicates = [HasVLX] in {
925 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
926 (v2i64 (VEXTRACTI32x4Z256rr
927 (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
929 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
930 (v2f64 (VEXTRACTF32x4Z256rr
931 (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
933 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
934 (v4i32 (VEXTRACTI32x4Z256rr
935 (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
937 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
938 (v4f32 (VEXTRACTF32x4Z256rr
939 (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
941 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
942 (v8i16 (VEXTRACTI32x4Z256rr
943 (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
945 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
946 (v16i8 (VEXTRACTI32x4Z256rr
947 (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
952 // Additional patterns for handling a bitcast between the vselect and the
953 // extract_subvector.
// Parameters:
//   InstrStr - name prefix of the instruction; "rrk" / "rrkz" are appended.
//   From     - source vector info of the extract.
//   To       - result vector info of the extract.
//   Cast     - vector info of the vselect; supplies the mask class
//              (Cast.KRCWM), the result type and the pass-through register
//              class.
//   vextract_extract / EXTRACT_get_vextract_imm - extract fragment and the
//              transform that turns the captured $ext node into the
//              instruction immediate.
// Two Pats are emitted under `Predicates = p`: the first selects the
// merge-masked "rrk" form (pass-through Cast.RC:$src0), the second the
// zero-masked "rrkz" form (no pass-through operand).
// NOTE(review): embedded lines 958, 961, 964, 968, 970, 973 and 977-979 are
// not visible in this listing, so the declaration of `p`, the wrapper around
// the extract and the third vselect operand of each pattern cannot be read
// here.
954 multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
955 X86VectorVTInfo To, X86VectorVTInfo Cast,
956 PatFrag vextract_extract,
957 SDNodeXForm EXTRACT_get_vextract_imm,
959 let Predicates = p in {
960 def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
962 (To.VT (vextract_extract:$ext
963 (From.VT From.RC:$src), (iPTR imm)))),
965 (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
966 Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
967 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
969 def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
971 (To.VT (vextract_extract:$ext
972 (From.VT From.RC:$src), (iPTR imm)))),
974 (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
975 Cast.KRCWM:$mask, From.RC:$src,
976 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
// Masked-extract-through-bitcast patterns, 128-bit result from a 256-bit
// source. Argument order: instruction prefix, From info, To info, Cast info
// (the type the mask applies to), extract fragment, immediate transform,
// predicates. The 32x4 instructions need only HasVLX; the 64x2 instructions
// additionally need HasDQI.
980 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
981 v4f32x_info, vextract128_extract,
982 EXTRACT_get_vextract128_imm, [HasVLX]>;
983 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
984 v2f64x_info, vextract128_extract,
985 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
// Integer variants: every non-native source element width is listed for both
// the 32x4 (mask over v4i32) and 64x2 (mask over v2i64) instructions.
987 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
988 v4i32x_info, vextract128_extract,
989 EXTRACT_get_vextract128_imm, [HasVLX]>;
990 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
991 v4i32x_info, vextract128_extract,
992 EXTRACT_get_vextract128_imm, [HasVLX]>;
993 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
994 v4i32x_info, vextract128_extract,
995 EXTRACT_get_vextract128_imm, [HasVLX]>;
996 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
997 v2i64x_info, vextract128_extract,
998 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
999 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
1000 v2i64x_info, vextract128_extract,
1001 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1002 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
1003 v2i64x_info, vextract128_extract,
1004 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
// Masked-extract-through-bitcast patterns, 128-bit result from a 512-bit
// source. The 32x4 instructions are gated on HasAVX512, the 64x2
// instructions on HasDQI.
1006 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
1007 v4f32x_info, vextract128_extract,
1008 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1009 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
1010 v2f64x_info, vextract128_extract,
1011 EXTRACT_get_vextract128_imm, [HasDQI]>;
// Integer variants (mask over v4i32 for 32x4, over v2i64 for 64x2).
1013 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
1014 v4i32x_info, vextract128_extract,
1015 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1016 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
1017 v4i32x_info, vextract128_extract,
1018 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1019 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
1020 v4i32x_info, vextract128_extract,
1021 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1022 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
1023 v2i64x_info, vextract128_extract,
1024 EXTRACT_get_vextract128_imm, [HasDQI]>;
1025 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
1026 v2i64x_info, vextract128_extract,
1027 EXTRACT_get_vextract128_imm, [HasDQI]>;
1028 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
1029 v2i64x_info, vextract128_extract,
1030 EXTRACT_get_vextract128_imm, [HasDQI]>;
// Masked-extract-through-bitcast patterns, 256-bit result from a 512-bit
// source. The 32x8 instructions are gated on HasDQI, the 64x4 instructions
// on HasAVX512.
1032 defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
1033 v8f32x_info, vextract256_extract,
1034 EXTRACT_get_vextract256_imm, [HasDQI]>;
1035 defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
1036 v4f64x_info, vextract256_extract,
1037 EXTRACT_get_vextract256_imm, [HasAVX512]>;
// Integer variants (mask over v8i32 for 32x8, over v4i64 for 64x4).
1039 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
1040 v8i32x_info, vextract256_extract,
1041 EXTRACT_get_vextract256_imm, [HasDQI]>;
1042 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
1043 v8i32x_info, vextract256_extract,
1044 EXTRACT_get_vextract256_imm, [HasDQI]>;
1045 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
1046 v8i32x_info, vextract256_extract,
1047 EXTRACT_get_vextract256_imm, [HasDQI]>;
1048 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
1049 v4i64x_info, vextract256_extract,
1050 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1051 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
1052 v4i64x_info, vextract256_extract,
1053 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1054 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
1055 v4i64x_info, vextract256_extract,
1056 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1058 // vextractps - extract 32 bits from XMM
// Register-destination form (opcode 0x17, MRMDestReg): the selected element
// of $src1, viewed as v4i32 through bc_v4i32, is written to a GR32 register.
// $src2 is the 8-bit immediate element index.
1059 def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
1060 (ins VR128X:$src1, u8imm:$src2),
1061 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1062 [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
1063 EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
// Memory-destination form of vextractps (opcode 0x17, MRMDestMem): stores the
// selected element of $src1 (viewed as v4i32) to the f32mem operand $dst.
// Encoded with EVEX_CD8<32, CD8VT1>.
// NOTE(review): embedded line 1069 (the tail of the store pattern) is not
// visible in this listing.
1065 def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
1066 (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1067 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1068 [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1070 EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1072 //===---------------------------------------------------------------------===//
1073 // AVX-512 BROADCAST
1075 // broadcast with a scalar argument.
// Emits three Pats that select the register-source broadcast instruction
// (Name # DestInfo.ZSuffix # r / rk / rkz) when the broadcast operand lives in
// the scalar register class SrcInfo.FRC. In every case the scalar is first
// moved into the vector class with COPY_TO_REGCLASS ... SrcInfo.RC.
//   r   - unmasked broadcast.
//   rk  - vselect with pass-through DestInfo.RC:$src0 (merge masking).
//   rkz - vselect against DestInfo.ImmAllZerosV (zero masking).
// NOTE(review): `Name` is used below but its declaration (embedded line 1077)
// and the closing brace are not visible in this listing; `opc` and
// `OpcodeStr` are not referenced by any visible line.
1076 multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
1078 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
1079 def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1080 (!cast<Instruction>(Name#DestInfo.ZSuffix#r)
1081 (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1082 def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
1083 (X86VBroadcast SrcInfo.FRC:$src),
1084 DestInfo.RC:$src0)),
1085 (!cast<Instruction>(Name#DestInfo.ZSuffix#rk)
1086 DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1087 (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1088 def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
1089 (X86VBroadcast SrcInfo.FRC:$src),
1090 DestInfo.ImmAllZerosV)),
1091 (!cast<Instruction>(Name#DestInfo.ZSuffix#rkz)
1092 DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1095 // Split version to allow mask and broadcast node to be different types. This
1096 // helps support the 32x2 broadcasts.
// Parameters visible here:
//   opc, OpcodeStr    - opcode and mnemonic.
//   SchedRR / SchedRM - scheduling classes (SchedRR is attached to `r`).
//   MaskInfo          - type the mask applies to; supplies the result
//                       register class and the mask register class.
//   DestInfo          - type of the broadcast node itself.
//   SrcInfo           - type of the broadcast source.
//   UnmaskedOp        - operator used in the unmasked patterns; defaults to
//                       X86VBroadcast. The masked patterns of `r` and `m`
//                       use X86VBroadcast directly.
// Defines the register-source (`r`, MRMSrcReg) and memory-source (`m`,
// MRMSrcMem, SrcInfo.ScalarMemOp) instructions through
// AVX512_maskable_split, followed by three Pats that fold a scalar load
// wrapped in scalar_to_vector into the m / mk / mkz instructions.
// NOTE(review): many embedded lines are absent from this listing (1098,
// 1108-1110, 1112-1114, 1117, 1121-1122, 1125-1126, 1130-1132, 1134,
// 1140-1142, 1149-1151, 1157-1158), including the declaration of `Name` and
// the wrappers around the broadcast nodes.
1097 multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1099 SchedWrite SchedRR, SchedWrite SchedRM,
1100 X86VectorVTInfo MaskInfo,
1101 X86VectorVTInfo DestInfo,
1102 X86VectorVTInfo SrcInfo,
1103 SDPatternOperator UnmaskedOp = X86VBroadcast> {
1104 let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in {
1105 defm r : AVX512_maskable_split<opc, MRMSrcReg, MaskInfo,
1106 (outs MaskInfo.RC:$dst),
1107 (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
1111 (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))),
1115 (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))))>,
1116 T8PD, EVEX, Sched<[SchedRR]>;
1118 defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo,
1119 (outs MaskInfo.RC:$dst),
1120 (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
1123 (DestInfo.VT (UnmaskedOp
1124 (SrcInfo.ScalarLdFrag addr:$src))))),
1127 (DestInfo.VT (X86VBroadcast
1128 (SrcInfo.ScalarLdFrag addr:$src)))))>,
1129 T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>,
// Unmasked load fold: broadcast of scalar_to_vector(load) -> `m`.
1133 def : Pat<(MaskInfo.VT
1135 (DestInfo.VT (UnmaskedOp
1136 (SrcInfo.VT (scalar_to_vector
1137 (SrcInfo.ScalarLdFrag addr:$src))))))),
1138 (!cast<Instruction>(Name#MaskInfo.ZSuffix#m) addr:$src)>;
// Merge-masked load fold -> `mk`.
// NOTE(review): this result uses DestInfo.ZSuffix while the `m` and `mkz`
// results use MaskInfo.ZSuffix - confirm the two suffixes always agree for
// every instantiation, or that this difference is intentional.
1139 def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
1143 (SrcInfo.VT (scalar_to_vector
1144 (SrcInfo.ScalarLdFrag addr:$src)))))),
1145 MaskInfo.RC:$src0)),
1146 (!cast<Instruction>(Name#DestInfo.ZSuffix#mk)
1147 MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>;
// Zero-masked load fold -> `mkz`.
1148 def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
1152 (SrcInfo.VT (scalar_to_vector
1153 (SrcInfo.ScalarLdFrag addr:$src)))))),
1154 MaskInfo.ImmAllZerosV)),
1155 (!cast<Instruction>(Name#MaskInfo.ZSuffix#mkz)
1156 MaskInfo.KRCWM:$mask, addr:$src)>;
1159 // Helper class to force mask and broadcast result to same type.
// Forwards to avx512_broadcast_rm_split, passing DestInfo for both the
// MaskInfo and DestInfo slots; UnmaskedOp is left at its default.
1160 multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
1161 SchedWrite SchedRR, SchedWrite SchedRM,
1162 X86VectorVTInfo DestInfo,
1163 X86VectorVTInfo SrcInfo> :
1164 avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
1165 DestInfo, DestInfo, SrcInfo>;
// Floating-point broadcast family with 512-bit (Z, HasAVX512) and 256-bit
// (Z256, HasVLX) destinations; the source info is always _.info128. Each
// variant combines avx512_broadcast_rm (instructions plus load folds) with
// avx512_broadcast_scalar (scalar-register patterns). No Z128 variant is
// visible in this multiclass.
// NOTE(review): embedded lines 1173-1176 and 1181-1185 (the tails of both
// defm statements and the closing braces) are not visible in this listing.
1167 multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1168 AVX512VLVectorVTInfo _> {
1169 let Predicates = [HasAVX512] in {
1170 defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1171 WriteFShuffle256Ld, _.info512, _.info128>,
1172 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
1177 let Predicates = [HasVLX] in {
1178 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1179 WriteFShuffle256Ld, _.info256, _.info128>,
1180 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
// Floating-point broadcast family with 512-bit (Z, HasAVX512), 256-bit
// (Z256) and 128-bit (Z128) destinations, the latter two under HasVLX. The
// source info is always _.info128. Each variant combines avx512_broadcast_rm
// with avx512_broadcast_scalar. All three use WriteFShuffle256 /
// WriteFShuffle256Ld as scheduling classes.
// NOTE(review): embedded lines 1192-1195, 1200-1201 and 1205-1208 (the tails
// of the defm statements and the closing braces) are not visible here.
1186 multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1187 AVX512VLVectorVTInfo _> {
1188 let Predicates = [HasAVX512] in {
1189 defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1190 WriteFShuffle256Ld, _.info512, _.info128>,
1191 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
1196 let Predicates = [HasVLX] in {
1197 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1198 WriteFShuffle256Ld, _.info256, _.info128>,
1199 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
1202 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1203 WriteFShuffle256Ld, _.info128, _.info128>,
1204 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
// Scalar floating-point broadcasts: vbroadcastss uses opcode 0x18,
// vbroadcastsd uses opcode 0x19 with avx512vl_f64_info and VEX_W1X.
// NOTE(review): embedded line 1210 (the second argument line of the
// VBROADCASTSS defm) is not visible in this listing.
1209 defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1211 defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1212 avx512vl_f64_info>, VEX_W1X;
// Integer broadcast from a general-purpose register: defines `r` through
// AVX512_maskable with mnemonic "vpbroadcast" + _.Suffix and the pattern
// (_.VT (OpNode SrcRC:$src)).
// NOTE(review): embedded lines 1219 and 1222-1224 (the `ins` list, the tail
// of the defm and the closing brace) are not visible in this listing.
// NOTE(review): the mnemonic is built with the `##` paste spelling while
// other definitions in this file use a single `#` - confirm the TableGen
// version in use still accepts `##`.
1214 multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1215 X86VectorVTInfo _, SDPatternOperator OpNode,
1216 RegisterClass SrcRC> {
1217 let ExeDomain = _.ExeDomain in
1218 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1220 "vpbroadcast"##_.Suffix, "$src", "$src",
1221 (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX,
// Byte/word broadcast from a general-purpose register.
// The instruction `r` is declared with AVX512_maskable_custom and always
// takes a GR32 source; its three pattern lists are empty, and the masked
// form ties $src0 to $dst. Selection is done by the three Pats that follow,
// which widen SrcRC:$src to 32 bits by inserting it at subregister index
// `Subreg` of an IMPLICIT_DEF i32:
//   Name#r   - unmasked (OpNode SrcRC:$src).
//   Name#rk  - vselect with pass-through _.RC:$src0.
//   Name#rkz - vselect against _.ImmAllZerosV.
// `Name` is the full record-name prefix of the instruction, to which the
// r/rk/rkz suffix is appended.
// NOTE(review): the closing brace (embedded line 1247) is not visible here.
1225 multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1226 X86VectorVTInfo _, SDPatternOperator OpNode,
1227 RegisterClass SrcRC, SubRegIndex Subreg> {
1228 let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1229 defm r : AVX512_maskable_custom<opc, MRMSrcReg,
1230 (outs _.RC:$dst), (ins GR32:$src),
1231 !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1232 !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1233 "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
1234 "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
1236 def : Pat <(_.VT (OpNode SrcRC:$src)),
1237 (!cast<Instruction>(Name#r)
1238 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1240 def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1241 (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
1242 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1244 def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1245 (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
1246 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
// Vector-length wrapper for avx512_int_broadcastbw_reg: defines Z (512-bit,
// predicate `prd`) and Z256 / Z128 (predicates `prd` + HasVLX). The `Name`
// string is extended with the matching Z / Z256 / Z128 suffix so the inner
// patterns can look up the instruction records by name. Z and Z256 use
// WriteShuffle256; Z128 uses WriteShuffle.
// NOTE(review): the closing braces (embedded lines 1260-1261) are not visible
// in this listing.
1249 multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1250 AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1251 RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1252 let Predicates = [prd] in
1253 defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1254 OpNode, SrcRC, Subreg>, EVEX_V512;
1255 let Predicates = [prd, HasVLX] in {
1256 defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1257 _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1258 defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1259 _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
// Vector-length wrapper for avx512_int_broadcast_reg: defines Z (predicate
// `prd`) and Z256 / Z128 (predicates `prd` + HasVLX). Z and Z256 use
// WriteShuffle256; Z128 uses WriteShuffle.
// NOTE(review): embedded lines 1268, 1271 and 1273-1275 (the tail of each
// defm and the closing braces) are not visible in this listing.
1263 multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1264 SDPatternOperator OpNode,
1265 RegisterClass SrcRC, Predicate prd> {
1266 let Predicates = [prd] in
1267 defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1269 let Predicates = [prd, HasVLX] in {
1270 defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1272 defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
// Broadcasts from general-purpose registers.
//   VPBROADCASTBr - opcode 0x7A, GR8 source widened via sub_8bit, HasBWI.
//   VPBROADCASTWr - opcode 0x7B, GR16 source widened via sub_16bit.
//   VPBROADCASTDr - opcode 0x7C, GR32 source, HasAVX512.
//   VPBROADCASTQr - opcode 0x7C, GR64 source, HasAVX512, with VEX_W.
// The name string passed to the byte/word variants repeats the defm name
// because the inner patterns look the instruction up by that string.
// NOTE(review): embedded line 1281 (the tail of the VPBROADCASTWr defm,
// including its predicate) is not visible in this listing.
1277 defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1278 avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1279 defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1280 avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1282 defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1283 X86VBroadcast, GR32, HasAVX512>;
1284 defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1285 X86VBroadcast, GR64, HasAVX512>, VEX_W;
1287 // Provide aliases for broadcast from the same register class that
1288 // automatically does the extract.
// Emits one Pat: a broadcast whose source is a register of type SrcInfo.VT
// selects the register-source instruction Name # DestInfo.ZSuffix # "r",
// fed with the sub_xmm subregister of the source (typed as ExtInfo.VT).
// NOTE(review): the closing brace (embedded line 1296) is not visible here.
1289 multiclass avx512_int_broadcast_rm_lowering<string Name,
1290 X86VectorVTInfo DestInfo,
1291 X86VectorVTInfo SrcInfo,
1292 X86VectorVTInfo ExtInfo> {
1293 def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
1294 (!cast<Instruction>(Name#DestInfo.ZSuffix#"r")
1295 (ExtInfo.VT (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm)))>;
// Integer broadcast from a vector register or memory, for all vector lengths.
//   Z     - 512-bit destination (predicate `prd`), plus a lowering pattern
//           for a 256-bit-typed source; Z_Alt adds the lowering pattern for a
//           512-bit-typed source.
//   Z256  - 256-bit destination (`prd` + HasVLX), plus a lowering pattern for
//           a 256-bit-typed source.
//   Z128  - 128-bit destination (`prd` + HasVLX); uses WriteShuffle /
//           WriteShuffleXLd instead of the 256-bit scheduling classes.
// The instruction source info is _.info128 in every variant.
// NOTE(review): embedded lines 1304, 1307, 1312 and 1315-1317 (the tails of
// the defm statements and the closing braces) are not visible here.
1298 multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1299 AVX512VLVectorVTInfo _, Predicate prd> {
1300 let Predicates = [prd] in {
1301 defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
1302 WriteShuffle256Ld, _.info512, _.info128>,
1303 avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info256, _.info128>,
1305 // Defined separately to avoid redefinition.
1306 defm Z_Alt : avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info512, _.info128>;
1308 let Predicates = [prd, HasVLX] in {
1309 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
1310 WriteShuffle256Ld, _.info256, _.info128>,
1311 avx512_int_broadcast_rm_lowering<NAME, _.info256, _.info256, _.info128>,
1313 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
1314 WriteShuffleXLd, _.info128, _.info128>,
// Integer element broadcasts from a vector register or memory:
//   vpbroadcastb 0x78 and vpbroadcastw 0x79 require HasBWI;
//   vpbroadcastd 0x58 and vpbroadcastq 0x59 require HasAVX512;
//   the q variant additionally carries VEX_W1X.
1319 defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1320 avx512vl_i8_info, HasBWI>;
1321 defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1322 avx512vl_i16_info, HasBWI>;
1323 defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1324 avx512vl_i32_info, HasAVX512>;
1325 defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1326 avx512vl_i64_info, HasAVX512>, VEX_W1X;
// Subvector broadcast from memory: defines `rm` through AVX512_maskable with
// a _Src.MemOp source operand; the pattern is X86SubVBroadcast of a
// _Src.LdFrag load, producing _Dst.VT. Scheduled as
// SchedWriteShuffle.YMM.Folded.
// NOTE(review): embedded lines 1335-1336 (the tail of the defm and the
// closing brace) are not visible in this listing.
1328 multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1329 X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1330 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1331 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1332 (_Dst.VT (X86SubVBroadcast
1333 (_Src.VT (_Src.LdFrag addr:$src))))>,
1334 Sched<[SchedWriteShuffle.YMM.Folded]>,
1338 // This should be used for the AVX512DQ broadcast instructions. It disables
1339 // the unmasked patterns so that we only use the DQ instructions when masking
// Same operands and scheduling class as avx512_subvec_broadcast_rm, but the
// instruction is built with AVX512_maskable_split, and hasSideEffects = 0 /
// mayLoad = 1 are set explicitly.
// NOTE(review): embedded lines 1340, 1346 and 1350-1351 are not visible in
// this listing; 1346 presumably carries the operator for the unmasked slot -
// confirm against the full file.
1341 multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1342 X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1343 let hasSideEffects = 0, mayLoad = 1 in
1344 defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1345 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1347 (_Dst.VT (X86SubVBroadcast
1348 (_Src.VT (_Src.LdFrag addr:$src))))>,
1349 Sched<[SchedWriteShuffle.YMM.Folded]>,
// Fold a zero-extending vector load (X86vzload, typed v2i64) feeding a v8i64
// broadcast into the memory form VPBROADCASTQZm.
1353 let Predicates = [HasAVX512] in {
1354 // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
1355 def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
1356 (VPBROADCASTQZm addr:$src)>;
// Same X86vzload fold as the HasAVX512 pattern, for the 128-bit and 256-bit
// destinations (VPBROADCASTQZ128m / VPBROADCASTQZ256m), gated on HasVLX.
1359 let Predicates = [HasVLX] in {
1360 // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
1361 def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
1362 (VPBROADCASTQZ128m addr:$src)>;
1363 def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
1364 (VPBROADCASTQZ256m addr:$src)>;
// 16-bit broadcasts whose scalar operand is an i32 load truncated to i16.
// Three load flavours are matched for each destination type (plain `load`,
// `extloadi16`, `zextloadi16`); all select the memory-form word broadcast
// (VPBROADCASTWZ128m for v8i16, VPBROADCASTWZ256m for v16i16).
1366 let Predicates = [HasVLX, HasBWI] in {
1367 // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
1368 // This means we'll encounter truncated i32 loads; match that here.
1369 def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1370 (VPBROADCASTWZ128m addr:$src)>;
1371 def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1372 (VPBROADCASTWZ256m addr:$src)>;
1373 def : Pat<(v8i16 (X86VBroadcast
1374 (i16 (trunc (i32 (extloadi16 addr:$src)))))),
1375 (VPBROADCASTWZ128m addr:$src)>;
1376 def : Pat<(v8i16 (X86VBroadcast
1377 (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1378 (VPBROADCASTWZ128m addr:$src)>;
1379 def : Pat<(v16i16 (X86VBroadcast
1380 (i16 (trunc (i32 (extloadi16 addr:$src)))))),
1381 (VPBROADCASTWZ256m addr:$src)>;
1382 def : Pat<(v16i16 (X86VBroadcast
1383 (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1384 (VPBROADCASTWZ256m addr:$src)>;
// 512-bit counterpart of the truncated-i32-load word broadcast patterns:
// v32i16 broadcasts of `load`, `extloadi16` and `zextloadi16` all select
// VPBROADCASTWZm. Gated on HasBWI only.
1386 let Predicates = [HasBWI] in {
1387 // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
1388 // This means we'll encounter truncated i32 loads; match that here.
1389 def : Pat<(v32i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1390 (VPBROADCASTWZm addr:$src)>;
1391 def : Pat<(v32i16 (X86VBroadcast
1392 (i16 (trunc (i32 (extloadi16 addr:$src)))))),
1393 (VPBROADCASTWZm addr:$src)>;
1394 def : Pat<(v32i16 (X86VBroadcast
1395 (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1396 (VPBROADCASTWZm addr:$src)>;
1399 //===----------------------------------------------------------------------===//
1400 // AVX-512 BROADCAST SUBVECTORS
// 512-bit subvector broadcasts from memory.
//   32X4 variants (opcodes 0x5a integer / 0x1a floating point): 128-bit
//   source (v4i32x / v4f32x), EVEX_CD8<32, CD8VT4>.
//   64X4 variants (opcodes 0x5b integer / 0x1b floating point): 256-bit
//   source (v4i64x / v4f64x), VEX_W, EVEX_CD8<64, CD8VT4>.
1403 defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1404 v16i32_info, v4i32x_info>,
1405 EVEX_V512, EVEX_CD8<32, CD8VT4>;
1406 defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1407 v16f32_info, v4f32x_info>,
1408 EVEX_V512, EVEX_CD8<32, CD8VT4>;
1409 defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1410 v8i64_info, v4i64x_info>, VEX_W,
1411 EVEX_V512, EVEX_CD8<64, CD8VT4>;
1412 defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1413 v8f64_info, v4f64x_info>, VEX_W,
1414 EVEX_V512, EVEX_CD8<64, CD8VT4>;
// Selection patterns for 512-bit subvector broadcasts, gated on HasAVX512.
// Four groups follow:
//   1. 256-bit loads of types other than the instruction's native one ->
//      VBROADCAST{F,I}64X4rm.
//   2. Register-source fallback: the 256-bit value is placed in the low half
//      with INSERT_SUBREG ... sub_ymm and inserted again at index 1 with
//      VINSERT{F,I}64x4Zrr.
//   3. 128-bit loads of non-native types -> VBROADCAST{F,I}32X4rm.
//   4. Masked selects over a bitcast of the broadcast -> the rmkz
//      (zero-masked) and rmk (merge-masked, pass-through VR512:$src0) forms.
// NOTE(review): in group 4 the pass-through operand line of every merge
// pattern (embedded lines 1463, 1471, 1480, 1488) and the closing brace are
// not visible in this listing.
1416 let Predicates = [HasAVX512] in {
1417 def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
1418 (VBROADCASTF64X4rm addr:$src)>;
1419 def : Pat<(v16i32 (X86SubVBroadcast (loadv8i32 addr:$src))),
1420 (VBROADCASTI64X4rm addr:$src)>;
1421 def : Pat<(v32i16 (X86SubVBroadcast (loadv16i16 addr:$src))),
1422 (VBROADCASTI64X4rm addr:$src)>;
1423 def : Pat<(v64i8 (X86SubVBroadcast (loadv32i8 addr:$src))),
1424 (VBROADCASTI64X4rm addr:$src)>;
1426 // Provide fallback in case the load node that is used in the patterns above
1427 // is used by additional users, which prevents the pattern selection.
1428 def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
1429 (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1430 (v4f64 VR256X:$src), 1)>;
1431 def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
1432 (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1433 (v8f32 VR256X:$src), 1)>;
1434 def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
1435 (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1436 (v4i64 VR256X:$src), 1)>;
1437 def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
1438 (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1439 (v8i32 VR256X:$src), 1)>;
1440 def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
1441 (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1442 (v16i16 VR256X:$src), 1)>;
1443 def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
1444 (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1445 (v32i8 VR256X:$src), 1)>;
// 128-bit loads of non-native element types use the 32X4 broadcast.
1447 def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1448 (VBROADCASTF32X4rm addr:$src)>;
1449 def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1450 (VBROADCASTI32X4rm addr:$src)>;
1451 def : Pat<(v32i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
1452 (VBROADCASTI32X4rm addr:$src)>;
1453 def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
1454 (VBROADCASTI32X4rm addr:$src)>;
1456 // Patterns for selects of bitcasted operations.
1457 def : Pat<(vselect VK16WM:$mask,
1458 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1459 (v16f32 immAllZerosV)),
1460 (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1461 def : Pat<(vselect VK16WM:$mask,
1462 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1464 (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1465 def : Pat<(vselect VK16WM:$mask,
1466 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1467 (v16i32 immAllZerosV)),
1468 (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1469 def : Pat<(vselect VK16WM:$mask,
1470 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1472 (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
// Same idea with an 8-lane mask over a 64-bit-element view of a
// 32-bit-element 256-bit broadcast.
1474 def : Pat<(vselect VK8WM:$mask,
1475 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
1476 (v8f64 immAllZerosV)),
1477 (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1478 def : Pat<(vselect VK8WM:$mask,
1479 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
1481 (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1482 def : Pat<(vselect VK8WM:$mask,
1483 (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
1484 (v8i64 immAllZerosV)),
1485 (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1486 def : Pat<(vselect VK8WM:$mask,
1487 (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
1489 (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
// 256-bit subvector broadcasts, gated on HasVLX: the two 32X4 instruction
// definitions (128-bit source, 256-bit destination, EVEX_V256), load patterns
// for non-native element types, masked selects over a bitcast of the
// broadcast (rmkz / rmk), and a register-source fallback built from
// INSERT_SUBREG ... sub_xmm plus VINSERT{F,I}32x4Z256rr at index 1.
// NOTE(review): the pass-through operand line of every merge pattern
// (embedded lines 1516, 1524) and the closing brace are not visible in this
// listing.
1492 let Predicates = [HasVLX] in {
1493 defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1494 v8i32x_info, v4i32x_info>,
1495 EVEX_V256, EVEX_CD8<32, CD8VT4>;
1496 defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1497 v8f32x_info, v4f32x_info>,
1498 EVEX_V256, EVEX_CD8<32, CD8VT4>;
// 128-bit loads of non-native element types reuse the 32X4 instructions.
1500 def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1501 (VBROADCASTF32X4Z256rm addr:$src)>;
1502 def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1503 (VBROADCASTI32X4Z256rm addr:$src)>;
1504 def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
1505 (VBROADCASTI32X4Z256rm addr:$src)>;
1506 def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
1507 (VBROADCASTI32X4Z256rm addr:$src)>;
1509 // Patterns for selects of bitcasted operations.
1510 def : Pat<(vselect VK8WM:$mask,
1511 (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1512 (v8f32 immAllZerosV)),
1513 (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1514 def : Pat<(vselect VK8WM:$mask,
1515 (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1517 (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1518 def : Pat<(vselect VK8WM:$mask,
1519 (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1520 (v8i32 immAllZerosV)),
1521 (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1522 def : Pat<(vselect VK8WM:$mask,
1523 (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1525 (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1528 // Provide fallback in case the load node that is used in the patterns above
1529 // is used by additional users, which prevents the pattern selection.
1530 def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
1531 (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1532 (v2f64 VR128X:$src), 1)>;
1533 def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
1534 (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1535 (v4f32 VR128X:$src), 1)>;
1536 def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
1537 (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1538 (v2i64 VR128X:$src), 1)>;
1539 def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
1540 (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1541 (v4i32 VR128X:$src), 1)>;
1542 def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
1543 (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1544 (v8i16 VR128X:$src), 1)>;
1545 def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
1546 (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1547 (v16i8 VR128X:$src), 1)>;
// 64X2 subvector broadcasts with a 256-bit destination, gated on
// HasVLX + HasDQI. Both instructions are built with the _dq multiclass and
// carry VEX_W1X, EVEX_V256 and EVEX_CD8<64, CD8VT2>.
// NOTE(review): the records are named ...Z128 although they use the 256-bit
// vector infos (v4i64x / v4f64x) and EVEX_V256, whereas the 32X4 256-bit
// records are named ...Z256 - confirm the naming is intentional before
// relying on the suffix.
// NOTE(review): the pass-through operand line of each merge pattern (embedded
// lines 1565, 1573) and the closing brace are not visible in this listing.
1550 let Predicates = [HasVLX, HasDQI] in {
1551 defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1552 v4i64x_info, v2i64x_info>, VEX_W1X,
1553 EVEX_V256, EVEX_CD8<64, CD8VT2>;
1554 defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1555 v4f64x_info, v2f64x_info>, VEX_W1X,
1556 EVEX_V256, EVEX_CD8<64, CD8VT2>;
1558 // Patterns for selects of bitcasted operations.
// A 4-lane mask applied to a 64-bit-element view of a 32-bit-element 128-bit
// load broadcast selects the masked 64X2 forms.
1559 def : Pat<(vselect VK4WM:$mask,
1560 (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1561 (v4f64 immAllZerosV)),
1562 (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1563 def : Pat<(vselect VK4WM:$mask,
1564 (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1566 (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1567 def : Pat<(vselect VK4WM:$mask,
1568 (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1569 (v4i64 immAllZerosV)),
1570 (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1571 def : Pat<(vselect VK4WM:$mask,
1572 (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1574 (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
// 512-bit (EVEX_V512) subvector broadcasts guarded by HasDQI. The 64X2 forms
// take a 128-bit source info with VEX_W and EVEX_CD8<64, CD8VT2>; the 32X8
// forms take a 256-bit source info with EVEX_CD8<32, CD8VT8>.
1577 let Predicates = [HasDQI] in {
1578 defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1579 v8i64_info, v2i64x_info>, VEX_W,
1580 EVEX_V512, EVEX_CD8<64, CD8VT2>;
1581 defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1582 v16i32_info, v8i32x_info>,
1583 EVEX_V512, EVEX_CD8<32, CD8VT8>;
1584 defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1585 v8f64_info, v2f64x_info>, VEX_W,
1586 EVEX_V512, EVEX_CD8<64, CD8VT2>;
1587 defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1588 v16f32_info, v8f32x_info>,
1589 EVEX_V512, EVEX_CD8<32, CD8VT8>;
1591 // Patterns for selects of bitcasted operations.
// First group: the broadcast is typed with 64-bit elements (v8f64/v8i64 from
// a 256-bit load) but selected per 32-bit element with a VK16WM mask, so the
// masked 32X8 forms are used. rmkz matches an immAllZerosV false operand; rmk
// takes a passthru register.
// NOTE(review): the false operand of every rmk pattern in this block is not
// visible in this listing — presumably VR512:$src0; confirm.
1592 def : Pat<(vselect VK16WM:$mask,
1593 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
1594 (v16f32 immAllZerosV)),
1595 (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
1596 def : Pat<(vselect VK16WM:$mask,
1597 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
1599 (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1600 def : Pat<(vselect VK16WM:$mask,
1601 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
1602 (v16i32 immAllZerosV)),
1603 (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
1604 def : Pat<(vselect VK16WM:$mask,
1605 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
1607 (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
// Second group: the broadcast is typed with 32-bit elements (v16f32/v16i32
// from a 128-bit load) but selected per 64-bit element with a VK8WM mask, so
// the masked 64X2 forms are used.
1609 def : Pat<(vselect VK8WM:$mask,
1610 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1611 (v8f64 immAllZerosV)),
1612 (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
1613 def : Pat<(vselect VK8WM:$mask,
1614 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1616 (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1617 def : Pat<(vselect VK8WM:$mask,
1618 (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1619 (v8i64 immAllZerosV)),
1620 (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
1621 def : Pat<(vselect VK8WM:$mask,
1622 (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1624 (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
// Defines the 512-bit (Z, needs HasDQI) and 256-bit (Z256, needs HasDQI and
// HasVLX) variants of a 32x2 broadcast by instantiating
// avx512_broadcast_rm_split. Both variants use WriteShuffle256 /
// WriteShuffle256Ld, pass _Src.info128 as the last VT-info argument, and pass
// null_frag as the final argument.
//   opc       - opcode byte forwarded to avx512_broadcast_rm_split.
//   OpcodeStr - assembly mnemonic.
//   _Dst/_Src - per-width VT info bundles for destination and source.
// NOTE(review): the lines that close each defm are not visible in this
// listing — presumably the EVEX_V512 / EVEX_V256 tags; confirm.
1627 multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1628 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
1629 let Predicates = [HasDQI] in
1630 defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
1631 WriteShuffle256Ld, _Dst.info512,
1632 _Src.info512, _Src.info128, null_frag>,
1634 let Predicates = [HasDQI, HasVLX] in
1635 defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
1636 WriteShuffle256Ld, _Dst.info256,
1637 _Src.info256, _Src.info128, null_frag>,
// Extends avx512_common_broadcast_32x2 (which supplies Z and Z256) with a
// 128-bit Z128 variant under HasDQI and HasVLX. Z128 uses WriteShuffle /
// WriteShuffleXLd and the 128-bit info of both _Dst and _Src.
// NOTE(review): the line that closes the Z128 defm is not visible in this
// listing — presumably an EVEX_V128 tag; confirm.
1641 multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1642 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
1643 avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1645 let Predicates = [HasDQI, HasVLX] in
1646 defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
1647 WriteShuffleXLd, _Dst.info128,
1648 _Src.info128, _Src.info128, null_frag>,
// The integer form uses the i32x2 multiclass, which adds a Z128 variant; the
// floating-point form uses the base multiclass, which only defines Z and
// Z256. Destination infos are 32-bit element bundles and source infos are
// 64-bit element bundles.
1652 defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1653 avx512vl_i32_info, avx512vl_i64_info>;
1654 defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1655 avx512vl_f32_info, avx512vl_f64_info>;
// X86VBroadcast whose source is a 256- or 512-bit register: select the
// register form of VBROADCASTSS/VBROADCASTSD and feed it the sub_xmm
// subregister of the source, extracted with EXTRACT_SUBREG.
// NOTE(review): the end of the HasVLX `let` is not visible in this listing, so
// it cannot be told from here which of the patterns below it covers.
1657 let Predicates = [HasVLX] in {
1658 def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))),
1659 (VBROADCASTSSZ256r (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>;
1660 def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))),
1661 (VBROADCASTSDZ256r (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>;
// 512-bit results, from either a 512-bit or a 256-bit source register.
1664 def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
1665 (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))>;
1666 def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
1667 (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>;
1669 def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
1670 (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))>;
1671 def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
1672 (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>;
1674 //===----------------------------------------------------------------------===//
1675 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
// Defines a single register-to-register instruction `rr` that takes a mask
// register (KRC:$src) and writes a vector register (_.RC:$dst). Its selection
// pattern is X86VBroadcastm of the mask, typed as _.VT. The instruction is
// EVEX-encoded and scheduled as WriteShuffle.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   _         - VT info of the destination vector.
//   KRC       - register class of the mask source.
1677 multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1678 X86VectorVTInfo _, RegisterClass KRC> {
1679 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1680 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1681 [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1682 EVEX, Sched<[WriteShuffle]>;
// Instantiates avx512_mask_broadcastm at all three vector widths: Z (512-bit)
// requires HasCDI; Z256 and Z128 additionally require HasVLX. The same mask
// register class KRC is used for every width.
1685 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1686 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1687 let Predicates = [HasCDI] in
1688 defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1689 let Predicates = [HasCDI, HasVLX] in {
1690 defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1691 defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
// vpbroadcastmw2d reads a VK16 mask into i32-element vectors; vpbroadcastmb2q
// reads a VK8 mask into i64-element vectors and is tagged VEX_W.
1695 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1696 avx512vl_i32_info, VK16>;
1697 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1698 avx512vl_i64_info, VK8>, VEX_W;
1700 //===----------------------------------------------------------------------===//
1701 // -- VPERMI2 - 3 source operands form --
// Defines the register (rr) and full-vector-load (rm) forms through
// AVX512_maskable_3src_cast. $src1 is tied to $dst, and in the X86VPermt2
// pattern the tied operand appears in the middle position with the IdxVT
// register class, while $src2 and $src3 are the _-typed operands on either
// side of it.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   sched     - scheduling class; the rm form uses sched.Folded plus
//               sched.ReadAfterFold.
//   _         - VT info of the data vectors.
//   IdxVT     - VT info of the tied $src1 operand.
// NOTE(review): the meaning of the trailing `1` argument is defined by
// AVX512_maskable_3src_cast, which is not visible here.
1702 multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1703 X86FoldableSchedWrite sched,
1704 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1705 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1706 hasSideEffects = 0 in {
1707 defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1708 (ins _.RC:$src2, _.RC:$src3),
1709 OpcodeStr, "$src3, $src2", "$src2, $src3",
1710 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1711 EVEX_4V, AVX5128IBase, Sched<[sched]>;
1714 defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1715 (ins _.RC:$src2, _.MemOp:$src3),
1716 OpcodeStr, "$src3, $src2", "$src2, $src3",
1717 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1718 (_.VT (_.LdFrag addr:$src3)))), 1>,
1719 EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Embedded-broadcast memory form (rmb) of the VPERMI2 family. $src3 is a
// scalar memory operand whose load is wrapped in X86VBroadcast in the
// pattern; the assembly strings append _.BroadcastStr to it and the
// instruction is tagged EVEX_B. As in avx512_perm_i, $src1 is tied to $dst and
// carries the IdxVT register class. mayLoad is set explicitly here.
1723 multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1724 X86FoldableSchedWrite sched,
1725 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1726 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1727 hasSideEffects = 0, mayLoad = 1 in
1728 defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1729 (ins _.RC:$src2, _.ScalarMemOp:$src3),
1730 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1731 !strconcat("$src2, ${src3}", _.BroadcastStr ),
1732 (_.VT (X86VPermt2 _.RC:$src2,
1733 IdxVT.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
1734 AVX5128IBase, EVEX_4V, EVEX_B,
1735 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates avx512_perm_i together with avx512_perm_i_mb at each vector
// width. The 512-bit records use the bare NAME and add no predicate of their
// own in this multiclass; the NAME#128 and NAME#256 records are guarded by
// HasVLX.
//   VTInfo      - per-width VT info of the data vectors.
//   ShuffleMask - per-width VT info passed as IdxVT.
1738 multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1739 X86FoldableSchedWrite sched,
1740 AVX512VLVectorVTInfo VTInfo,
1741 AVX512VLVectorVTInfo ShuffleMask> {
1742 defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1743 ShuffleMask.info512>,
1744 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1745 ShuffleMask.info512>, EVEX_V512;
1746 let Predicates = [HasVLX] in {
1747 defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1748 ShuffleMask.info128>,
1749 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1750 ShuffleMask.info128>, EVEX_V128;
1751 defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1752 ShuffleMask.info256>,
1753 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1754 ShuffleMask.info256>, EVEX_V256;
// Variant of avx512_perm_i_sizes that instantiates only avx512_perm_i (no
// embedded-broadcast form) and guards every width with Prd: [Prd] for the
// 512-bit records, [Prd, HasVLX] for NAME#128 and NAME#256.
// NOTE(review): the end of the parameter list, where Prd is declared, is not
// visible in this listing — presumably `Predicate Prd`, matching
// avx512_perm_t_sizes_bw; confirm.
1758 multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1759 X86FoldableSchedWrite sched,
1760 AVX512VLVectorVTInfo VTInfo,
1761 AVX512VLVectorVTInfo Idx,
1763 let Predicates = [Prd] in
1764 defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1765 Idx.info512>, EVEX_V512;
1766 let Predicates = [Prd, HasVLX] in {
1767 defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1768 Idx.info128>, EVEX_V128;
1769 defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1770 Idx.info256>, EVEX_V256;
// VPERMI2 instruction families. D/Q share opcode 0x76, W/B share 0x75 and
// PS/PD share 0x77; within each pair the wider-element form is tagged VEX_W.
// The W and B forms go through the _bw multiclass with HasBWI and HasVBMI
// respectively. The floating-point forms pair an fp data info with the
// integer info of the same element width for the index operand.
// NOTE(review): the final line of the VPERMI2B defm is not visible in this
// listing.
1774 defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1775 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1776 defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1777 avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1778 defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1779 avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1780 VEX_W, EVEX_CD8<16, CD8VF>;
1781 defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1782 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1784 defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1785 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1786 defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1787 avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1789 // Extra patterns to deal with extra bitcasts due to passthru and index being
1790 // different types on the fp versions.
// Each pattern matches a masked vselect whose true value is an X86VPermt2 and
// whose false value (passthru) is the same $src1 register that feeds the index
// operand, where both reach their use through a bitconvert from CastVT. The
// three patterns cover a register third operand (rrk), a full vector load
// (rmk) and a broadcast scalar load (rmbk).
//   InstrStr - instruction name prefix; the form suffix is appended to it.
//   _        - VT info of the data vectors and of the result.
//   IdxVT    - VT info the index is converted to.
//   CastVT   - VT info $src1 is converted from.
1791 multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1792 X86VectorVTInfo IdxVT,
1793 X86VectorVTInfo CastVT> {
1794 def : Pat<(_.VT (vselect _.KRCWM:$mask,
1795 (X86VPermt2 (_.VT _.RC:$src2),
1796 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
1797 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1798 (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1799 _.RC:$src2, _.RC:$src3)>;
1800 def : Pat<(_.VT (vselect _.KRCWM:$mask,
1801 (X86VPermt2 _.RC:$src2,
1802 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
1803 (_.LdFrag addr:$src3)),
1804 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1805 (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1806 _.RC:$src2, addr:$src3)>;
1807 def : Pat<(_.VT (vselect _.KRCWM:$mask,
1808 (X86VPermt2 _.RC:$src2,
1809 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
1810 (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
1811 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1812 (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1813 _.RC:$src2, addr:$src3)>;
1816 // TODO: Should we add more casts? The vXi64 case is common due to ABI.
// Only VPERMI2PS gets the extra patterns here, at all three widths, with the
// i32 vector as the index type and the i64 vector of the same total width as
// the cast source type.
1817 defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
1818 defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
1819 defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
// VPERMT2 counterpart of avx512_perm_i: defines the register (rr) and
// full-vector-load (rm) forms through AVX512_maskable_3src. $src1 is again
// tied to $dst, but here it is the first, _-typed operand of X86VPermt2, and
// the IdxVT-typed operand is the explicit input $src2.
//   sched - scheduling class; the rm form uses sched.Folded plus
//           sched.ReadAfterFold.
//   _     - VT info of the data vectors.
//   IdxVT - VT info of $src2.
1822 multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1823 X86FoldableSchedWrite sched,
1824 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1825 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1826 defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1827 (ins IdxVT.RC:$src2, _.RC:$src3),
1828 OpcodeStr, "$src3, $src2", "$src2, $src3",
1829 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1830 EVEX_4V, AVX5128IBase, Sched<[sched]>;
1832 defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1833 (ins IdxVT.RC:$src2, _.MemOp:$src3),
1834 OpcodeStr, "$src3, $src2", "$src2, $src3",
1835 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1836 (_.LdFrag addr:$src3))), 1>,
1837 EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Embedded-broadcast memory form (rmb) of the VPERMT2 family. $src3 is a
// scalar memory operand whose load is wrapped in X86VBroadcast; the assembly
// strings append _.BroadcastStr and the instruction is tagged EVEX_B. $src1 is
// tied to $dst and $src2 carries the IdxVT register class.
1840 multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1841 X86FoldableSchedWrite sched,
1842 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1843 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1844 defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1845 (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1846 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1847 !strconcat("$src2, ${src3}", _.BroadcastStr ),
1848 (_.VT (X86VPermt2 _.RC:$src1,
1849 IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
1850 AVX5128IBase, EVEX_4V, EVEX_B,
1851 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates avx512_perm_t together with avx512_perm_t_mb at each vector
// width. The 512-bit records use the bare NAME and add no predicate of their
// own in this multiclass; the NAME#128 and NAME#256 records are guarded by
// HasVLX.
//   VTInfo      - per-width VT info of the data vectors.
//   ShuffleMask - per-width VT info passed as IdxVT.
1854 multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1855 X86FoldableSchedWrite sched,
1856 AVX512VLVectorVTInfo VTInfo,
1857 AVX512VLVectorVTInfo ShuffleMask> {
1858 defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1859 ShuffleMask.info512>,
1860 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1861 ShuffleMask.info512>, EVEX_V512;
1862 let Predicates = [HasVLX] in {
1863 defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1864 ShuffleMask.info128>,
1865 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1866 ShuffleMask.info128>, EVEX_V128;
1867 defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1868 ShuffleMask.info256>,
1869 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1870 ShuffleMask.info256>, EVEX_V256;
// Variant of avx512_perm_t_sizes that instantiates only avx512_perm_t (no
// embedded-broadcast form) and guards every width with the caller-supplied
// predicate: [Prd] for the 512-bit records, [Prd, HasVLX] for NAME#128 and
// NAME#256.
1874 multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1875 X86FoldableSchedWrite sched,
1876 AVX512VLVectorVTInfo VTInfo,
1877 AVX512VLVectorVTInfo Idx, Predicate Prd> {
1878 let Predicates = [Prd] in
1879 defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1880 Idx.info512>, EVEX_V512;
1881 let Predicates = [Prd, HasVLX] in {
1882 defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1883 Idx.info128>, EVEX_V128;
1884 defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1885 Idx.info256>, EVEX_V256;
// VPERMT2 instruction families. D/Q share opcode 0x7E, W/B share 0x7D and
// PS/PD share 0x7F; within each pair the wider-element form is tagged VEX_W.
// The W and B forms go through the _bw multiclass with HasBWI and HasVBMI
// respectively. The floating-point forms pair an fp data info with the
// integer info of the same element width for the index operand.
// NOTE(review): the final line of the VPERMT2B defm is not visible in this
// listing.
1889 defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1890 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1891 defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1892 avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1893 defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1894 avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1895 VEX_W, EVEX_CD8<16, CD8VF>;
1896 defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1897 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1899 defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1900 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1901 defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1902 avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1904 //===----------------------------------------------------------------------===//
1905 // AVX-512 - BLEND using mask
// Defines the six non-broadcast forms of a mask blend: rr, rrk, rrkz with a
// register second source and rm, rmk, rmkz with a full-vector memory second
// source. All six have an empty selection pattern list ([]), so nothing here
// selects them from DAG nodes; hasSideEffects is cleared for all of them and
// mayLoad is set for the memory forms.
//   k  forms - take _.KRCWM:$mask and are tagged EVEX_K.
//   kz forms - same operands, tagged EVEX_KZ and NotMemoryFoldable; their
//              assembly strings add "{z}".
// Memory forms carry EVEX_CD8<_.EltSize, CD8VF> and are scheduled with
// sched.Folded plus sched.ReadAfterFold.
1908 multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1909 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1910 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1911 def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1912 (ins _.RC:$src1, _.RC:$src2),
1913 !strconcat(OpcodeStr,
1914 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1915 EVEX_4V, Sched<[sched]>;
1916 def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1917 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1918 !strconcat(OpcodeStr,
1919 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1920 []>, EVEX_4V, EVEX_K, Sched<[sched]>;
1921 def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1922 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1923 !strconcat(OpcodeStr,
1924 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1925 []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
1926 let mayLoad = 1 in {
1927 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1928 (ins _.RC:$src1, _.MemOp:$src2),
1929 !strconcat(OpcodeStr,
1930 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1931 []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
1932 Sched<[sched.Folded, sched.ReadAfterFold]>;
1933 def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1934 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1935 !strconcat(OpcodeStr,
1936 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1937 []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1938 Sched<[sched.Folded, sched.ReadAfterFold]>;
1939 def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1940 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1941 !strconcat(OpcodeStr,
1942 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1943 []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1944 Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
// Embedded-broadcast memory forms of the mask blend: rmbk (EVEX_K), rmbkz
// (EVEX_KZ, NotMemoryFoldable) and the unmasked rmb. Each takes a scalar
// memory operand for $src2, appends _.BroadcastStr to it in the assembly
// string, and is tagged EVEX_B with EVEX_CD8<_.EltSize, CD8VF>. As in
// WriteFVarBlendask, the pattern lists are empty, and mayLoad is set with
// hasSideEffects cleared.
1948 multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
1949 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1950 let mayLoad = 1, hasSideEffects = 0 in {
1951 def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1952 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1953 !strconcat(OpcodeStr,
1954 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1955 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1956 EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1957 Sched<[sched.Folded, sched.ReadAfterFold]>;
1959 def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1960 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1961 !strconcat(OpcodeStr,
1962 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
1963 "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1964 EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1965 Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
1967 def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1968 (ins _.RC:$src1, _.ScalarMemOp:$src2),
1969 !strconcat(OpcodeStr,
1970 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
1971 "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1972 EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1973 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Per-width instantiation of the mask blend, combining WriteFVarBlendask and
// WriteFVarBlendask_rmb for each width. Z uses sched.ZMM and adds no
// predicate of its own here; Z256 (sched.YMM) and Z128 (sched.XMM) are
// guarded by HasVLX.
// NOTE(review): the line that closes each defm is not visible in this
// listing — presumably the EVEX_V512 / EVEX_V256 / EVEX_V128 tags; confirm.
1977 multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
1978 AVX512VLVectorVTInfo VTInfo> {
1979 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1980 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1983 let Predicates = [HasVLX] in {
1984 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1985 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1987 defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
1988 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
// Per-width instantiation of the mask blend that uses WriteFVarBlendask only;
// the visible lines do not instantiate the embedded-broadcast multiclass. Z
// requires HasBWI; Z256 and Z128 require HasBWI and HasVLX.
// NOTE(review): the line that closes each defm is not visible in this
// listing — presumably the EVEX_V512 / EVEX_V256 / EVEX_V128 tags; confirm.
1993 multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
1994 AVX512VLVectorVTInfo VTInfo> {
1995 let Predicates = [HasBWI] in
1996 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1999 let Predicates = [HasBWI, HasVLX] in {
2000 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2002 defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
// Mask-blend instruction families. The fp forms use opcode 0x65 with
// SchedWriteFVarBlend; the D/Q integer forms use 0x64 and the B/W forms 0x66,
// both with SchedWriteVarBlend. PD, Q and W are tagged VEX_W.
// NOTE(review): the second line of the VBLENDMPS, VPBLENDMD and VPBLENDMB
// defms (the VT-info argument) is not visible in this listing.
2007 defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
2009 defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
2010 avx512vl_f64_info>, VEX_W;
2011 defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
2013 defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
2014 avx512vl_i64_info>, VEX_W;
2015 defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
2017 defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
2018 avx512vl_i16_info>, VEX_W;
2020 //===----------------------------------------------------------------------===//
2021 // Compare Instructions
2022 //===----------------------------------------------------------------------===//
2024 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
// Defines five records, all with opcode 0xC2 and VEX_LIG:
//   rr_Int  - register compare on _.RC operands via AVX512_maskable_cmp,
//             given OpNode and its single-use variant OpNode_su.
//   rm_Int  - same with an _.IntScalarMemOp second source matched through
//             _.ScalarIntMemCPat; adds EVEX_CD8<_.EltSize, CD8VT1>.
//   rrb_Int - register compare whose assembly strings contain "{sae}"; uses
//             OpNodeSAE / OpNodeSAE_su and is tagged EVEX_B.
//   rr, rm  - isCodeGenOnly forms on the _.FRC register class that produce
//             _.KRC:$dst directly from OpNode; rr is marked commutable.
// NOTE(review): several interior lines of these definitions (output lists,
// mnemonic strings, trailing pattern operands) are not visible in this
// listing; the code is reproduced exactly as shown.
2026 multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
2027 PatFrag OpNode_su, PatFrag OpNodeSAE_su,
2028 X86FoldableSchedWrite sched> {
2029 defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2031 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2033 "$cc, $src2, $src1", "$src1, $src2, $cc",
2034 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
2035 (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2036 imm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>;
2038 defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2040 (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
2042 "$cc, $src2, $src1", "$src1, $src2, $cc",
2043 (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
2045 (OpNode_su (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
2046 imm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2047 Sched<[sched.Folded, sched.ReadAfterFold]>;
2049 defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2051 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2053 "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
2054 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2056 (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2058 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
2060 let isCodeGenOnly = 1 in {
2061 let isCommutable = 1 in
2062 def rr : AVX512Ii8<0xC2, MRMSrcReg,
2063 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
2064 !strconcat("vcmp", _.Suffix,
2065 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2066 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2069 EVEX_4V, VEX_LIG, Sched<[sched]>;
2070 def rm : AVX512Ii8<0xC2, MRMSrcMem,
2072 (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2073 !strconcat("vcmp", _.Suffix,
2074 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2075 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2076 (_.ScalarLdFrag addr:$src2),
2078 EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2079 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Single-use variants of X86cmpms and X86cmpmsSAE: each fragment matches the
// underlying node with the same three operands, and its predicate accepts the
// node only when N->hasOneUse() is true.
2083 def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2084 (X86cmpms node:$src1, node:$src2, node:$cc), [{
2085 return N->hasOneUse();
2087 def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2088 (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
2089 return N->hasOneUse();
// Scalar compare instantiations under HasAVX512. Both pass the same
// X86cmpms / X86cmpmsSAE nodes, their _su variants and SchedWriteFCmp.Scl.
// The single-precision form uses f32x_info in the SSEPackedSingle domain with
// AVX512XSIi8Base; the double-precision form uses f64x_info in the
// SSEPackedDouble domain with AVX512XDIi8Base and VEX_W.
2092 let Predicates = [HasAVX512] in {
2093 let ExeDomain = SSEPackedSingle in
2094 defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
2095 X86cmpms_su, X86cmpmsSAE_su,
2096 SchedWriteFCmp.Scl>, AVX512XSIi8Base;
2097 let ExeDomain = SSEPackedDouble in
2098 defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
2099 X86cmpms_su, X86cmpmsSAE_su,
2100 SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
// Packed integer compare that writes a mask register (_.KRC:$dst). Defines
// four forms:
//   rr  - register/register, pattern is OpNode on the two vectors.
//   rm  - second source is a full-vector load (_.LdFrag).
//   rrk - masked register form; pattern is `and` of _.KRCWM:$mask with the
//         single-use fragment OpNode_su.
//   rmk - masked memory form, same `and` pattern with a loaded second source.
// Only the register forms (rr, rrk) receive isCommutable = IsCommutable.
// Memory forms are scheduled with sched.Folded plus sched.ReadAfterFold.
2103 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
2104 PatFrag OpNode_su, X86FoldableSchedWrite sched,
2105 X86VectorVTInfo _, bit IsCommutable> {
2106 let isCommutable = IsCommutable in
2107 def rr : AVX512BI<opc, MRMSrcReg,
2108 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2109 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2110 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))]>,
2111 EVEX_4V, Sched<[sched]>;
2112 def rm : AVX512BI<opc, MRMSrcMem,
2113 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2114 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2115 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2116 (_.VT (_.LdFrag addr:$src2))))]>,
2117 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2118 let isCommutable = IsCommutable in
2119 def rrk : AVX512BI<opc, MRMSrcReg,
2120 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2121 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2122 "$dst {${mask}}, $src1, $src2}"),
2123 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2124 (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2))))]>,
2125 EVEX_4V, EVEX_K, Sched<[sched]>;
2126 def rmk : AVX512BI<opc, MRMSrcMem,
2127 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2128 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2129 "$dst {${mask}}, $src1, $src2}"),
2130 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2131 (OpNode_su (_.VT _.RC:$src1),
2132 (_.VT (_.LdFrag addr:$src2)))))]>,
2133 EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Extends avx512_icmp_packed with the embedded-broadcast memory forms rmb and
// rmbk. Both take a scalar memory operand, append _.BroadcastStr to it in the
// assembly string and are tagged EVEX_B; rmbk additionally takes
// _.KRCWM:$mask, is tagged EVEX_K and matches `and` of the mask with
// OpNode_su.
// NOTE(review): parts of the parameter list and one line of the rmbk pattern
// are not visible in this listing — the latter is presumably the X86VBroadcast
// wrapper seen in rmb; confirm.
2136 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
2138 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2140 avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched, _, IsCommutable> {
2141 def rmb : AVX512BI<opc, MRMSrcMem,
2142 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2143 !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2144 "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2145 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2146 (X86VBroadcast (_.ScalarLdFrag addr:$src2))))]>,
2147 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2148 def rmbk : AVX512BI<opc, MRMSrcMem,
2149 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2150 _.ScalarMemOp:$src2),
2151 !strconcat(OpcodeStr,
2152 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2153 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2154 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2155 (OpNode_su (_.VT _.RC:$src1),
2157 (_.ScalarLdFrag addr:$src2)))))]>,
2158 EVEX_4V, EVEX_K, EVEX_B,
2159 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates avx512_icmp_packed at all three widths. Z (sched.ZMM) is
// guarded by the caller's predicate `prd`; Z256 (sched.YMM) and Z128
// (sched.XMM) additionally require HasVLX. IsCommutable defaults to 0 and is
// forwarded unchanged.
2162 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
2163 PatFrag OpNode_su, X86SchedWriteWidths sched,
2164 AVX512VLVectorVTInfo VTInfo, Predicate prd,
2165 bit IsCommutable = 0> {
2166 let Predicates = [prd] in
2167 defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.ZMM,
2168 VTInfo.info512, IsCommutable>, EVEX_V512;
2170 let Predicates = [prd, HasVLX] in {
2171 defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.YMM,
2172 VTInfo.info256, IsCommutable>, EVEX_V256;
2173 defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.XMM,
2174 VTInfo.info128, IsCommutable>, EVEX_V128;
// Same per-width instantiation as avx512_icmp_packed_vl, but built on
// avx512_icmp_packed_rmb so each width also gets the embedded-broadcast
// forms. Z is guarded by `prd`; Z256 and Z128 additionally require HasVLX.
2178 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2179 PatFrag OpNode, PatFrag OpNode_su,
2180 X86SchedWriteWidths sched,
2181 AVX512VLVectorVTInfo VTInfo,
2182 Predicate prd, bit IsCommutable = 0> {
2183 let Predicates = [prd] in
2184 defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.ZMM,
2185 VTInfo.info512, IsCommutable>, EVEX_V512;
2187 let Predicates = [prd, HasVLX] in {
2188 defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.YMM,
2189 VTInfo.info256, IsCommutable>, EVEX_V256;
2190 defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.XMM,
2191 VTInfo.info128, IsCommutable>, EVEX_V128;
2195 // X86setcc_commute re-declares ISD::SETCC as commutable to help match loads
2196 // in both operands for PCMPEQ.
2197 def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
// Equality compare built on the commutable setcc node with condition SETEQ.
2198 def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2),
2199 (X86setcc_commute node:$src1, node:$src2, SETEQ)>;
// Signed greater-than compare built on the ordinary setcc node with SETGT.
2200 def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
2201 (setcc node:$src1, node:$src2, SETGT)>;
// Single-use variants: each accepts the node only when N->hasOneUse() is true.
2203 def X86pcmpeqm_c_su : PatFrag<(ops node:$src1, node:$src2),
2204 (X86pcmpeqm_c node:$src1, node:$src2), [{
2205 return N->hasOneUse();
2207 def X86pcmpgtm_su : PatFrag<(ops node:$src1, node:$src2),
2208 (X86pcmpgtm node:$src1, node:$src2), [{
2209 return N->hasOneUse();
2212 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2213 // increase the pattern complexity the way an immediate would.
// The EQ families use X86pcmpeqm_c and pass IsCommutable = 1; the GT families
// use X86pcmpgtm and leave IsCommutable at its default. Byte and word forms
// use avx512_icmp_packed_vl with HasBWI and VEX_WIG; dword and qword forms use
// avx512_icmp_packed_rmb_vl with HasAVX512, and the qword forms add T8PD and
// VEX_W.
2214 let AddedComplexity = 2 in {
2215 // FIXME: Is there a better scheduler class for VPCMP?
2216 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c, X86pcmpeqm_c_su,
2217 SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2218 EVEX_CD8<8, CD8VF>, VEX_WIG;
2220 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c, X86pcmpeqm_c_su,
2221 SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2222 EVEX_CD8<16, CD8VF>, VEX_WIG;
2224 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c, X86pcmpeqm_c_su,
2225 SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2226 EVEX_CD8<32, CD8VF>;
2228 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c, X86pcmpeqm_c_su,
2229 SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2230 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2232 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm, X86pcmpgtm_su,
2233 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2234 EVEX_CD8<8, CD8VF>, VEX_WIG;
2236 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm, X86pcmpgtm_su,
2237 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2238 EVEX_CD8<16, CD8VF>, VEX_WIG;
2240 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm, X86pcmpgtm_su,
2241 SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2242 EVEX_CD8<32, CD8VF>;
2244 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm, X86pcmpgtm_su,
2245 SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2246 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
// Packed integer compare with an immediate condition code (u8imm:$cc),
// writing a mask register. Defines rri, rmi and the masked rrik, rmik; the
// mnemonic is "vpcmp" followed by Suffix. The unmasked forms match Frag; the
// masked forms match `and` of _.KRCWM:$mask with Frag_su. The register forms
// (rri, rrik) are marked commutable.
// The two anonymous patterns at the end handle a load that appears as the
// FIRST compare operand: they match CommFrag / CommFrag_su and select the
// rmi / rmik instruction named Name#_.ZSuffix, passing the condition through
// CommFrag.OperandTransform.
// NOTE(review): several interior lines of the instruction patterns are not
// visible in this listing; the code is reproduced exactly as shown.
2249 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2250 PatFrag Frag_su, PatFrag CommFrag, PatFrag CommFrag_su,
2251 X86FoldableSchedWrite sched,
2252 X86VectorVTInfo _, string Name> {
2253 let isCommutable = 1 in
2254 def rri : AVX512AIi8<opc, MRMSrcReg,
2255 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2256 !strconcat("vpcmp", Suffix,
2257 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2258 [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2261 EVEX_4V, Sched<[sched]>;
2262 def rmi : AVX512AIi8<opc, MRMSrcMem,
2263 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2264 !strconcat("vpcmp", Suffix,
2265 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2266 [(set _.KRC:$dst, (_.KVT
2269 (_.VT (_.LdFrag addr:$src2)),
2271 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2272 let isCommutable = 1 in
2273 def rrik : AVX512AIi8<opc, MRMSrcReg,
2274 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2276 !strconcat("vpcmp", Suffix,
2277 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2278 "$dst {${mask}}, $src1, $src2, $cc}"),
2279 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2280 (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
2283 EVEX_4V, EVEX_K, Sched<[sched]>;
2284 def rmik : AVX512AIi8<opc, MRMSrcMem,
2285 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2287 !strconcat("vpcmp", Suffix,
2288 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2289 "$dst {${mask}}, $src1, $src2, $cc}"),
2290 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2294 (_.VT (_.LdFrag addr:$src2)),
2296 EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2298 def : Pat<(_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
2299 (_.VT _.RC:$src1), cond)),
2300 (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2301 _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
2303 def : Pat<(and _.KRCWM:$mask,
2304 (_.KVT (CommFrag_su:$cc (_.LdFrag addr:$src2),
2305 (_.VT _.RC:$src1), cond))),
2306 (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2307 _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2308 (CommFrag.OperandTransform $cc))>;
2311 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2312 PatFrag Frag_su, PatFrag CommFrag,
2313 PatFrag CommFrag_su, X86FoldableSchedWrite sched,
2314 X86VectorVTInfo _, string Name> :
2315 avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2317 def rmib : AVX512AIi8<opc, MRMSrcMem,
2318 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2320 !strconcat("vpcmp", Suffix,
2321 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2322 "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2323 [(set _.KRC:$dst, (_.KVT (Frag:$cc
2326 (_.ScalarLdFrag addr:$src2)),
2328 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2329 def rmibk : AVX512AIi8<opc, MRMSrcMem,
2330 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2331 _.ScalarMemOp:$src2, u8imm:$cc),
2332 !strconcat("vpcmp", Suffix,
2333 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2334 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2335 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2339 (_.ScalarLdFrag addr:$src2)),
2341 EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast load appears as the FIRST compare operand. Select the "rmib" form
// with the register as $src1 and the scalar load address as $src2, and take
// the immediate from CommFrag.OperandTransform.
2343 def : Pat<(_.KVT (CommFrag:$cc (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2344 (_.VT _.RC:$src1), cond)),
2345 (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2346 _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
// Masked variant of the pattern above: matches (and $mask, compare) through
// CommFrag_su and selects the "rmibk" form.
2348 def : Pat<(and _.KRCWM:$mask,
2349 (_.KVT (CommFrag_su:$cc (X86VBroadcast
2350 (_.ScalarLdFrag addr:$src2)),
2351 (_.VT _.RC:$src1), cond))),
2352 (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2353 _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2354 (CommFrag.OperandTransform $cc))>;
2357 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2358 PatFrag Frag_su, PatFrag CommFrag,
2359 PatFrag CommFrag_su, X86SchedWriteWidths sched,
2360 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2361 let Predicates = [prd] in
2362 defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2363 sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2365 let Predicates = [prd, HasVLX] in {
2366 defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2367 sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2368 defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2369 sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2373 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2374 PatFrag Frag_su, PatFrag CommFrag,
2375 PatFrag CommFrag_su, X86SchedWriteWidths sched,
2376 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2377 let Predicates = [prd] in
2378 defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2379 sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2381 let Predicates = [prd, HasVLX] in {
2382 defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2383 sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2384 defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2385 sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2389 def X86pcmpm_imm : SDNodeXForm<setcc, [{
2390 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2391 uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2392 return getI8Imm(SSECC, SDLoc(N));
2395 // Swapped operand version of the above.
2396 def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
2397 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2398 uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2399 SSECC = X86::getSwappedVPCMPImm(SSECC);
2400 return getI8Imm(SSECC, SDLoc(N));
2403 def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2404 (setcc node:$src1, node:$src2, node:$cc), [{
2405 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2406 return !ISD::isUnsignedIntSetCC(CC);
2409 def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2410 (setcc node:$src1, node:$src2, node:$cc), [{
2411 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2412 return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2415 // Same as above, but commutes immediate. Use for load folding.
// Matches a setcc whose condition code is NOT an unsigned integer comparison
// (per ISD::isUnsignedIntSetCC). The operand transform attached to this
// fragment is X86pcmpm_imm_commute.
2416 def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2417 (setcc node:$src1, node:$src2, node:$cc), [{
2418 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2419 return !ISD::isUnsignedIntSetCC(CC);
2420 }], X86pcmpm_imm_commute>;
// Single-use form of the commuting signed-compare fragment: in addition to
// rejecting unsigned condition codes it requires N->hasOneUse(). It uses the
// X86pcmpm_imm_commute operand transform.
2422 def X86pcmpm_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2423 (setcc node:$src1, node:$src2, node:$cc), [{
2424 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2425 return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2426 }], X86pcmpm_imm_commute>;
2428 def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2429 (setcc node:$src1, node:$src2, node:$cc), [{
2430 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2431 return ISD::isUnsignedIntSetCC(CC);
2434 def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2435 (setcc node:$src1, node:$src2, node:$cc), [{
2436 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2437 return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2440 // Same as above, but commutes immediate. Use for load folding.
// Matches a setcc whose condition code IS an unsigned integer comparison (per
// ISD::isUnsignedIntSetCC). Note that the operand transform is
// X86pcmpm_imm_commute, the same one the signed commuting fragments use.
2441 def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2442 (setcc node:$src1, node:$src2, node:$cc), [{
2443 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2444 return ISD::isUnsignedIntSetCC(CC);
2445 }], X86pcmpm_imm_commute>;
// Single-use form of the commuting unsigned-compare fragment: requires both
// N->hasOneUse() and an unsigned integer condition code. It uses the
// X86pcmpm_imm_commute operand transform.
2447 def X86pcmpum_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2448 (setcc node:$src1, node:$src2, node:$cc), [{
2449 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2450 return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2451 }], X86pcmpm_imm_commute>;
2453 // FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
2454 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
2455 X86pcmpm_commute, X86pcmpm_commute_su,
2456 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2458 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
2459 X86pcmpum_commute, X86pcmpum_commute_su,
2460 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
// Word compares. Instantiated through avx512_icmp_cc_vl and gated on HasBWI.
// Opcode 0x3F is paired with the X86pcmpm* fragments and 0x3E with the
// X86pcmpum* fragments.
2463 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
2464 X86pcmpm_commute, X86pcmpm_commute_su,
2465 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2466 VEX_W, EVEX_CD8<16, CD8VF>;
2467 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
2468 X86pcmpum_commute, X86pcmpum_commute_su,
2469 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2470 VEX_W, EVEX_CD8<16, CD8VF>;
// Dword/qword compares. These go through avx512_icmp_cc_rmb_vl, whose
// underlying avx512_icmp_cc_rmb multiclass also defines the "rmib"/"rmibk"
// forms that take a _.ScalarMemOp operand. They are gated on HasAVX512.
// Opcode 0x1F is paired with the X86pcmpm* fragments and 0x1E with the
// X86pcmpum* fragments; the qword variants additionally set VEX_W.
2472 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
2473 X86pcmpm_commute, X86pcmpm_commute_su,
2474 SchedWriteVecALU, avx512vl_i32_info,
2475 HasAVX512>, EVEX_CD8<32, CD8VF>;
2476 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
2477 X86pcmpum_commute, X86pcmpum_commute_su,
2478 SchedWriteVecALU, avx512vl_i32_info,
2479 HasAVX512>, EVEX_CD8<32, CD8VF>;
2481 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
2482 X86pcmpm_commute, X86pcmpm_commute_su,
2483 SchedWriteVecALU, avx512vl_i64_info,
2484 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2485 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
2486 X86pcmpum_commute, X86pcmpum_commute_su,
2487 SchedWriteVecALU, avx512vl_i64_info,
2488 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2490 def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2491 (X86cmpm node:$src1, node:$src2, node:$cc), [{
2492 return N->hasOneUse();
2494 def X86cmpmSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2495 (X86cmpmSAE node:$src1, node:$src2, node:$cc), [{
2496 return N->hasOneUse();
2499 multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2501 defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2502 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
2504 "$cc, $src2, $src1", "$src1, $src2, $cc",
2505 (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
2506 (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
2509 defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2510 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2512 "$cc, $src2, $src1", "$src1, $src2, $cc",
2513 (X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2515 (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2517 Sched<[sched.Folded, sched.ReadAfterFold]>;
2519 defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2521 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2523 "$cc, ${src2}"#_.BroadcastStr#", $src1",
2524 "$src1, ${src2}"#_.BroadcastStr#", $cc",
2525 (X86cmpm (_.VT _.RC:$src1),
2526 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
2528 (X86cmpm_su (_.VT _.RC:$src1),
2529 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
2531 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2533 // Patterns for selecting with loads in other operand.
2534 def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2535 CommutableCMPCC:$cc),
2536 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2539 def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
2541 CommutableCMPCC:$cc)),
2542 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2543 _.RC:$src1, addr:$src2,
2546 def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2547 (_.VT _.RC:$src1), CommutableCMPCC:$cc),
2548 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2551 def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (X86VBroadcast
2552 (_.ScalarLdFrag addr:$src2)),
2554 CommutableCMPCC:$cc)),
2555 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2556 _.RC:$src1, addr:$src2,
2560 multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2561 // comparison code form (VCMP[EQ/LT/LE/...])
2562 defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2563 (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2565 "$cc, {sae}, $src2, $src1",
2566 "$src1, $src2, {sae}, $cc",
2567 (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
2568 (X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2570 EVEX_B, Sched<[sched]>;
2573 multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
2574 let Predicates = [HasAVX512] in {
2575 defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2576 avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2579 let Predicates = [HasAVX512,HasVLX] in {
2580 defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2581 defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
2585 defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2586 AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
2587 defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2588 AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2590 // Patterns to select fp compares with load as first operand.
2591 let Predicates = [HasAVX512] in {
2592 def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
2593 CommutableCMPCC:$cc)),
2594 (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>;
2596 def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
2597 CommutableCMPCC:$cc)),
2598 (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>;
2601 // ----------------------------------------------------------------
2604 def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
2605 (X86Vfpclasss node:$src1, node:$src2), [{
2606 return N->hasOneUse();
2609 def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
2610 (X86Vfpclass node:$src1, node:$src2), [{
2611 return N->hasOneUse();
2614 // Handle the scalar fpclass instruction: mask = op(reg_scalar, imm)
2615 //                                        mask = op(mem_scalar, imm)
2616 multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
2617 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2619 let Predicates = [prd], ExeDomain = _.ExeDomain in {
2620 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2621 (ins _.RC:$src1, i32u8imm:$src2),
2622 OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2623 [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
2624 (i32 imm:$src2)))]>,
2626 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2627 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2628 OpcodeStr##_.Suffix#
2629 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2630 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2631 (X86Vfpclasss_su (_.VT _.RC:$src1),
2632 (i32 imm:$src2))))]>,
2633 EVEX_K, Sched<[sched]>;
2634 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2635 (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2636 OpcodeStr##_.Suffix##
2637 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2639 (X86Vfpclasss _.ScalarIntMemCPat:$src1,
2640 (i32 imm:$src2)))]>,
2641 Sched<[sched.Folded, sched.ReadAfterFold]>;
2642 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2643 (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2644 OpcodeStr##_.Suffix##
2645 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2646 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2647 (X86Vfpclasss_su _.ScalarIntMemCPat:$src1,
2648 (i32 imm:$src2))))]>,
2649 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2653 // Handle the vector fpclass instruction: mask = fpclass(reg_vec, imm)
2654 //                                        mask = fpclass(mem_vec, imm)
2655 //                                        mask = fpclass(broadcast(eltVt), imm)
2656 multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
2657 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2659 let ExeDomain = _.ExeDomain in {
2660 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2661 (ins _.RC:$src1, i32u8imm:$src2),
2662 OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2663 [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
2664 (i32 imm:$src2)))]>,
2666 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2667 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2668 OpcodeStr##_.Suffix#
2669 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2670 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2671 (X86Vfpclass_su (_.VT _.RC:$src1),
2672 (i32 imm:$src2))))]>,
2673 EVEX_K, Sched<[sched]>;
2674 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2675 (ins _.MemOp:$src1, i32u8imm:$src2),
2676 OpcodeStr##_.Suffix#"{"#mem#"}"#
2677 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2678 [(set _.KRC:$dst,(X86Vfpclass
2679 (_.VT (_.LdFrag addr:$src1)),
2680 (i32 imm:$src2)))]>,
2681 Sched<[sched.Folded, sched.ReadAfterFold]>;
2682 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2683 (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2684 OpcodeStr##_.Suffix#"{"#mem#"}"#
2685 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2686 [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
2687 (_.VT (_.LdFrag addr:$src1)),
2688 (i32 imm:$src2))))]>,
2689 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2690 def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2691 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2692 OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
2693 _.BroadcastStr##", $dst|$dst, ${src1}"
2694 ##_.BroadcastStr##", $src2}",
2695 [(set _.KRC:$dst,(X86Vfpclass
2696 (_.VT (X86VBroadcast
2697 (_.ScalarLdFrag addr:$src1))),
2698 (i32 imm:$src2)))]>,
2699 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2700 def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2701 (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2702 OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
2703 _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
2704 _.BroadcastStr##", $src2}",
2705 [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
2706 (_.VT (X86VBroadcast
2707 (_.ScalarLdFrag addr:$src1))),
2708 (i32 imm:$src2))))]>,
2709 EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2712 // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
// the memory form.
2714 def : InstAlias<OpcodeStr#_.Suffix#mem#
2715 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2716 (!cast<Instruction>(NAME#"rr")
2717 _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2718 def : InstAlias<OpcodeStr#_.Suffix#mem#
2719 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2720 (!cast<Instruction>(NAME#"rrk")
2721 _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2722 def : InstAlias<OpcodeStr#_.Suffix#mem#
2723 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
2724 _.BroadcastStr#", $src2}",
2725 (!cast<Instruction>(NAME#"rmb")
2726 _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2727 def : InstAlias<OpcodeStr#_.Suffix#mem#
2728 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
2729 "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
2730 (!cast<Instruction>(NAME#"rmbk")
2731 _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2734 multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2735 bits<8> opc, X86SchedWriteWidths sched,
2737 let Predicates = [prd] in {
2738 defm Z : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
2739 _.info512, "z">, EVEX_V512;
2741 let Predicates = [prd, HasVLX] in {
2742 defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
2743 _.info128, "x">, EVEX_V128;
2744 defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
2745 _.info256, "y">, EVEX_V256;
2749 multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2750 bits<8> opcScalar, X86SchedWriteWidths sched,
2752 defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
2754 EVEX_CD8<32, CD8VF>;
2755 defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
2757 EVEX_CD8<64, CD8VF> , VEX_W;
2758 defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2759 sched.Scl, f32x_info, prd>, VEX_LIG,
2760 EVEX_CD8<32, CD8VT1>;
2761 defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2762 sched.Scl, f64x_info, prd>, VEX_LIG,
2763 EVEX_CD8<64, CD8VT1>, VEX_W;
2766 defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp,
2767 HasDQI>, AVX512AIi8Base, EVEX;
2769 //-----------------------------------------------------------------
2770 // Mask register copy, including
2771 // - copy between mask registers
2772 // - load/store mask registers
2773 // - copy from GPR to mask register and vice versa
2775 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2776 string OpcodeStr, RegisterClass KRC,
2777 ValueType vvt, X86MemOperand x86memop> {
2778 let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
2779 def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2780 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2782 def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2783 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2784 [(set KRC:$dst, (vvt (load addr:$src)))]>,
2786 def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2787 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2788 [(store KRC:$src, addr:$dst)]>,
2789 Sched<[WriteStore]>;
2792 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2794 RegisterClass KRC, RegisterClass GRC> {
2795 let hasSideEffects = 0 in {
2796 def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2797 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2799 def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2800 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2805 let Predicates = [HasDQI] in
2806 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2807 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2810 let Predicates = [HasAVX512] in
2811 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2812 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2815 let Predicates = [HasBWI] in {
2816 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2818 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2820 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2822 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2826 // GR from/to mask register
// i16 <-> v16i1: both directions go through a 32-bit GPR. The GPR-to-mask
// direction inserts the 16-bit value into an IMPLICIT_DEF i32 before copying
// to VK16; the mask-to-GPR direction copies to GR32 and extracts sub_16bit.
2827 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2828 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2829 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2830 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
// i8 <-> v8i1: same scheme as above, using sub_8bit and VK8.
2832 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2833 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2834 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2835 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
// v16i1 -> i32: a zero-extending conversion selects KMOVWrk, while an
// any-extending one only needs a register-class copy.
2837 def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2838 (KMOVWrk VK16:$src)>;
2839 def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2840 (COPY_TO_REGCLASS VK16:$src, GR32)>;
// v8i1 -> i32: the zero-extending form selects KMOVBrk and is the only
// pattern in this group with an explicit Requires<[HasDQI]>.
2842 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2843 (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2844 def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2845 (COPY_TO_REGCLASS VK8:$src, GR32)>;
// i32 <-> v32i1 and i64 <-> v64i1: the GPR and mask types have the same
// width, so a plain register-class copy is used in both directions.
2847 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2848 (COPY_TO_REGCLASS GR32:$src, VK32)>;
2849 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2850 (COPY_TO_REGCLASS VK32:$src, GR32)>;
2851 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2852 (COPY_TO_REGCLASS GR64:$src, VK64)>;
2853 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2854 (COPY_TO_REGCLASS VK64:$src, GR64)>;
// With DQI, loads and stores of the sub-byte mask types use the byte mask
// moves: a v1i1 store copies the value to VK8 and stores it with KMOVBmk, and
// v1i1/v2i1/v4i1 loads use KMOVBkm followed by a copy to the narrow class.
2857 let Predicates = [HasDQI] in {
2858 def : Pat<(store VK1:$src, addr:$dst),
2859 (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
2861 def : Pat<(v1i1 (load addr:$src)),
2862 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2863 def : Pat<(v2i1 (load addr:$src)),
2864 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2865 def : Pat<(v4i1 (load addr:$src)),
2866 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
// Mask values produced by bitconverting an integer load. The v8i1 case loads
// the byte into a GPR with MOVZX32rm8 and copies it to VK8 (no KMOVB is
// used); the v16i1 case loads straight into a mask register with KMOVWkm.
2869 let Predicates = [HasAVX512] in {
2870 def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2871 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2872 def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2873 (KMOVWkm addr:$src)>;
2876 def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2877 SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2878 SDTCVecEltisVT<1, i1>,
2881 let Predicates = [HasAVX512] in {
// Patterns that move a scalar between a GPR and element 0 of a mask vector.
//   maskRC - mask register class of the vector type.
//   maskVT - the mask vector value type.
2882 multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
// scalar_to_vector from a 32-bit GPR: plain register-class copy.
2883 def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2884 (COPY_TO_REGCLASS GR32:$src, maskRC)>;
// scalar_to_vector from an 8-bit GPR: insert into an IMPLICIT_DEF i32 first.
2886 def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2887 (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
// Extract element 0 as i8: copy to GR32 and take the sub_8bit subregister.
2889 def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2890 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
// Extract element 0 and any-extend to i32: the GR32 copy is used directly.
2892 def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2893 (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
2896 defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
2897 defm : operation_gpr_mask_copy_lowering<VK2, v2i1>;
2898 defm : operation_gpr_mask_copy_lowering<VK4, v4i1>;
2899 defm : operation_gpr_mask_copy_lowering<VK8, v8i1>;
2900 defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
2901 defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
2902 defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;
2904 def : Pat<(insert_subvector (v16i1 immAllZerosV),
2905 (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2908 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2912 // Mask unary operation
2914 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2915 RegisterClass KRC, SDPatternOperator OpNode,
2916 X86FoldableSchedWrite sched, Predicate prd> {
2917 let Predicates = [prd] in
2918 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2919 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2920 [(set KRC:$dst, (OpNode KRC:$src))]>,
2924 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2925 SDPatternOperator OpNode,
2926 X86FoldableSchedWrite sched> {
2927 defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2928 sched, HasDQI>, VEX, PD;
2929 defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2930 sched, HasAVX512>, VEX, PS;
2931 defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2932 sched, HasBWI>, VEX, PD, VEX_W;
2933 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2934 sched, HasBWI>, VEX, PS, VEX_W;
2937 // TODO - do we need a X86SchedWriteWidths::KMASK type?
2938 defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
2940 // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
// vnot of an 8-bit mask without DQI: widen to VK16, apply KNOTWrr, and copy
// the result back to VK8. The brace-less `let` applies to this pattern only.
2941 let Predicates = [HasAVX512, NoDQI] in
2942 def : Pat<(vnot VK8:$src),
2943 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
// The 4-bit and 2-bit masks use the same widen/KNOTWrr/narrow sequence, each
// copying the result back to its own register class. They are not covered by
// the `let` above.
2945 def : Pat<(vnot VK4:$src),
2946 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2947 def : Pat<(vnot VK2:$src),
2948 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
2950 // Mask binary operation
2951 // - KAND, KANDN, KOR, KXNOR, KXOR
2952 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2953 RegisterClass KRC, SDPatternOperator OpNode,
2954 X86FoldableSchedWrite sched, Predicate prd,
2956 let Predicates = [prd], isCommutable = IsCommutable in
2957 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2958 !strconcat(OpcodeStr,
2959 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2960 [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
2964 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2965 SDPatternOperator OpNode,
2966 X86FoldableSchedWrite sched, bit IsCommutable,
2967 Predicate prdW = HasAVX512> {
2968 defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2969 sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
2970 defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2971 sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
2972 defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2973 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
2974 defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2975 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
// andn(i0, i1) = (not i0) and i1 -- the FIRST operand is the inverted one.
// xnor(i0, i1) = not (i0 xor i1).
2978 def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
2979 def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
2980 // These nodes use 'vnot' instead of 'not' to support vectors.
2981 def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
2982 def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
2984 // TODO - do we need a X86SchedWriteWidths::KMASK type?
// Arguments are: opcode, mnemonic stem, selection node, scheduling class,
// IsCommutable, and optionally prdW (the predicate for the W form, which
// defaults to HasAVX512). KANDN is the only one passed IsCommutable = 0, and
// KADD is the only one that overrides prdW (with HasDQI).
2985 defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>;
2986 defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>;
2987 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>;
2988 defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>;
2989 defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>;
2990 defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
2992 multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
2994 // With AVX512F, 8-bit mask is promoted to 16-bit mask,
2995 // for the DQI set, this type is legal and KxxxB instruction is used
2996 let Predicates = [NoDQI] in
2997 def : Pat<(VOpNode VK8:$src1, VK8:$src2),
2999 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
3000 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
3002 // All types smaller than 8 bits require conversion anyway
// Each pattern widens both operands to VK16, applies the 16-bit instruction
// `Inst`, and copies the result back to the register class of the type being
// matched (VK1, VK2 or VK4). The v1i1 pattern matches OpNode; the v2i1 and
// v4i1 patterns match VOpNode.
3003 def : Pat<(OpNode VK1:$src1, VK1:$src2),
3004 (COPY_TO_REGCLASS (Inst
3005 (COPY_TO_REGCLASS VK1:$src1, VK16),
3006 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
3007 def : Pat<(VOpNode VK2:$src1, VK2:$src2),
3008 (COPY_TO_REGCLASS (Inst
3009 (COPY_TO_REGCLASS VK2:$src1, VK16),
3010 (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
3011 def : Pat<(VOpNode VK4:$src1, VK4:$src2),
3012 (COPY_TO_REGCLASS (Inst
3013 (COPY_TO_REGCLASS VK4:$src1, VK16),
3014 (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
3017 defm : avx512_binop_pat<and, and, KANDWrr>;
3018 defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
3019 defm : avx512_binop_pat<or, or, KORWrr>;
3020 defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
3021 defm : avx512_binop_pat<xor, xor, KXORWrr>;
3024 multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
3025 X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
3027 let Predicates = [prd] in {
3028 let hasSideEffects = 0 in
3029 def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
3030 (ins Src.KRC:$src1, Src.KRC:$src2),
3031 "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
3032 VEX_4V, VEX_L, Sched<[sched]>;
// concat_vectors of two source masks selects the KUNPCK "rr" instruction.
// Note the operands are passed in SWAPPED order ($src2, $src1).
// NOTE(review): presumably because the instruction places its second source
// in the low half of the result -- confirm against the ISA reference.
3034 def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
3035 (!cast<Instruction>(NAME##rr) Src.KRC:$src2, Src.KRC:$src1)>;
3039 defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info, WriteShuffle, HasAVX512>, PD;
3040 defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
3041 defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;
3044 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3045 SDNode OpNode, X86FoldableSchedWrite sched,
3047 let Predicates = [prd], Defs = [EFLAGS] in
3048 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
3049 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
3050 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
3054 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
3055 X86FoldableSchedWrite sched,
3056 Predicate prdW = HasAVX512> {
3057 defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
3059 defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
3061 defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
3063 defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
3067 // TODO - do we need a X86SchedWriteWidths::KMASK type?
3068 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
3069 defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
3072 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3073 SDNode OpNode, X86FoldableSchedWrite sched> {
3074 let Predicates = [HasAVX512] in
3075 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
3076 !strconcat(OpcodeStr,
3077 "\t{$imm, $src, $dst|$dst, $src, $imm}"),
3078 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>,
3082 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
3083 SDNode OpNode, X86FoldableSchedWrite sched> {
3084 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3085 sched>, VEX, TAPD, VEX_W;
3086 let Predicates = [HasDQI] in
3087 defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3089 let Predicates = [HasBWI] in {
3090 defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3091 sched>, VEX, TAPD, VEX_W;
3092 defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3097 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
3098 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
3100 // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
3101 multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3103 X86VectorVTInfo Narrow,
3104 X86VectorVTInfo Wide> {
3105 def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
3106 (Narrow.VT Narrow.RC:$src2))),
3108 (!cast<Instruction>(InstStr#"Zrr")
3109 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3110 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
3113 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3114 (Frag_su (Narrow.VT Narrow.RC:$src1),
3115 (Narrow.VT Narrow.RC:$src2)))),
3117 (!cast<Instruction>(InstStr#"Zrrk")
3118 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3119 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3120 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
3124 // Patterns for comparing 128/256-bit integer vectors using the 512-bit
// instruction that takes a condition-code immediate.
3125 multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3127 X86VectorVTInfo Narrow,
3128 X86VectorVTInfo Wide> {
3129 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3130 (Narrow.VT Narrow.RC:$src2), cond)),
3132 (!cast<Instruction>(InstStr##Zrri)
3133 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3134 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3135 (Frag.OperandTransform $cc)), Narrow.KRC)>;
3137 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3138 (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3139 (Narrow.VT Narrow.RC:$src2),
3141 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
3142 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3143 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3144 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3145 (Frag.OperandTransform $cc)), Narrow.KRC)>;
3148 // Same as above, but for fp types which don't use PatFrags.
// OpNode is matched directly with an immediate condition code (imm:$cc), and
// that immediate is forwarded unchanged as the last instruction operand.
// OpNode_su is the fragment matched when the result is ANDed with $mask.
3149 multiclass axv512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, PatFrag OpNode_su,
3151 X86VectorVTInfo Narrow,
3152 X86VectorVTInfo Wide> {
// Unmasked compare: widen both sources and select the "Zrri" instruction.
3153 def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
3154 (Narrow.VT Narrow.RC:$src2), imm:$cc)),
3156 (!cast<Instruction>(InstStr##Zrri)
3157 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3158 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3159 imm:$cc), Narrow.KRC)>;
// Masked compare: the mask is copied to Wide.KRC and "Zrrik" is selected.
3161 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3162 (OpNode_su (Narrow.VT Narrow.RC:$src1),
3163 (Narrow.VT Narrow.RC:$src2), imm:$cc))),
3164 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
3165 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3166 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3167 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3168 imm:$cc), Narrow.KRC)>;
// Compare lowering used when AVX512 is available but VLX is not. Each defm
// pairs a narrow type info (e.g. v8i32x_info, v4i32x_info) with the wide type
// info of the same element type (e.g. v16i32_info) and names the instruction
// stem to select. This group covers dword/qword integer compares and the
// single/double precision fp compares.
3171 let Predicates = [HasAVX512, NoVLX] in {
3172 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
3173 // increase the pattern complexity the way an immediate would.
3174 let AddedComplexity = 2 in {
// Signed greater-than and equality compares (no condition-code operand).
3175 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v8i32x_info, v16i32_info>;
3176 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v8i32x_info, v16i32_info>;
3178 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v4i32x_info, v16i32_info>;
3179 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v4i32x_info, v16i32_info>;
3181 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v4i64x_info, v8i64_info>;
3182 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v4i64x_info, v8i64_info>;
3184 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v2i64x_info, v8i64_info>;
3185 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v2i64x_info, v8i64_info>;
// Integer compares with a condition code: X86pcmpm selects the VPCMP* stems
// and X86pcmpum selects the VPCMPU* stems.
3188 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3189 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3191 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3192 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3194 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3195 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3197 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3198 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
// Floating-point compares with an immediate condition code.
3200 defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPS", v8f32x_info, v16f32_info>;
3201 defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPS", v4f32x_info, v16f32_info>;
3202 defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPD", v4f64x_info, v8f64_info>;
3203 defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPD", v2f64x_info, v8f64_info>;
// Byte/word counterpart of the NoVLX compare lowering, enabled when BWI is
// available but VLX is not. Narrow byte types are widened to v64i8_info and
// narrow word types to v32i16_info.
3206 let Predicates = [HasBWI, NoVLX] in {
3207 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
3208 // increase the pattern complexity the way an immediate would.
3209 let AddedComplexity = 2 in {
// Signed greater-than and equality compares (no condition-code operand).
3210 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTB", v32i8x_info, v64i8_info>;
3211 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQB", v32i8x_info, v64i8_info>;
3213 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTB", v16i8x_info, v64i8_info>;
3214 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQB", v16i8x_info, v64i8_info>;
3216 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTW", v16i16x_info, v32i16_info>;
3217 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQW", v16i16x_info, v32i16_info>;
3219 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTW", v8i16x_info, v32i16_info>;
3220 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQW", v8i16x_info, v32i16_info>;
// Compares with a condition code: X86pcmpm selects VPCMPB/VPCMPW and
// X86pcmpum selects VPCMPUB/VPCMPUW.
3223 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3224 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3226 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3227 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3229 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3230 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3232 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3233 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
3236 // Mask setting all 0s or 1s
// Defines one pseudo instruction (isPseudo = 1, empty assembly string, no
// inputs) under HasAVX512 whose pattern sets KRC:$dst to the constant Val of
// type VT. The pseudo is flagged rematerializable and as cheap as a move, and
// uses the WriteZero scheduling class.
3237 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
3238 let Predicates = [HasAVX512] in
3239 let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3240 SchedRW = [WriteZero] in
3241 def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3242 [(set KRC:$dst, (VT Val))]>;
// Instantiates avx512_mask_setop for the 16-, 32- and 64-element mask classes
// (suffixes W, D and Q). No 8-element variant is defined here.
3245 multiclass avx512_mask_setop_w<PatFrag Val> {
3246 defm W : avx512_mask_setop<VK16, v16i1, Val>;
3247 defm D : avx512_mask_setop<VK32, v32i1, Val>;
3248 defm Q : avx512_mask_setop<VK64, v64i1, Val>;
// KSET0* materialize an all-zeros mask, KSET1* an all-ones mask.
3251 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3252 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3254 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// All-zeros / all-ones constants of the v8i1, v4i1, v2i1 and v1i1 types are
// produced by the 16-bit pseudo (KSET0W / KSET1W) and then copied into the
// narrower mask register class.
3255 let Predicates = [HasAVX512] in {
3256 def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3257 def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3258 def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3259 def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3260 def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
3261 def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
3262 def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
3263 def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
3266 // Patterns for kmask insert_subvector/extract_subvector to/from index=0
// subRC/subVT describe the narrower mask, RC/VT the wider one. Both directions
// are lowered to a plain register-class copy:
//   - extracting subVT from VT at index 0 copies RC:$src into subRC;
//   - inserting subVT into an undef VT at index 0 copies subRC:$src into RC.
3267 multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3268 RegisterClass RC, ValueType VT> {
3269 def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3270 (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3272 def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3273 (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
// Instantiate for every (narrower, wider) pair among v1i1 ... v64i1.
3275 defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>;
3276 defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>;
3277 defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>;
3278 defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
3279 defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
3280 defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;
3282 defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>;
3283 defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>;
3284 defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>;
3285 defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>;
3286 defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>;
3288 defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>;
3289 defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>;
3290 defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>;
3291 defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>;
3293 defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>;
3294 defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>;
3295 defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>;
3297 defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3298 defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3300 defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3302 //===----------------------------------------------------------------------===//
3303 // AVX-512 - Aligned and unaligned load and store
// Defines the load-side forms of a vector move for one vector type `_`:
//   rr   - unmasked register move (no selection pattern)
//   rrkz - zero-masking register move
//   rm   - unmasked load
//   rrk  - merge-masking register move ($src0 tied to $dst)
//   rmk  - merge-masking load ($src0 tied to $dst)
//   rmkz - zero-masking load
// plus three patterns that select masked loads (mload) to rmk/rmkz.
// Parameters:
//   opc          - opcode shared by all forms.
//   OpcodeStr    - mnemonic used to build the assembly strings.
//   Name         - name stem; Name # _.ZSuffix # "rmk"/"rmkz" is looked up for
//                  the masked-load patterns.
//   ld_frag      - load fragment used in the rm/rmk/rmkz patterns.
//   mload        - masked-load fragment matched by the trailing patterns.
//   Sched        - supplies Sched.RR (register forms) and Sched.RM (loads).
//   EVEX2VEXOvrd - stem passed to EVEX2VEXOverride with "rr"/"rm" appended.
//   NoRMPattern  - when set, rm is defined with an empty pattern list.
//   SelectOprr   - select operator used in the rrkz/rrk patterns (the rmk and
//                  rmkz patterns always use vselect).
3306 multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3307 X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3308 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3309 bit NoRMPattern = 0,
3310 SDPatternOperator SelectOprr = vselect> {
3311 let hasSideEffects = 0 in {
// Unmasked register-to-register move.
3312 let isMoveReg = 1 in
3313 def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3314 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3315 _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
3316 EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
// Zero-masking register move: the select's false operand is all zeros.
3317 def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3318 (ins _.KRCWM:$mask, _.RC:$src),
3319 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3320 "${dst} {${mask}} {z}, $src}"),
3321 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3323 _.ImmAllZerosV)))], _.ExeDomain>,
3324 EVEX, EVEX_KZ, Sched<[Sched.RR]>;
// Unmasked load; flagged foldable as a load and rematerializable.
3326 let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3327 def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3328 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3329 !if(NoRMPattern, [],
3331 (_.VT (ld_frag addr:$src)))]),
3332 _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
3333 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
// Merge-masking forms: $src0 supplies the pass-through value and is tied to
// $dst.
3335 let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3336 def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3337 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3338 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3339 "${dst} {${mask}}, $src1}"),
3340 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3342 (_.VT _.RC:$src0))))], _.ExeDomain>,
3343 EVEX, EVEX_K, Sched<[Sched.RR]>;
3344 def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3345 (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3346 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3347 "${dst} {${mask}}, $src1}"),
3348 [(set _.RC:$dst, (_.VT
3349 (vselect _.KRCWM:$mask,
3350 (_.VT (ld_frag addr:$src1)),
3351 (_.VT _.RC:$src0))))], _.ExeDomain>,
3352 EVEX, EVEX_K, Sched<[Sched.RM]>;
// Zero-masking load.
3354 def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3355 (ins _.KRCWM:$mask, _.MemOp:$src),
3356 OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3357 "${dst} {${mask}} {z}, $src}",
3358 [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
3359 (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3360 _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
// Masked-load patterns: an undef or all-zeros pass-through selects rmkz; a
// register pass-through selects rmk.
3362 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3363 (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3365 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3366 (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3368 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3369 (!cast<Instruction>(Name#_.ZSuffix##rmk) _.RC:$src0,
3370 _.KRCWM:$mask, addr:$ptr)>;
// Instantiates avx512_load for the three vector lengths of `_`, using each
// type's AlignedLdFrag and the masked_load_aligned fragment:
//   Z    - info512, requires prd, empty EVEX2VEX override stem.
//   Z256 - info256, requires prd and HasVLX, override stem EVEX2VEXOvrd # "Y".
//   Z128 - info128, requires prd and HasVLX, override stem EVEX2VEXOvrd.
// SelectOprr is left at avx512_load's default.
3373 multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3374 AVX512VLVectorVTInfo _, Predicate prd,
3375 X86SchedWriteMoveLSWidths Sched,
3376 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
3377 let Predicates = [prd] in
3378 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3379 _.info512.AlignedLdFrag, masked_load_aligned,
3380 Sched.ZMM, "", NoRMPattern>, EVEX_V512;
3382 let Predicates = [prd, HasVLX] in {
3383 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3384 _.info256.AlignedLdFrag, masked_load_aligned,
3385 Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
3386 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3387 _.info128.AlignedLdFrag, masked_load_aligned,
3388 Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
// Counterpart of avx512_alignedload_vl that uses each type's LdFrag and the
// masked_load fragment instead of the aligned ones. It additionally accepts
// SelectOprr and forwards it to avx512_load for all three vector lengths.
// The 256/128-bit forms require HasVLX in addition to prd.
3392 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3393 AVX512VLVectorVTInfo _, Predicate prd,
3394 X86SchedWriteMoveLSWidths Sched,
3395 string EVEX2VEXOvrd, bit NoRMPattern = 0,
3396 SDPatternOperator SelectOprr = vselect> {
3397 let Predicates = [prd] in
3398 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3399 masked_load, Sched.ZMM, "",
3400 NoRMPattern, SelectOprr>, EVEX_V512;
3402 let Predicates = [prd, HasVLX] in {
3403 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3404 masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
3405 NoRMPattern, SelectOprr>, EVEX_V256;
3406 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3407 masked_load, Sched.XMM, EVEX2VEXOvrd,
3408 NoRMPattern, SelectOprr>, EVEX_V128;
// Defines the store-side forms of a vector move for one vector type `_`:
//   rr_REV / rrk_REV / rrkz_REV - register-to-register forms using the
//       MRMDestReg encoding; codegen-only, force-disassembled, no patterns.
//   mr  - unmasked store (pattern omitted when NoMRPattern is set).
//   mrk - masked store; selected through the explicit mstore pattern below.
// It also defines ".s"-suffixed assembler aliases for the three _REV forms.
// BaseName # _.ZSuffix is the stem used to reference the sibling instruction
// records (FoldGenData, the mstore pattern and the aliases).
3412 multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3413 X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3414 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3415 bit NoMRPattern = 0> {
3416 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3417 let isMoveReg = 1 in
3418 def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3419 OpcodeStr # "\t{$src, $dst|$dst, $src}",
3420 [], _.ExeDomain>, EVEX,
3421 FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
3422 EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
3423 def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3424 (ins _.KRCWM:$mask, _.RC:$src),
3425 OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3426 "${dst} {${mask}}, $src}",
3427 [], _.ExeDomain>, EVEX, EVEX_K,
3428 FoldGenData<BaseName#_.ZSuffix#rrk>,
3430 def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3431 (ins _.KRCWM:$mask, _.RC:$src),
3432 OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3433 "${dst} {${mask}} {z}, $src}",
3434 [], _.ExeDomain>, EVEX, EVEX_KZ,
3435 FoldGenData<BaseName#_.ZSuffix#rrkz>,
// Memory-destination forms.
3439 let hasSideEffects = 0, mayStore = 1 in
3440 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3441 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3442 !if(NoMRPattern, [],
3443 [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3444 _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
3445 EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
3446 def mrk : AVX512PI<opc, MRMDestMem, (outs),
3447 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3448 OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3449 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
// Masked store selects the mrk form.
3452 def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3453 (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3454 _.KRCWM:$mask, _.RC:$src)>;
// Assembler-only aliases: "<mnemonic>.s" maps to the corresponding _REV form.
3456 def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3457 (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3458 _.RC:$dst, _.RC:$src), 0>;
3459 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3460 (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3461 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3462 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3463 (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3464 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
// Instantiates avx512_store for the three vector lengths of `_`, using the
// `store` and `masked_store` fragments:
//   Z    - info512, requires prd, empty EVEX2VEX override stem.
//   Z256 - info256, requires prd and HasVLX, override stem EVEX2VEXOvrd # "Y".
//   Z128 - info128, requires prd and HasVLX, override stem EVEX2VEXOvrd.
3467 multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3468 AVX512VLVectorVTInfo _, Predicate prd,
3469 X86SchedWriteMoveLSWidths Sched,
3470 string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3471 let Predicates = [prd] in
3472 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3473 masked_store, Sched.ZMM, "",
3474 NoMRPattern>, EVEX_V512;
3475 let Predicates = [prd, HasVLX] in {
3476 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3477 masked_store, Sched.YMM,
3478 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3479 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3480 masked_store, Sched.XMM, EVEX2VEXOvrd,
3481 NoMRPattern>, EVEX_V128;
// Same shape as avx512_store_vl, but instantiates avx512_store with the
// `alignedstore` and `masked_store_aligned` fragments. The 256/128-bit forms
// require HasVLX in addition to prd.
3485 multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3486 AVX512VLVectorVTInfo _, Predicate prd,
3487 X86SchedWriteMoveLSWidths Sched,
3488 string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3489 let Predicates = [prd] in
3490 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3491 masked_store_aligned, Sched.ZMM, "",
3492 NoMRPattern>, EVEX_V512;
3494 let Predicates = [prd, HasVLX] in {
3495 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3496 masked_store_aligned, Sched.YMM,
3497 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3498 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3499 masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
3500 NoMRPattern>, EVEX_V128;
// Concrete vector move instructions. Each defm combines a load multiclass and
// a store multiclass (load opcode first, store opcode second) and then applies
// the prefix / VEX_W / EVEX_CD8 modifiers shared by both.

// Aligned floating-point moves: load opcode 0x28, store opcode 0x29.
3504 defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3505 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3506 avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3507 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3508 PS, EVEX_CD8<32, CD8VF>;
3510 defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3511 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3512 avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3513 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3514 PD, VEX_W, EVEX_CD8<64, CD8VF>;
// Unaligned floating-point moves: load opcode 0x10, store opcode 0x11. The
// load side passes null_frag as SelectOprr in place of the default vselect.
3516 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3517 SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
3518 avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3519 SchedWriteFMoveLS, "VMOVUPS">,
3520 PS, EVEX_CD8<32, CD8VF>;
3522 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3523 SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
3524 avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3525 SchedWriteFMoveLS, "VMOVUPD">,
3526 PD, VEX_W, EVEX_CD8<64, CD8VF>;
// Aligned integer moves: load opcode 0x6F, store opcode 0x7F.
// NOTE(review): the remaining template arguments of these two defms are on
// lines not visible in this chunk.
3528 defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3529 HasAVX512, SchedWriteVecMoveLS,
3531 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3532 HasAVX512, SchedWriteVecMoveLS,
3534 PD, EVEX_CD8<32, CD8VF>;
3536 defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3537 HasAVX512, SchedWriteVecMoveLS,
3539 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3540 HasAVX512, SchedWriteVecMoveLS,
3542 PD, VEX_W, EVEX_CD8<64, CD8VF>;
// Unaligned integer moves: load opcode 0x6F, store opcode 0x7F. The 8/16-bit
// element variants require HasBWI. VMOVDQU8/16/32 pass 1 for NoRMPattern and
// NoMRPattern, so their unmasked rm/mr forms carry no selection pattern;
// VMOVDQU64 passes 0 / the default and keeps them.
3544 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3545 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3546 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3547 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3548 XD, EVEX_CD8<8, CD8VF>;
3550 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3551 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3552 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3553 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3554 XD, VEX_W, EVEX_CD8<16, CD8VF>;
3556 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3557 SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
3558 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3559 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3560 XS, EVEX_CD8<32, CD8VF>;
3562 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3563 SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
3564 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3565 SchedWriteVecMoveLS, "VMOVDQU">,
3566 XS, VEX_W, EVEX_CD8<64, CD8VF>;
3568 // Special instructions to help with spilling when we don't have VLX. We need
3569 // to load or store from a ZMM register instead. These are converted in
3570 // expandPostRAPseudos.
// Load pseudos: no assembly string and no selection pattern; flagged as
// rematerializable and foldable as a load. The 128-bit forms use VR128X with
// f128mem, the 256-bit forms VR256X with f256mem.
3571 let isReMaterializable = 1, canFoldAsLoad = 1,
3572 isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3573 def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3574 "", []>, Sched<[WriteFLoadX]>;
3575 def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3576 "", []>, Sched<[WriteFLoadY]>;
3577 def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3578 "", []>, Sched<[WriteFLoadX]>;
3579 def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3580 "", []>, Sched<[WriteFLoadY]>;
// Store pseudos: same operand shapes, no outputs, flagged mayStore.
3583 let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3584 def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3585 "", []>, Sched<[WriteFStoreX]>;
3586 def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3587 "", []>, Sched<[WriteFStoreY]>;
3588 def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3589 "", []>, Sched<[WriteFStoreX]>;
3590 def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3591 "", []>, Sched<[WriteFStoreY]>;
// A vselect whose *true* operand is all zeros is selected as a zero-masking
// register move with the mask inverted by KNOTWrr. For v8i64 the VK8 mask is
// first copied into VK16 because the 16-bit KNOT is used.
3594 def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3595 (v8i64 VR512:$src))),
3596 (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3599 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3600 (v16i32 VR512:$src))),
3601 (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3603 // These patterns exist to prevent the above patterns from introducing a second
3604 // mask inversion when one already exists.
// Here the mask is already inverted (xor with all ones), so the un-inverted
// $mask is used directly as the zero-masking write mask.
3605 def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
3606 (v8i64 immAllZerosV),
3607 (v8i64 VR512:$src))),
3608 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3609 def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
3610 (v16i32 immAllZerosV),
3611 (v16i32 VR512:$src))),
3612 (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
// Lowers a masked select on a Narrow vector type to the masked register move
// of the Wide type. InstrStr is the wide instruction stem; "rrk" is appended
// for the merge-masking form and "rrkz" for the zero-masking form.
// Narrow operands are placed in an undefined (IMPLICIT_DEF) Wide register via
// INSERT_SUBREG at Narrow.SubRegIdx, and the mask is copied from Narrow.KRCWM
// to Wide.KRCWM.
// NOTE(review): the outer part of each result pattern is on lines not visible
// in this chunk.
3614 multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3615 X86VectorVTInfo Wide> {
// select(mask, src1, src0): $src0 is the pass-through operand of "rrk".
3616 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3617 Narrow.RC:$src1, Narrow.RC:$src0)),
3620 (!cast<Instruction>(InstrStr#"rrk")
3621 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3622 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3623 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
// select(mask, src1, zeros): uses the zero-masking "rrkz" form.
3626 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3627 Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3630 (!cast<Instruction>(InstrStr#"rrkz")
3631 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3632 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3636 // Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
3637 // available. Use a 512-bit operation and extract.
// NOTE(review): the instantiations below also cover narrow types other than
// v8i1-masked 256-bit vectors (e.g. v4f32x_info, v2f64x_info and, under
// HasBWI, byte/word types), so the comment above looks narrower than the code.
3638 let Predicates = [HasAVX512, NoVLX] in {
// 32-bit element types widen to the 16-element 512-bit type infos.
3639 defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3640 defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3641 defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3642 defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
// 64-bit element types widen to the 8-element 512-bit type infos.
3644 defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3645 defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3646 defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3647 defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
// Byte and word element types need BWI and use the VMOVDQU8/VMOVDQU16 stems.
3650 let Predicates = [HasBWI, NoVLX] in {
3651 defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3652 defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3654 defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3655 defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
// Unmasked 512-bit integer loads and stores of v16i32, v32i16 and v64i8 are all
// selected to the 64-bit-element move instructions: VMOVDQA64Z* for the
// aligned fragments and VMOVDQU64Z* for the plain load/store fragments.
3658 let Predicates = [HasAVX512] in {
// 512-bit loads.
3660 def : Pat<(alignedloadv16i32 addr:$src),
3661 (VMOVDQA64Zrm addr:$src)>;
3662 def : Pat<(alignedloadv32i16 addr:$src),
3663 (VMOVDQA64Zrm addr:$src)>;
3664 def : Pat<(alignedloadv64i8 addr:$src),
3665 (VMOVDQA64Zrm addr:$src)>;
3666 def : Pat<(loadv16i32 addr:$src),
3667 (VMOVDQU64Zrm addr:$src)>;
3668 def : Pat<(loadv32i16 addr:$src),
3669 (VMOVDQU64Zrm addr:$src)>;
3670 def : Pat<(loadv64i8 addr:$src),
3671 (VMOVDQU64Zrm addr:$src)>;
// 512-bit stores.
3674 def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3675 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3676 def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3677 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3678 def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3679 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3680 def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3681 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3682 def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3683 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3684 def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3685 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
// With VLX, unmasked 128-bit and 256-bit integer loads and stores of the
// dword, word and byte element types are selected to the 64-bit-element move
// instructions: VMOVDQA64Z128/Z256 for the aligned fragments and
// VMOVDQU64Z128/Z256 for the plain load/store fragments.
3688 let Predicates = [HasVLX] in {
// 128-bit loads.
3690 def : Pat<(alignedloadv4i32 addr:$src),
3691 (VMOVDQA64Z128rm addr:$src)>;
3692 def : Pat<(alignedloadv8i16 addr:$src),
3693 (VMOVDQA64Z128rm addr:$src)>;
3694 def : Pat<(alignedloadv16i8 addr:$src),
3695 (VMOVDQA64Z128rm addr:$src)>;
3696 def : Pat<(loadv4i32 addr:$src),
3697 (VMOVDQU64Z128rm addr:$src)>;
3698 def : Pat<(loadv8i16 addr:$src),
3699 (VMOVDQU64Z128rm addr:$src)>;
3700 def : Pat<(loadv16i8 addr:$src),
3701 (VMOVDQU64Z128rm addr:$src)>;
// 128-bit stores.
3704 def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3705 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3706 def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3707 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3708 def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3709 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3710 def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3711 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3712 def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3713 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3714 def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3715 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
// 256-bit loads.
3718 def : Pat<(alignedloadv8i32 addr:$src),
3719 (VMOVDQA64Z256rm addr:$src)>;
3720 def : Pat<(alignedloadv16i16 addr:$src),
3721 (VMOVDQA64Z256rm addr:$src)>;
3722 def : Pat<(alignedloadv32i8 addr:$src),
3723 (VMOVDQA64Z256rm addr:$src)>;
3724 def : Pat<(loadv8i32 addr:$src),
3725 (VMOVDQU64Z256rm addr:$src)>;
3726 def : Pat<(loadv16i16 addr:$src),
3727 (VMOVDQU64Z256rm addr:$src)>;
3728 def : Pat<(loadv32i8 addr:$src),
3729 (VMOVDQU64Z256rm addr:$src)>;
// 256-bit stores.
3732 def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3733 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3734 def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3735 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3736 def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3737 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3738 def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3739 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3740 def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3741 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3742 def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3743 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3746 // Move Int Doubleword to Packed Double Int
// EVEX forms of vmovd/vmovq that move a GPR or memory scalar into the low
// element of an XMM register (scalar_to_vector), plus the codegen-only
// GR64 <-> FR64X bitconvert moves. All use opcode 0x6E except VMOVSDto64Zrr
// (0x7E, MRMDestReg).
3748 let ExeDomain = SSEPackedInt in {
3749 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3750 "vmovd\t{$src, $dst|$dst, $src}",
3752 (v4i32 (scalar_to_vector GR32:$src)))]>,
3753 EVEX, Sched<[WriteVecMoveFromGpr]>;
3754 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3755 "vmovd\t{$src, $dst|$dst, $src}",
3757 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3758 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
3759 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3760 "vmovq\t{$src, $dst|$dst, $src}",
3762 (v2i64 (scalar_to_vector GR64:$src)))]>,
3763 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
// Memory form of the 64-bit move: no selection pattern, kept for the
// disassembler (isCodeGenOnly with ForceDisassemble).
3764 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3765 def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3767 "vmovq\t{$src, $dst|$dst, $src}", []>,
3768 EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
// Codegen-only bitcasts between GR64 and the scalar FR64X class.
3769 let isCodeGenOnly = 1 in {
3770 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3771 "vmovq\t{$src, $dst|$dst, $src}",
3772 [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3773 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3774 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3775 "vmovq\t{$src, $dst|$dst, $src}",
3776 [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3777 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3779 } // ExeDomain = SSEPackedInt
3781 // Move Int Doubleword to Single Scalar
// Codegen-only vmovd that implements a bitconvert from GR32 to FR32X.
3783 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3784 def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3785 "vmovd\t{$src, $dst|$dst, $src}",
3786 [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3787 EVEX, Sched<[WriteVecMoveFromGpr]>;
3788 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3790 // Move doubleword from xmm register to r/m32
// Both forms use opcode 0x7E and match an extract of a v4i32 element: rr
// writes it to a GR32, mr stores element 0 to memory.
3792 let ExeDomain = SSEPackedInt in {
3793 def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3794 "vmovd\t{$src, $dst|$dst, $src}",
3795 [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3797 EVEX, Sched<[WriteVecMoveToGpr]>;
3798 def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
3799 (ins i32mem:$dst, VR128X:$src),
3800 "vmovd\t{$src, $dst|$dst, $src}",
3801 [(store (i32 (extractelt (v4i32 VR128X:$src),
3802 (iPTR 0))), addr:$dst)]>,
3803 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
3804 } // ExeDomain = SSEPackedInt
3806 // Move quadword from xmm1 register to r/m64
// Four vmovq forms that move the low quadword out of an XMM register:
//   VMOVPQIto64Zrr - opcode 0x7E, to GR64 (extractelt of v2i64).
//   VMOVPQIto64Zmr - opcode 0x7E, to memory; no pattern, 64-bit mode only.
//   VMOVPQI2QIZmr  - opcode 0xD6, to memory; matches a store of an extracted
//                    v2i64 element.
//   VMOVPQI2QIZrr  - opcode 0xD6, XMM to XMM; no pattern, disassembler and
//                    "vmovq.s" alias only.
3808 let ExeDomain = SSEPackedInt in {
3809 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3810 "vmovq\t{$src, $dst|$dst, $src}",
3811 [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3813 PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
3814 Requires<[HasAVX512]>;
3816 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
3817 def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3818 "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
3819 EVEX, VEX_W, Sched<[WriteVecStore]>,
3820 Requires<[HasAVX512, In64BitMode]>;
3822 def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3823 (ins i64mem:$dst, VR128X:$src),
3824 "vmovq\t{$src, $dst|$dst, $src}",
3825 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3827 EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
3828 Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
3830 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
3831 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3833 "vmovq\t{$src, $dst|$dst, $src}", []>,
3834 EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
3835 } // ExeDomain = SSEPackedInt
// Assembler alias selecting the 0xD6 register-register encoding.
3837 def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
3838 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
// X86vextractstore of a v2i64 also selects the 0xD6 store form.
3840 let Predicates = [HasAVX512] in {
3841 def : Pat<(X86vextractstore (v2i64 VR128X:$src), addr:$dst),
3842 (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
3845 // Move Scalar Single to Double Int
// Codegen-only vmovd (opcode 0x7E) that implements a bitconvert from FR32X to
// GR32.
3847 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3848 def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3850 "vmovd\t{$src, $dst|$dst, $src}",
3851 [(set GR32:$dst, (bitconvert FR32X:$src))]>,
3852 EVEX, Sched<[WriteVecMoveToGpr]>;
3853 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3855 // Move Quadword Int to Packed Quadword Int
3857 let ExeDomain = SSEPackedInt in {
3858 def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3860 "vmovq\t{$src, $dst|$dst, $src}",
3862 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3863 EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
3864 } // ExeDomain = SSEPackedInt
3866 // Allow "vmovd" but print "vmovq".
// Both aliases cover the 64-bit GPR <-> XMM register moves: VMOV64toPQIZrr
// (GR64 source, VR128X destination) and VMOVPQIto64Zrr (VR128X source, GR64
// destination). Each alias is declared with a trailing 0.
3867 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3868 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
3869 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3870 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3872 //===----------------------------------------------------------------------===//
3873 // AVX-512 MOVSS, MOVSD
3874 //===----------------------------------------------------------------------===//
3876 multiclass avx512_move_scalar<string asm, SDNode OpNode,
3877 X86VectorVTInfo _> {
3878 let Predicates = [HasAVX512, OptForSize] in
3879 def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3880 (ins _.RC:$src1, _.RC:$src2),
3881 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3882 [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3883 _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
3884 def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3885 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3886 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3887 "$dst {${mask}} {z}, $src1, $src2}"),
3888 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3889 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3891 _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
3892 let Constraints = "$src0 = $dst" in
3893 def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3894 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3895 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3896 "$dst {${mask}}, $src1, $src2}"),
3897 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3898 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3899 (_.VT _.RC:$src0))))],
3900 _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
3901 let canFoldAsLoad = 1, isReMaterializable = 1 in {
3902 def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
3903 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3904 [(set _.RC:$dst, (_.VT (X86vzload addr:$src)))],
3905 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3906 // _alt version uses FR32/FR64 register class.
3907 let isCodeGenOnly = 1 in
3908 def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3909 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3910 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3911 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3913 let mayLoad = 1, hasSideEffects = 0 in {
3914 let Constraints = "$src0 = $dst" in
3915 def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3916 (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3917 !strconcat(asm, "\t{$src, $dst {${mask}}|",
3918 "$dst {${mask}}, $src}"),
3919 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
3920 def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3921 (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3922 !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3923 "$dst {${mask}} {z}, $src}"),
3924 [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
3926 def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
3927 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3928 [(store _.FRC:$src, addr:$dst)], _.ExeDomain>,
3929 EVEX, Sched<[WriteFStore]>;
3930 let mayStore = 1, hasSideEffects = 0 in
3931 def mrk: AVX512PI<0x11, MRMDestMem, (outs),
3932 (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
3933 !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
3934 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
// Instantiate avx512_move_scalar for the two scalar FP moves:
//   VMOVSSZ - "vmovss", X86Movss, f32x_info, XS prefix, 32-bit CD8 scaling.
//   VMOVSDZ - "vmovsd", X86Movsd, f64x_info, XD prefix + VEX_W, 64-bit CD8.
// Both are VEX_LIG. Each defm yields the rr/rrkz/rrk/rm/rm_alt/rmk/rmkz/mr/mrk
// records declared in the multiclass.
3938 defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
3939 VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
3941 defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
3942 VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
3945 multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
3946 PatLeaf ZeroFP, X86VectorVTInfo _> {
3948 def : Pat<(_.VT (OpNode _.RC:$src0,
3949 (_.VT (scalar_to_vector
3950 (_.EltVT (X86selects VK1WM:$mask,
3951 (_.EltVT _.FRC:$src1),
3952 (_.EltVT _.FRC:$src2))))))),
3953 (!cast<Instruction>(InstrStr#rrk)
3954 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
3957 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
3959 def : Pat<(_.VT (OpNode _.RC:$src0,
3960 (_.VT (scalar_to_vector
3961 (_.EltVT (X86selects VK1WM:$mask,
3962 (_.EltVT _.FRC:$src1),
3963 (_.EltVT ZeroFP))))))),
3964 (!cast<Instruction>(InstrStr#rrkz)
3967 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
// Select a masked scalar store (InstrStr#mrk) for a 512-bit masked_store
// whose stored value is a 128-bit vector inserted at index 0 of undef.
//   InstrStr - instruction name prefix; "mrk" is appended to pick the record.
//   _        - vector-type bundle providing the info512/info128 types.
//   Mask     - the mask dag to match; the output refers to $mask, so the dag
//              passed here is expected to bind $mask in MaskRC.
//   MaskRC   - register class of $mask, copied to VK1WM for the instruction.
3970 multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
3971 dag Mask, RegisterClass MaskRC> {
3973 def : Pat<(masked_store
3974 (_.info512.VT (insert_subvector undef,
3975 (_.info128.VT _.info128.RC:$src),
3976 (iPTR 0))), addr:$dst, Mask),
3977 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
3978 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
3979 _.info128.RC:$src)>;
// Variant of avx512_store_scalar_lowering for masks that arrive in a
// sub-register class: $mask (in MaskRC) is first placed into an IMPLICIT_DEF
// i32 at sub-register index `subreg`, and that i32 is then copied to VK1WM.
// The matched source pattern is the same 512-bit masked_store of a 128-bit
// value inserted at index 0 of undef.
3983 multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
3984 AVX512VLVectorVTInfo _,
3985 dag Mask, RegisterClass MaskRC,
3986 SubRegIndex subreg> {
3988 def : Pat<(masked_store
3989 (_.info512.VT (insert_subvector undef,
3990 (_.info128.VT _.info128.RC:$src),
3991 (iPTR 0))), addr:$dst, Mask),
3992 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
3993 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
3994 _.info128.RC:$src)>;
3998 // This matches the more recent codegen from clang that avoids emitting a 512
3999 // bit masked store directly. Codegen will widen 128-bit masked store to 512
4000 // bits on AVX512F only targets.
// Mask512 is the mask dag matched by the widened 512-bit store and Mask128
// the one matched by the 128-bit store. Both patterns select InstrStr#mrk and
// build its mask the same way: $mask (in MaskRC) is inserted into an
// IMPLICIT_DEF i32 at `subreg`, then copied to VK1WM.
4001 multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4002 AVX512VLVectorVTInfo _,
4003 dag Mask512, dag Mask128,
4004 RegisterClass MaskRC,
4005 SubRegIndex subreg> {
// AVX512F pattern: 128-bit value inserted at index 0 of an undef 512-bit
// vector, stored under Mask512.
4008 def : Pat<(masked_store
4009 (_.info512.VT (insert_subvector undef,
4010 (_.info128.VT _.info128.RC:$src),
4011 (iPTR 0))), addr:$dst, Mask512),
4012 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4013 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4014 _.info128.RC:$src)>;
4016 // AVX512VL pattern.
4017 def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4018 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4019 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4020 _.info128.RC:$src)>;
4023 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4024 dag Mask, RegisterClass MaskRC> {
4026 def : Pat<(_.info128.VT (extract_subvector
4027 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4028 _.info512.ImmAllZerosV)),
4030 (!cast<Instruction>(InstrStr#rmkz)
4031 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4034 def : Pat<(_.info128.VT (extract_subvector
4035 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4036 (_.info512.VT (insert_subvector undef,
4037 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4040 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4041 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4046 multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4047 AVX512VLVectorVTInfo _,
4048 dag Mask, RegisterClass MaskRC,
4049 SubRegIndex subreg> {
4051 def : Pat<(_.info128.VT (extract_subvector
4052 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4053 _.info512.ImmAllZerosV)),
4055 (!cast<Instruction>(InstrStr#rmkz)
4056 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4059 def : Pat<(_.info128.VT (extract_subvector
4060 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4061 (_.info512.VT (insert_subvector undef,
4062 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4065 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4066 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4071 // This matches the more recent codegen from clang that avoids emitting a 512
4072 // bit masked load directly. Codegen will widen 128-bit masked load to 512
4073 // bits on AVX512F only targets.
4074 multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4075 AVX512VLVectorVTInfo _,
4076 dag Mask512, dag Mask128,
4077 RegisterClass MaskRC,
4078 SubRegIndex subreg> {
4079 // AVX512F patterns.
4080 def : Pat<(_.info128.VT (extract_subvector
4081 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4082 _.info512.ImmAllZerosV)),
4084 (!cast<Instruction>(InstrStr#rmkz)
4085 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4088 def : Pat<(_.info128.VT (extract_subvector
4089 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4090 (_.info512.VT (insert_subvector undef,
4091 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4094 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4095 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4098 // AVX512VL patterns.
4099 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4100 _.info128.ImmAllZerosV)),
4101 (!cast<Instruction>(InstrStr#rmkz)
4102 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4105 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4106 (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4107 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4108 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
// Instantiate the masked scalar-move lowering patterns for f32 (VMOVSSZ,
// X86Movss, zero leaf fp32imm0, v4f32x_info) and f64 (VMOVSDZ, X86Movsd,
// zero leaf fp64imm0, v2f64x_info).
4112 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4113 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
// Masked scalar-store lowerings. In every mask dag only bit 0 of $mask is
// kept (and with 1) before it is reinterpreted as a vector of i1:
//   VMOVSSZ, GR32 mask: and -> trunc to i16 -> bitconvert to v16i1.
//   VMOVSSZ, GR16 mask: and -> bitconvert to v16i1 (inserted via sub_16bit).
//   VMOVSDZ, GR8  mask: and -> bitconvert to v8i1  (inserted via sub_8bit).
4115 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4116 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4117 defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4118 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4119 defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4120 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4122 defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4123 (v16i1 (insert_subvector
4124 (v16i1 immAllZerosV),
4125 (v4i1 (extract_subvector
4126 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4129 (v4i1 (extract_subvector
4130 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4131 (iPTR 0))), GR8, sub_8bit>;
4132 defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4137 (v16i1 immAllZerosV),
4138 (v2i1 (extract_subvector
4139 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4143 (v2i1 (extract_subvector
4144 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4145 (iPTR 0))), GR8, sub_8bit>;
// Masked scalar-load lowerings, using the same three mask shapes as the
// store lowerings: only bit 0 of $mask is kept (and with 1), then the value
// is reinterpreted as v16i1 (GR32 via trunc to i16, or GR16) or v8i1 (GR8).
4147 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4148 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4149 defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4150 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4151 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4152 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4154 defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4155 (v16i1 (insert_subvector
4156 (v16i1 immAllZerosV),
4157 (v4i1 (extract_subvector
4158 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4161 (v4i1 (extract_subvector
4162 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4163 (iPTR 0))), GR8, sub_8bit>;
4164 defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4169 (v16i1 immAllZerosV),
4170 (v2i1 (extract_subvector
4171 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4175 (v2i1 (extract_subvector
4176 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4177 (iPTR 0))), GR8, sub_8bit>;
// Scalar X86selects on f32/f64, implemented with the masked vmovss/vmovsd
// register forms. The scalar operands are copied into VR128X, the masked
// move is issued, and the result is copied back to FR32X/FR64X.
//
// Merge form (rrk): the first instruction operand is $src0, which the rrk
// record ties to $dst, so the select's false value ($src2) is passed there.
// The instruction's $src1 operand is IMPLICIT_DEF and its $src2 operand is
// the select's true value ($src1).
4179 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4180 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4181 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4182 VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4183 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
// Zeroing form (rrkz): used when the select's false value is the fp32imm0 leaf.
4185 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4186 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4187 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
// f64 merge form, same operand arrangement as the f32 one.
4189 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4190 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4191 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4192 VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4193 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
// f64 zeroing form; the false value is matched with the fpimm0 leaf.
4195 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fpimm0)),
4196 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4197 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4199 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
// Register-to-register "vmovss" records using opcode 0x11 in MRMDestReg
// form (the non-_REV records use 0x10 / MRMSrcReg). None carries a selection
// pattern; each names its non-_REV counterpart through FoldGenData.
//
// Unmasked form.
4200 def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4201 (ins VR128X:$src1, VR128X:$src2),
4202 "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4203 []>, XS, EVEX_4V, VEX_LIG,
4204 FoldGenData<"VMOVSSZrr">,
4205 Sched<[SchedWriteFShuffle.XMM]>;
// Merge-masked form: $src0 is tied to $dst.
4207 let Constraints = "$src0 = $dst" in
4208 def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4209 (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4210 VR128X:$src1, VR128X:$src2),
4211 "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4212 "$dst {${mask}}, $src1, $src2}",
4213 []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
4214 FoldGenData<"VMOVSSZrrk">,
4215 Sched<[SchedWriteFShuffle.XMM]>;
// Zero-masked form ({z} in the assembly string, EVEX_KZ).
4217 def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4218 (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4219 "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4220 "$dst {${mask}} {z}, $src1, $src2}",
4221 []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
4222 FoldGenData<"VMOVSSZrrkz">,
4223 Sched<[SchedWriteFShuffle.XMM]>;
4225 def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4226 (ins VR128X:$src1, VR128X:$src2),
4227 "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4228 []>, XD, EVEX_4V, VEX_LIG, VEX_W,
4229 FoldGenData<"VMOVSDZrr">,
4230 Sched<[SchedWriteFShuffle.XMM]>;
4232 let Constraints = "$src0 = $dst" in
4233 def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4234 (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4235 VR128X:$src1, VR128X:$src2),
4236 "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4237 "$dst {${mask}}, $src1, $src2}",
4238 []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
4239 VEX_W, FoldGenData<"VMOVSDZrrk">,
4240 Sched<[SchedWriteFShuffle.XMM]>;
4242 def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4243 (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4245 "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4246 "$dst {${mask}} {z}, $src1, $src2}",
4247 []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
4248 VEX_W, FoldGenData<"VMOVSDZrrkz">,
4249 Sched<[SchedWriteFShuffle.XMM]>;
// ".s"-suffixed mnemonics ("vmovss.s" / "vmovsd.s") mapped onto the _REV
// records: one alias each for the unmasked, merge-masked ({k}) and
// zero-masked ({k}{z}) forms. Every alias is declared with a trailing 0.
4252 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4253 (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4254 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4255 "$dst {${mask}}, $src1, $src2}",
4256 (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4257 VR128X:$src1, VR128X:$src2), 0>;
4258 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4259 "$dst {${mask}} {z}, $src1, $src2}",
4260 (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4261 VR128X:$src1, VR128X:$src2), 0>;
4262 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4263 (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4264 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4265 "$dst {${mask}}, $src1, $src2}",
4266 (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4267 VR128X:$src1, VR128X:$src2), 0>;
4268 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4269 "$dst {${mask}} {z}, $src1, $src2}",
4270 (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4271 VR128X:$src1, VR128X:$src2), 0>;
4273 let Predicates = [HasAVX512, OptForSize] in {
4274 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4275 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4276 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4277 (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4279 // Move low f32 and clear high bits.
4280 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4281 (SUBREG_TO_REG (i32 0),
4282 (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4283 (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4284 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4285 (SUBREG_TO_REG (i32 0),
4286 (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4287 (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4289 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4290 (SUBREG_TO_REG (i32 0),
4291 (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4292 (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4293 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4294 (SUBREG_TO_REG (i32 0),
4295 (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4296 (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4299 // Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4300 // VMOVSS/SD. Unfortunately, this loses the ability to use XMM16-31.
// Both patterns work on the low 128 bits only: the xmm sub-register of the
// 512-bit source is blended with a zero vector (V_SET0) and the result is
// wrapped back up with SUBREG_TO_REG.
// NOTE(review): the immediates (1 for VBLENDPSrri, 3 for VPBLENDWrri)
// presumably both select the low 32 bits from the source -- confirm against
// the instruction reference.
4301 let Predicates = [HasAVX512, OptForSpeed] in {
4302 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4303 (SUBREG_TO_REG (i32 0),
4304 (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4305 (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4306 (i8 1))), sub_xmm)>;
4307 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4308 (SUBREG_TO_REG (i32 0),
4309 (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4310 (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4311 (i8 3))), sub_xmm)>;
4314 let Predicates = [HasAVX512] in {
4315 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4316 (VMOVSSZrm addr:$src)>;
4317 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4318 (VMOVSDZrm addr:$src)>;
4320 // Represent the same patterns above but in the form they appear for 256-bit types.
4322 def : Pat<(v8f32 (X86vzload addr:$src)),
4323 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4324 def : Pat<(v4f64 (X86vzload addr:$src)),
4325 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4327 // Represent the same patterns above but in the form they appear for 512-bit types.
4329 def : Pat<(v16f32 (X86vzload addr:$src)),
4330 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4331 def : Pat<(v8f64 (X86vzload addr:$src)),
4332 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4335 let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4336 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4338 "vmovq\t{$src, $dst|$dst, $src}",
4339 [(set VR128X:$dst, (v2i64 (X86vzmovl
4340 (v2i64 VR128X:$src))))]>,
4344 let Predicates = [HasAVX512] in {
4345 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4346 (VMOVDI2PDIZrr GR32:$src)>;
4348 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4349 (VMOV64toPQIZrr GR64:$src)>;
4351 // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
4352 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
4353 (VMOVDI2PDIZrm addr:$src)>;
4354 def : Pat<(v4i32 (X86vzload addr:$src)),
4355 (VMOVDI2PDIZrm addr:$src)>;
4356 def : Pat<(v8i32 (X86vzload addr:$src)),
4357 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4358 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4359 (VMOVZPQILo2PQIZrr VR128X:$src)>;
4360 def : Pat<(v2i64 (X86vzload addr:$src)),
4361 (VMOVQI2PQIZrm addr:$src)>;
4362 def : Pat<(v4i64 (X86vzload addr:$src)),
4363 (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4365 // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4366 def : Pat<(v16i32 (X86vzload addr:$src)),
4367 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4368 def : Pat<(v8i64 (X86vzload addr:$src)),
4369 (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4371 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4372 (SUBREG_TO_REG (i32 0),
4373 (v2f64 (VMOVZPQILo2PQIZrr
4374 (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4376 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4377 (SUBREG_TO_REG (i32 0),
4378 (v2i64 (VMOVZPQILo2PQIZrr
4379 (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4382 def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4383 (SUBREG_TO_REG (i32 0),
4384 (v2f64 (VMOVZPQILo2PQIZrr
4385 (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4387 def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4388 (SUBREG_TO_REG (i32 0),
4389 (v2i64 (VMOVZPQILo2PQIZrr
4390 (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4394 //===----------------------------------------------------------------------===//
4395 // AVX-512 - Non-temporals
4396 //===----------------------------------------------------------------------===//
// VMOVNTDQAZrm: 512-bit "vmovntdqa" load (opcode 0x2A, MRMSrcMem) from an
// i512mem operand into VR512. Declared with an empty pattern list and the
// SSEPackedInt domain.
4398 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4399 (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4400 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4401 EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4403 let Predicates = [HasVLX] in {
4404 def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4406 "vmovntdqa\t{$src, $dst|$dst, $src}",
4407 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4408 EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4410 def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4412 "vmovntdqa\t{$src, $dst|$dst, $src}",
4413 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4414 EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
// One non-temporal store record ("mr") for a single vector width.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   _         - vector-type bundle (register class, memory operand, VT, ...).
//   Sched     - move/load/store scheduling bundle; its MR member is used.
//   st_frag   - store fragment to match; defaults to alignednontemporalstore.
// The record is defined with AddedComplexity = 400.
// NOTE(review): presumably so this pattern is preferred over the ordinary
// store patterns -- confirm.
4417 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4418 X86SchedWriteMoveLS Sched,
4419 PatFrag st_frag = alignednontemporalstore> {
4420 let SchedRW = [Sched.MR], AddedComplexity = 400 in
4421 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4422 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4423 [(st_frag (_.VT _.RC:$src), addr:$dst)],
4424 _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
// Expand avx512_movnt across the three vector lengths:
//   Z    - info512 / Sched.ZMM, EVEX_V512, requires HasAVX512.
//   Z256 - info256 / Sched.YMM, EVEX_V256, requires HasAVX512 and HasVLX.
//   Z128 - info128 / Sched.XMM, EVEX_V128, requires HasAVX512 and HasVLX.
4427 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4428 AVX512VLVectorVTInfo VTInfo,
4429 X86SchedWriteMoveLSWidths Sched> {
4430 let Predicates = [HasAVX512] in
4431 defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4433 let Predicates = [HasAVX512, HasVLX] in {
4434 defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4435 defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
// Non-temporal store instantiations:
//   VMOVNTDQ - opcode 0xE7, i64 element info, PD prefix.
//   VMOVNTPD - opcode 0x2B, f64 element info, PD prefix + VEX_W.
//   VMOVNTPS - opcode 0x2B, f32 element info, PS prefix.
4439 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4440 SchedWriteVecMoveLSNT>, PD;
4441 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4442 SchedWriteFMoveLSNT>, PD, VEX_W;
4443 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4444 SchedWriteFMoveLSNT>, PS;
// Extra 512-bit non-temporal patterns. Stores of v16i32/v32i16/v64i8 are
// mapped onto VMOVNTDQZmr. Aligned non-temporal loads of every 512-bit type
// listed here (v8f64, v16f32, v8i64, v16i32, v32i16, v64i8) all select
// VMOVNTDQAZrm, which itself carries no pattern.
4446 let Predicates = [HasAVX512], AddedComplexity = 400 in {
4447 def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4448 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4449 def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4450 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4451 def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4452 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4454 def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4455 (VMOVNTDQAZrm addr:$src)>;
4456 def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4457 (VMOVNTDQAZrm addr:$src)>;
4458 def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4459 (VMOVNTDQAZrm addr:$src)>;
4460 def : Pat<(v16i32 (alignednontemporalload addr:$src)),
4461 (VMOVNTDQAZrm addr:$src)>;
4462 def : Pat<(v32i16 (alignednontemporalload addr:$src)),
4463 (VMOVNTDQAZrm addr:$src)>;
4464 def : Pat<(v64i8 (alignednontemporalload addr:$src)),
4465 (VMOVNTDQAZrm addr:$src)>;
4468 let Predicates = [HasVLX], AddedComplexity = 400 in {
4469 def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4470 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4471 def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4472 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4473 def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4474 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4476 def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4477 (VMOVNTDQAZ256rm addr:$src)>;
4478 def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4479 (VMOVNTDQAZ256rm addr:$src)>;
4480 def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4481 (VMOVNTDQAZ256rm addr:$src)>;
4482 def : Pat<(v8i32 (alignednontemporalload addr:$src)),
4483 (VMOVNTDQAZ256rm addr:$src)>;
4484 def : Pat<(v16i16 (alignednontemporalload addr:$src)),
4485 (VMOVNTDQAZ256rm addr:$src)>;
4486 def : Pat<(v32i8 (alignednontemporalload addr:$src)),
4487 (VMOVNTDQAZ256rm addr:$src)>;
4489 def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4490 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4491 def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4492 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4493 def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4494 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4496 def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4497 (VMOVNTDQAZ128rm addr:$src)>;
4498 def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4499 (VMOVNTDQAZ128rm addr:$src)>;
4500 def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4501 (VMOVNTDQAZ128rm addr:$src)>;
4502 def : Pat<(v4i32 (alignednontemporalload addr:$src)),
4503 (VMOVNTDQAZ128rm addr:$src)>;
4504 def : Pat<(v8i16 (alignednontemporalload addr:$src)),
4505 (VMOVNTDQAZ128rm addr:$src)>;
4506 def : Pat<(v16i8 (alignednontemporalload addr:$src)),
4507 (VMOVNTDQAZ128rm addr:$src)>;
4510 //===----------------------------------------------------------------------===//
4511 // AVX-512 - Integer arithmetic
4513 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4514 X86VectorVTInfo _, X86FoldableSchedWrite sched,
4515 bit IsCommutable = 0> {
4516 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4517 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4518 "$src2, $src1", "$src1, $src2",
4519 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4520 IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
4523 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4524 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4525 "$src2, $src1", "$src1, $src2",
4526 (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
4527 AVX512BIBase, EVEX_4V,
4528 Sched<[sched.Folded, sched.ReadAfterFold]>;
4531 multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4532 X86VectorVTInfo _, X86FoldableSchedWrite sched,
4533 bit IsCommutable = 0> :
4534 avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4535 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4536 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4537 "${src2}"##_.BroadcastStr##", $src1",
4538 "$src1, ${src2}"##_.BroadcastStr,
4539 (_.VT (OpNode _.RC:$src1,
4541 (_.ScalarLdFrag addr:$src2))))>,
4542 AVX512BIBase, EVEX_4V, EVEX_B,
4543 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Expand avx512_binop_rm (no broadcast form) across the three vector lengths.
//   prd          - feature predicate required by all three widths.
//   IsCommutable - forwarded unchanged to avx512_binop_rm.
// Z (info512, sched.ZMM, EVEX_V512) needs only prd; Z256 and Z128 (info256 /
// info128, sched.YMM / sched.XMM, EVEX_V256 / EVEX_V128) also need HasVLX.
4546 multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4547 AVX512VLVectorVTInfo VTInfo,
4548 X86SchedWriteWidths sched, Predicate prd,
4549 bit IsCommutable = 0> {
4550 let Predicates = [prd] in
4551 defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4552 IsCommutable>, EVEX_V512;
4554 let Predicates = [prd, HasVLX] in {
4555 defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4556 sched.YMM, IsCommutable>, EVEX_V256;
4557 defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4558 sched.XMM, IsCommutable>, EVEX_V128;
// Same three-width expansion as avx512_binop_rm_vl, but built on
// avx512_binop_rmb, so each width also gets the broadcast-memory ("rmb")
// form in addition to rr/rm. Z needs only prd; Z256/Z128 also need HasVLX.
4562 multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4563 AVX512VLVectorVTInfo VTInfo,
4564 X86SchedWriteWidths sched, Predicate prd,
4565 bit IsCommutable = 0> {
4566 let Predicates = [prd] in
4567 defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4568 IsCommutable>, EVEX_V512;
4570 let Predicates = [prd, HasVLX] in {
4571 defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4572 sched.YMM, IsCommutable>, EVEX_V256;
4573 defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4574 sched.XMM, IsCommutable>, EVEX_V128;
// Quadword (i64 element) binop: avx512_binop_rmb_vl over avx512vl_i64_info,
// tagged VEX_W with 64-bit CD8 scaling. NAME keeps the caller's record prefix.
4578 multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4579 X86SchedWriteWidths sched, Predicate prd,
4580 bit IsCommutable = 0> {
4581 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4582 sched, prd, IsCommutable>,
4583 VEX_W, EVEX_CD8<64, CD8VF>;
// Doubleword (i32 element) binop: avx512_binop_rmb_vl over avx512vl_i32_info
// with 32-bit CD8 scaling. NAME keeps the caller's record prefix.
4586 multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4587 X86SchedWriteWidths sched, Predicate prd,
4588 bit IsCommutable = 0> {
4589 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4590 sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
4593 multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4594 X86SchedWriteWidths sched, Predicate prd,
4595 bit IsCommutable = 0> {
4596 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4597 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4601 multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4602 X86SchedWriteWidths sched, Predicate prd,
4603 bit IsCommutable = 0> {
4604 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4605 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
4609 multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4610 SDNode OpNode, X86SchedWriteWidths sched,
4611 Predicate prd, bit IsCommutable = 0> {
4612 defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
4615 defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
4619 multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4620 SDNode OpNode, X86SchedWriteWidths sched,
4621 Predicate prd, bit IsCommutable = 0> {
4622 defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
4625 defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
// Binop for all four integer element sizes. Opcodes are given in
// byte, word, dword, qword order. The dword/qword forms are gated on
// HasAVX512 and the byte/word forms on HasBWI.
4629 multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4630 bits<8> opc_d, bits<8> opc_q,
4631 string OpcodeStr, SDNode OpNode,
4632 X86SchedWriteWidths sched,
4633 bit IsCommutable = 0> {
4634 defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4635 sched, HasAVX512, IsCommutable>,
4636 avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4637 sched, HasBWI, IsCommutable>;
// Maskable binary operation whose source and destination vector types may
// differ (_Src vs. _Dst) and whose broadcast form uses a third type (_Brdct).
// Defines three forms, all built on AVX512_maskable with AVX512BIBase/EVEX_4V:
//   rr  - both operands in _Src.RC registers.
//   rm  - second operand loaded with _Src.LdFrag from _Src.MemOp.
//   rmb - second operand is a _Brdct scalar load, broadcast with
//         X86VBroadcast to _Brdct.VT and bitconverted for OpNode; marked
//         EVEX_B.
// The result type of every pattern is _Dst.VT and the destination register
// class is _Dst.RC.
// NOTE(review): several continuation lines (e.g. embedded lines 4648, 4651,
// 4663) are missing from this listing, so parts of the rr pattern and the
// rmb OpcodeStr argument are not visible here.
4640 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
4641 X86FoldableSchedWrite sched,
4642 SDNode OpNode,X86VectorVTInfo _Src,
4643 X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4644 bit IsCommutable = 0> {
// Register-register form.
4645 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4646 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4647 "$src2, $src1","$src1, $src2",
4649 (_Src.VT _Src.RC:$src1),
4650 (_Src.VT _Src.RC:$src2))),
4652 AVX512BIBase, EVEX_4V, Sched<[sched]>;
// Register-memory form (full-width vector load).
4653 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4654 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4655 "$src2, $src1", "$src1, $src2",
4656 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4657 (_Src.LdFrag addr:$src2)))>,
4658 AVX512BIBase, EVEX_4V,
4659 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Register-broadcast form: the asm operand string carries _Brdct.BroadcastStr.
4661 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4662 (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
4664 "${src2}"##_Brdct.BroadcastStr##", $src1",
4665 "$src1, ${src2}"##_Brdct.BroadcastStr,
4666 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4667 (_Brdct.VT (X86VBroadcast
4668 (_Brdct.ScalarLdFrag addr:$src2))))))>,
4669 AVX512BIBase, EVEX_4V, EVEX_B,
4670 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Packed-integer arithmetic instantiations.
// For avx512_binop_rm_vl_all the opcode arguments are given in b, w, d, q
// order and the final argument is IsCommutable; for avx512_binop_rm_vl_bw the
// order is b, w and the argument after the predicate is IsCommutable.
// For the single-width avx512_binop_rm_vl_{d,w,q} helpers the trailing
// "<predicate>, 1" presumably follows the same (prd, IsCommutable) convention
// - their signatures are outside this listing, TODO confirm.
// NOTE(review): the VPMULHW / VPMULHUW argument lists continue on lines that
// are missing from this listing (embedded lines 4693 and 4695).
4673 defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
4674 SchedWriteVecALU, 1>;
4675 defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
4676 SchedWriteVecALU, 0>;
// Saturating add/subtract exist only in byte/word widths and require HasBWI.
4677 defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
4678 SchedWriteVecALU, HasBWI, 1>;
4679 defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
4680 SchedWriteVecALU, HasBWI, 0>;
4681 defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
4682 SchedWriteVecALU, HasBWI, 1>;
4683 defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
4684 SchedWriteVecALU, HasBWI, 0>;
// Multiplies: each width is gated on a different predicate
// (HasAVX512 / HasBWI / HasDQI).
4685 defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
4686 SchedWritePMULLD, HasAVX512, 1>, T8PD;
4687 defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
4688 SchedWriteVecIMul, HasBWI, 1>;
4689 defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
4690 SchedWriteVecIMul, HasDQI, 1>, T8PD,
4691 NotEVEX2VEXConvertible;
4692 defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
4694 defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
4696 defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
4697 SchedWriteVecIMul, HasBWI, 1>, T8PD;
4698 defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
4699 SchedWriteVecALU, HasBWI, 1>;
4700 defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
4701 SchedWriteVecIMul, HasAVX512, 1>, T8PD;
4702 defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
4703 SchedWriteVecIMul, HasAVX512, 1>;
// Instantiates avx512_binop_rm2 at all three vector lengths.
//   NAME#Z    - 512-bit, requires prd.
//   NAME#Z256 - 256-bit, requires HasVLX and prd.
//   NAME#Z128 - 128-bit, requires HasVLX and prd.
// Source and destination types come from _SrcVTInfo / _DstVTInfo, while the
// broadcast type is fixed to the i64-element info of the matching length
// (v8i64_info / v4i64x_info / v2i64x_info). Every form is tagged
// EVEX_CD8<64, CD8VF> and VEX_W.
4705 multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
4706 X86SchedWriteWidths sched,
4707 AVX512VLVectorVTInfo _SrcVTInfo,
4708 AVX512VLVectorVTInfo _DstVTInfo,
4709 SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
4710 let Predicates = [prd] in
4711 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
4712 _SrcVTInfo.info512, _DstVTInfo.info512,
4713 v8i64_info, IsCommutable>,
4714 EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
4715 let Predicates = [HasVLX, prd] in {
4716 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
4717 _SrcVTInfo.info256, _DstVTInfo.info256,
4718 v4i64x_info, IsCommutable>,
4719 EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
4720 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
4721 _SrcVTInfo.info128, _DstVTInfo.info128,
4722 v2i64x_info, IsCommutable>,
4723 EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
// VPMULTISHIFTQB: byte-vector source and destination (avx512vl_i8_info for
// both), selected from X86multishift, gated on HasVBMI, not commutable (0).
// The qword broadcast form comes from avx512_binop_all's fixed i64 _Brdct.
4727 defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
4728 avx512vl_i8_info, avx512vl_i8_info,
4729 X86multishift, HasVBMI, 0>, T8PD;
// Broadcast-memory (rmb) form of a pack-style operation whose result type
// (_Dst) differs from its operand type (_Src).
// The second operand is a _Src scalar load broadcast to _Src.VT with
// X86VBroadcast and bitconverted; the form is tagged EVEX_B and uses
// _Src.EltSize for the EVEX_CD8 scaling.
// NOTE(review): the OpcodeStr argument line (embedded line 4736) is missing
// from this listing.
4731 multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4732 X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
4733 X86FoldableSchedWrite sched> {
4734 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4735 (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
4737 "${src2}"##_Src.BroadcastStr##", $src1",
4738 "$src1, ${src2}"##_Src.BroadcastStr,
4739 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4740 (_Src.VT (X86VBroadcast
4741 (_Src.ScalarLdFrag addr:$src2))))))>,
4742 EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
4743 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Register-register (rr) and register-memory (rm) forms of a pack-style
// operation taking _Src-typed operands and producing a _Dst-typed result.
// IsCommutable is passed twice to the rr AVX512_maskable instantiation
// (two consecutive template arguments); the rm form does not receive it.
// Both forms use _Src.EltSize for the EVEX_CD8 scaling.
// NOTE(review): the first line of the rr pattern (embedded line 4753) is
// missing from this listing.
4746 multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
4747 SDNode OpNode,X86VectorVTInfo _Src,
4748 X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
4749 bit IsCommutable = 0> {
4750 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4751 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4752 "$src2, $src1","$src1, $src2",
4754 (_Src.VT _Src.RC:$src1),
4755 (_Src.VT _Src.RC:$src2))),
4756 IsCommutable, IsCommutable>,
4757 EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
4758 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4759 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4760 "$src2, $src1", "$src1, $src2",
4761 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4762 (_Src.LdFrag addr:$src2)))>,
4763 EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
4764 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Pack from i32 elements to i16 elements at all three vector lengths.
// Each length combines avx512_packs_rm (rr/rm) with avx512_packs_rmb (rmb),
// all scheduled on SchedWriteShuffle:
//   NAME#Z    - v16i32 -> v32i16, requires HasBWI.
//   NAME#Z256 - v8i32  -> v16i16, requires HasBWI and HasVLX.
//   NAME#Z128 - v4i32  -> v8i16,  requires HasBWI and HasVLX.
// NOTE(review): the rest of the parameter list (embedded line 4768) and the
// trailing encoding classes of the Z256/Z128 defms are missing from this
// listing.
4767 multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
4769 let Predicates = [HasBWI] in
4770 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
4771 v32i16_info, SchedWriteShuffle.ZMM>,
4772 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
4773 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
4774 let Predicates = [HasBWI, HasVLX] in {
4775 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
4776 v16i16x_info, SchedWriteShuffle.YMM>,
4777 avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
4778 v16i16x_info, SchedWriteShuffle.YMM>,
4780 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
4781 v8i16x_info, SchedWriteShuffle.XMM>,
4782 avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
4783 v8i16x_info, SchedWriteShuffle.XMM>,
// Pack from i16 elements to i8 elements at all three vector lengths.
// Only avx512_packs_rm (rr/rm) is instantiated here - no broadcast (rmb)
// form is defined in this multiclass.
//   NAME#Z    - v32i16 -> v64i8, requires HasBWI; tagged EVEX_V512, VEX_WIG.
//   NAME#Z256 - v16i16 -> v32i8, requires HasBWI and HasVLX.
//   NAME#Z128 - v8i16  -> v16i8, requires HasBWI and HasVLX.
// NOTE(review): the rest of the parameter list (embedded line 4788) and the
// trailing encoding classes of the Z256/Z128 defms are missing from this
// listing.
4787 multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
4789 let Predicates = [HasBWI] in
4790 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
4791 SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
4792 let Predicates = [HasBWI, HasVLX] in {
4793 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
4794 v32i8x_info, SchedWriteShuffle.YMM>,
4796 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
4797 v16i8x_info, SchedWriteShuffle.XMM>,
// Multiply-add style operation with differing source/destination element
// types, built from avx512_packs_rm (rr/rm forms only) and scheduled on
// SchedWriteVecIMul.
//   NAME#Z    - _Src.info512 -> _Dst.info512, requires HasBWI.
//   NAME#Z256 - _Src.info256 -> _Dst.info256, requires HasBWI and HasVLX.
//   NAME#Z128 - _Src.info128 -> _Dst.info128, requires HasBWI and HasVLX.
// IsCommutable is forwarded to every length.
4802 multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
4803 SDNode OpNode, AVX512VLVectorVTInfo _Src,
4804 AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
4805 let Predicates = [HasBWI] in
4806 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
4807 _Dst.info512, SchedWriteVecIMul.ZMM,
4808 IsCommutable>, EVEX_V512;
4809 let Predicates = [HasBWI, HasVLX] in {
4810 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
4811 _Dst.info256, SchedWriteVecIMul.YMM,
4812 IsCommutable>, EVEX_V256;
4813 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
4814 _Dst.info128, SchedWriteVecIMul.XMM,
4815 IsCommutable>, EVEX_V128;
// Pack instantiations: signed (X86Packss) and unsigned (X86Packus)
// saturation, dword->word and word->byte.
// VPACKUSDW is the only one using AVX5128IBase instead of AVX512BIBase.
// NOTE(review): AVX5128IBase is declared outside this listing; presumably it
// selects a different opcode map than AVX512BIBase - confirm.
4819 defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
4820 defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
4821 defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
4822 defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
// Multiply-add instantiations. VPMADDUBSW (i8 -> i16) leaves IsCommutable at
// its default of 0; VPMADDWD (i16 -> i32) passes 1.
4824 defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
4825 avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
4826 defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
4827 avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
// Packed-integer min/max instantiations for signed (smax/smin) and unsigned
// (umax/umin) comparisons at byte, word, dword and qword widths.
// Byte/word forms require HasBWI; dword/qword forms require HasAVX512.
// All pass a trailing 1 and all use SchedWriteVecALU. Every qword form is
// additionally tagged NotEVEX2VEXConvertible.
// Signed max.
4829 defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
4830 SchedWriteVecALU, HasBWI, 1>, T8PD;
4831 defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
4832 SchedWriteVecALU, HasBWI, 1>;
4833 defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
4834 SchedWriteVecALU, HasAVX512, 1>, T8PD;
4835 defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
4836 SchedWriteVecALU, HasAVX512, 1>, T8PD,
4837 NotEVEX2VEXConvertible;
// Unsigned max.
4839 defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
4840 SchedWriteVecALU, HasBWI, 1>;
4841 defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
4842 SchedWriteVecALU, HasBWI, 1>, T8PD;
4843 defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
4844 SchedWriteVecALU, HasAVX512, 1>, T8PD;
4845 defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
4846 SchedWriteVecALU, HasAVX512, 1>, T8PD,
4847 NotEVEX2VEXConvertible;
// Signed min.
4849 defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
4850 SchedWriteVecALU, HasBWI, 1>, T8PD;
4851 defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
4852 SchedWriteVecALU, HasBWI, 1>;
4853 defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
4854 SchedWriteVecALU, HasAVX512, 1>, T8PD;
4855 defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
4856 SchedWriteVecALU, HasAVX512, 1>, T8PD,
4857 NotEVEX2VEXConvertible;
// Unsigned min.
4859 defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
4860 SchedWriteVecALU, HasBWI, 1>;
4861 defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
4862 SchedWriteVecALU, HasBWI, 1>, T8PD;
4863 defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
4864 SchedWriteVecALU, HasAVX512, 1>, T8PD;
4865 defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
4866 SchedWriteVecALU, HasAVX512, 1>, T8PD,
4867 NotEVEX2VEXConvertible;
// Widening patterns for 64-bit-element multiply when DQI is available but
// VLX is not: a v4i64 / v2i64 `mul` has each operand inserted into an
// undefined v8i64 (INSERT_SUBREG at sub_ymm / sub_xmm) so that a 512-bit
// instruction can be used instead.
// This [HasDQI, NoVLX] block used to appear twice back to back with identical
// patterns; the second copy added nothing for instruction selection and has
// been dropped, so the patterns are now defined exactly once.
// NOTE(review): the lines naming the 512-bit instruction and extracting the
// narrow result are not visible in this listing - confirm against the full
// file.
4869 // PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
4870 let Predicates = [HasDQI, NoVLX] in {
4871 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
4874 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4875 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4878 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
4881 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4882 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
// Widening patterns for a v4i64 / v2i64 binary OpNode: each operand is
// inserted into an undefined v8i64 with INSERT_SUBREG (sub_ymm for 256-bit,
// sub_xmm for 128-bit) so the 512-bit instruction Instr can be used.
// NOTE(review): the lines that apply Instr and extract the narrow result are
// not visible in this listing - confirm against the full file.
4903 multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
4904 def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
4907 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4908 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4911 def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
4914 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4915 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
// Without VLX, lower 128/256-bit qword min/max through the 512-bit
// register-register instructions (one instantiation per umax/umin/smax/smin).
4919 let Predicates = [HasAVX512, NoVLX] in {
4920 defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
4921 defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
4922 defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;
4923 defm : avx512_min_max_lowering<VPMINSQZrr, smin>;
4926 //===----------------------------------------------------------------------===//
4927 // AVX-512 Logical Instructions
4928 //===----------------------------------------------------------------------===//
// Bitwise logic instructions in dword/qword forms only. The same opcode is
// passed for both the d and q variants of each operation.
// and/or/xor pass IsCommutable = 1; VPANDN (X86andnp) omits the argument and
// therefore uses the multiclass default.
4930 defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
4931 SchedWriteVecLogic, HasAVX512, 1>;
4932 defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
4933 SchedWriteVecLogic, HasAVX512, 1>;
4934 defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
4935 SchedWriteVecLogic, HasAVX512, 1>;
4936 defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
4937 SchedWriteVecLogic, HasAVX512>;
// 128-bit and 256-bit logic patterns, available with HasVLX.
// The logic instructions above exist only in D/Q forms, so these patterns
// route other element types onto them:
//   * byte/word-typed and/or/xor/X86andnp (register and full-vector-load
//     operands) select the Q-suffixed rr / rm instructions;
//   * an f32 / f64 scalar-load broadcast that has been bitcast to an i32 / i64
//     vector selects the D / Q broadcast (rmb) instruction.
4939 let Predicates = [HasVLX] in {
// ---- 128-bit: v16i8 / v8i16 register-register ----
4940 def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
4941 (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
4942 def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
4943 (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
4945 def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
4946 (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
4947 def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
4948 (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
4950 def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
4951 (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
4952 def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
4953 (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
4955 def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
4956 (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
4957 def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
4958 (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
// ---- 128-bit: v16i8 / v8i16 register-memory ----
4960 def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
4961 (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
4962 def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
4963 (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
4965 def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
4966 (VPORQZ128rm VR128X:$src1, addr:$src2)>;
4967 def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
4968 (VPORQZ128rm VR128X:$src1, addr:$src2)>;
4970 def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
4971 (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
4972 def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
4973 (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
4975 def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
4976 (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
4977 def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
4978 (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
// ---- 128-bit: f32 broadcast bitcast to v4i32 -> D-form rmb ----
4980 def : Pat<(and VR128X:$src1,
4981 (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
4982 (VPANDDZ128rmb VR128X:$src1, addr:$src2)>;
4983 def : Pat<(or VR128X:$src1,
4984 (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
4985 (VPORDZ128rmb VR128X:$src1, addr:$src2)>;
4986 def : Pat<(xor VR128X:$src1,
4987 (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
4988 (VPXORDZ128rmb VR128X:$src1, addr:$src2)>;
4989 def : Pat<(X86andnp VR128X:$src1,
4990 (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
4991 (VPANDNDZ128rmb VR128X:$src1, addr:$src2)>;
// ---- 128-bit: f64 broadcast bitcast to v2i64 -> Q-form rmb ----
4993 def : Pat<(and VR128X:$src1,
4994 (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
4995 (VPANDQZ128rmb VR128X:$src1, addr:$src2)>;
4996 def : Pat<(or VR128X:$src1,
4997 (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
4998 (VPORQZ128rmb VR128X:$src1, addr:$src2)>;
4999 def : Pat<(xor VR128X:$src1,
5000 (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
5001 (VPXORQZ128rmb VR128X:$src1, addr:$src2)>;
5002 def : Pat<(X86andnp VR128X:$src1,
5003 (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
5004 (VPANDNQZ128rmb VR128X:$src1, addr:$src2)>;
// ---- 256-bit: v32i8 / v16i16 register-register ----
5006 def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
5007 (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5008 def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
5009 (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5011 def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
5012 (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5013 def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
5014 (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5016 def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
5017 (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5018 def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
5019 (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5021 def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
5022 (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5023 def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
5024 (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
// ---- 256-bit: v32i8 / v16i16 register-memory ----
5026 def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
5027 (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5028 def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
5029 (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5031 def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
5032 (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5033 def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
5034 (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5036 def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
5037 (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5038 def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
5039 (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5041 def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
5042 (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5043 def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
5044 (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
// ---- 256-bit: f32 broadcast bitcast to v8i32 -> D-form rmb ----
5046 def : Pat<(and VR256X:$src1,
5047 (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
5048 (VPANDDZ256rmb VR256X:$src1, addr:$src2)>;
5049 def : Pat<(or VR256X:$src1,
5050 (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
5051 (VPORDZ256rmb VR256X:$src1, addr:$src2)>;
5052 def : Pat<(xor VR256X:$src1,
5053 (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
5054 (VPXORDZ256rmb VR256X:$src1, addr:$src2)>;
5055 def : Pat<(X86andnp VR256X:$src1,
5056 (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
5057 (VPANDNDZ256rmb VR256X:$src1, addr:$src2)>;
// ---- 256-bit: f64 broadcast bitcast to v4i64 -> Q-form rmb ----
5059 def : Pat<(and VR256X:$src1,
5060 (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
5061 (VPANDQZ256rmb VR256X:$src1, addr:$src2)>;
5062 def : Pat<(or VR256X:$src1,
5063 (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
5064 (VPORQZ256rmb VR256X:$src1, addr:$src2)>;
5065 def : Pat<(xor VR256X:$src1,
5066 (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
5067 (VPXORQZ256rmb VR256X:$src1, addr:$src2)>;
5068 def : Pat<(X86andnp VR256X:$src1,
5069 (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
5070 (VPANDNQZ256rmb VR256X:$src1, addr:$src2)>;
// 512-bit logic patterns, available with HasAVX512.
// Same scheme as the 128/256-bit patterns: v64i8 / v32i16 logic ops (register
// and full-vector-load operands) select the Q-suffixed Z rr / rm
// instructions, and f32 / f64 scalar-load broadcasts bitcast to v16i32 /
// v8i64 select the D / Q broadcast (rmb) instructions.
5073 let Predicates = [HasAVX512] in {
// ---- v64i8 / v32i16 register-register ----
5074 def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5075 (VPANDQZrr VR512:$src1, VR512:$src2)>;
5076 def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5077 (VPANDQZrr VR512:$src1, VR512:$src2)>;
5079 def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5080 (VPORQZrr VR512:$src1, VR512:$src2)>;
5081 def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5082 (VPORQZrr VR512:$src1, VR512:$src2)>;
5084 def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5085 (VPXORQZrr VR512:$src1, VR512:$src2)>;
5086 def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5087 (VPXORQZrr VR512:$src1, VR512:$src2)>;
5089 def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5090 (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5091 def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5092 (VPANDNQZrr VR512:$src1, VR512:$src2)>;
// ---- v64i8 / v32i16 register-memory ----
5094 def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
5095 (VPANDQZrm VR512:$src1, addr:$src2)>;
5096 def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
5097 (VPANDQZrm VR512:$src1, addr:$src2)>;
5099 def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
5100 (VPORQZrm VR512:$src1, addr:$src2)>;
5101 def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
5102 (VPORQZrm VR512:$src1, addr:$src2)>;
5104 def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
5105 (VPXORQZrm VR512:$src1, addr:$src2)>;
5106 def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
5107 (VPXORQZrm VR512:$src1, addr:$src2)>;
5109 def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
5110 (VPANDNQZrm VR512:$src1, addr:$src2)>;
5111 def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
5112 (VPANDNQZrm VR512:$src1, addr:$src2)>;
// ---- f32 broadcast bitcast to v16i32 -> D-form rmb ----
5114 def : Pat<(and VR512:$src1,
5115 (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
5116 (VPANDDZrmb VR512:$src1, addr:$src2)>;
5117 def : Pat<(or VR512:$src1,
5118 (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
5119 (VPORDZrmb VR512:$src1, addr:$src2)>;
5120 def : Pat<(xor VR512:$src1,
5121 (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
5122 (VPXORDZrmb VR512:$src1, addr:$src2)>;
5123 def : Pat<(X86andnp VR512:$src1,
5124 (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
5125 (VPANDNDZrmb VR512:$src1, addr:$src2)>;
// ---- f64 broadcast bitcast to v8i64 -> Q-form rmb ----
5127 def : Pat<(and VR512:$src1,
5128 (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
5129 (VPANDQZrmb VR512:$src1, addr:$src2)>;
5130 def : Pat<(or VR512:$src1,
5131 (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
5132 (VPORQZrmb VR512:$src1, addr:$src2)>;
5133 def : Pat<(xor VR512:$src1,
5134 (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
5135 (VPXORQZrmb VR512:$src1, addr:$src2)>;
5136 def : Pat<(X86andnp VR512:$src1,
5137 (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
5138 (VPANDNQZrmb VR512:$src1, addr:$src2)>;
// Masked-select patterns for a logic op computed in one vector type
// (IntInfo.VT) and selected in another (_.VT): the op result is bitconverted
// and fed to a vselect under _.KRCWM:$mask.
// The target instruction is looked up by name as InstrStr plus a suffix:
//   rrk / rmk   - take _.RC:$src0 as their first operand, then the mask.
//   rrkz / rmkz - take the mask first and have no $src0 operand.
// NOTE(review): the vselect's false-value operand lines (embedded lines 5148,
// 5154, 5162, 5168) and the first template parameter line (5143) are missing
// from this listing; presumably they are $src0 for the k forms and an
// all-zeros vector for the kz forms - confirm against the full file.
5141 // Patterns to catch vselect with different type than logic op.
5142 multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
5144 X86VectorVTInfo IntInfo> {
5145 // Masked register-register logical operations.
5146 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5147 (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5149 (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5150 _.RC:$src1, _.RC:$src2)>;
5152 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5153 (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5155 (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5158 // Masked register-memory logical operations.
5159 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5160 (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5161 (load addr:$src2)))),
5163 (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5164 _.RC:$src1, addr:$src2)>;
5165 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5166 (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5167 (load addr:$src2)))),
5169 (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
// Broadcast-operand variants of the cross-type logic lowering.
// Three patterns are defined:
//   * unmasked: IntInfo.VT logic op whose second operand is a _.VT
//     X86VBroadcast of a _.ScalarLdFrag load, bitconverted -> InstrStr#rmb;
//   * merge-masked under _.KRCWM:$mask -> InstrStr#rmbk (takes $src0);
//   * zero-masked under _.KRCWM:$mask  -> InstrStr#rmbkz (no $src0).
// NOTE(review): several pattern lines (embedded lines 5174, 5182, 5184-5185,
// 5187, 5191, 5193-5194, 5196) are missing from this listing, so the inner
// bitconvert/broadcast nesting and the vselect false value are not visible.
5173 multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
5175 X86VectorVTInfo IntInfo> {
5176 // Register-broadcast logical operations.
5177 def : Pat<(IntInfo.VT (OpNode _.RC:$src1,
5178 (bitconvert (_.VT (X86VBroadcast
5179 (_.ScalarLdFrag addr:$src2)))))),
5180 (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
5181 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5183 (IntInfo.VT (OpNode _.RC:$src1,
5186 (_.ScalarLdFrag addr:$src2))))))),
5188 (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5189 _.RC:$src1, addr:$src2)>;
5190 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5192 (IntInfo.VT (OpNode _.RC:$src1,
5195 (_.ScalarLdFrag addr:$src2))))))),
5197 (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
5198 _.RC:$src1, addr:$src2)>;
// Instantiates avx512_logical_lowering for each vector length, appending
// "Z128" / "Z256" / "Z" to InstrStr. The 128- and 256-bit sets require
// HasVLX; the 512-bit set requires HasAVX512.
// NOTE(review): the IntInfo argument line of each defm (embedded lines 5206,
// 5208, 5212) is missing from this listing.
5201 multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
5202 AVX512VLVectorVTInfo SelectInfo,
5203 AVX512VLVectorVTInfo IntInfo> {
5204 let Predicates = [HasVLX] in {
5205 defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
5207 defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
5210 let Predicates = [HasAVX512] in {
5211 defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
// Instantiates avx512_logical_lowering_bcast for each vector length,
// appending "Z128" / "Z256" / "Z" to InstrStr and passing the matching
// infoNNN member of SelectInfo and IntInfo. The 128- and 256-bit sets require
// HasVLX; the 512-bit set requires HasAVX512.
5216 multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
5217 AVX512VLVectorVTInfo SelectInfo,
5218 AVX512VLVectorVTInfo IntInfo> {
5219 let Predicates = [HasVLX] in {
5220 defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
5221 SelectInfo.info128, IntInfo.info128>;
5222 defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
5223 SelectInfo.info256, IntInfo.info256>;
5225 let Predicates = [HasAVX512] in {
5226 defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
5227 SelectInfo.info512, IntInfo.info512>;
// Enumerates the (select type, logic-op type) combinations for one logic
// instruction family. The select type decides the instruction suffix:
// i64 / f64 selects use the "Q" instruction, i32 / f32 selects use "D".
// NOTE(review): the IntInfo argument of every defm is on a line missing from
// this listing; the group comments below state which logic-op types each
// group is meant to cover.
5231 multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
5232 // i64 vselect with i32/i16/i8 logic op
5233 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5235 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5237 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5240 // i32 vselect with i64/i16/i8 logic op
5241 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5243 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5245 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5248 // f32 vselect with i64/i32/i16/i8 logic op
5249 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5251 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5253 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5255 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5258 // f64 vselect with i64/i32/i16/i8 logic op
5259 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5261 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5263 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5265 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
// Broadcast-operand lowering, one instantiation each for the D and Q forms.
5268 defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
5271 defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
// Apply the cross-type masked lowering to each of the four logic
// instruction families defined above.
5276 defm : avx512_logical_lowering_types<"VPAND", and>;
5277 defm : avx512_logical_lowering_types<"VPOR", or>;
5278 defm : avx512_logical_lowering_types<"VPXOR", xor>;
5279 defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
5281 //===----------------------------------------------------------------------===//
5282 // AVX-512 FP arithmetic
5283 //===----------------------------------------------------------------------===//
// Scalar floating-point binary operation.
// Defines four forms, all within ExeDomain = _.ExeDomain:
//   rr_Int / rm_Int - maskable forms on the vector register class _.RC,
//                     selected from VecNode; rm_Int takes its memory operand
//                     through _.IntScalarMemOp / _.ScalarIntMemCPat.
//   rr / rm         - isCodeGenOnly forms on the scalar class _.FRC, selected
//                     from OpNode, with Predicates = [HasAVX512]. The rr form
//                     sets isCommutable from the IsCommutable parameter.
// NOTE(review): the scheduling/closing lines following rr_Int and rr
// (embedded lines 5293-5294, 5306, 5308) are missing from this listing.
5285 multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5286 SDNode OpNode, SDNode VecNode,
5287 X86FoldableSchedWrite sched, bit IsCommutable> {
5288 let ExeDomain = _.ExeDomain in {
5289 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5290 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5291 "$src2, $src1", "$src1, $src2",
5292 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5295 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5296 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5297 "$src2, $src1", "$src1, $src2",
5298 (_.VT (VecNode _.RC:$src1,
5299 _.ScalarIntMemCPat:$src2))>,
5300 Sched<[sched.Folded, sched.ReadAfterFold]>;
5301 let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5302 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5303 (ins _.FRC:$src1, _.FRC:$src2),
5304 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5305 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5307 let isCommutable = IsCommutable;
5309 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5310 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5311 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5312 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5313 (_.ScalarLdFrag addr:$src2)))]>,
5314 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Scalar floating-point form with an explicit rounding-control operand.
// Defines rrb_Int: a maskable register-register form that takes an extra
// AVX512RC:$rc operand (printed as "$rc") and is tagged EVEX_B and EVEX_RC.
// NOTE(review): the line carrying the $rc operand inside the VecNode pattern
// (embedded line 5327) is missing from this listing; IsCommutable is accepted
// but its use, if any, is not visible here.
5319 multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5320 SDNode VecNode, X86FoldableSchedWrite sched,
5321 bit IsCommutable = 0> {
5322 let ExeDomain = _.ExeDomain in
5323 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5324 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5325 "$rc, $src2, $src1", "$src1, $src2, $rc",
5326 (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5328 EVEX_B, EVEX_RC, Sched<[sched]>;
// Scalar floating-point binary operation with a {sae} variant.
// Defines the same rr_Int / rm_Int (VecNode, on _.RC) and codegen-only
// rr / rm (OpNode, on _.FRC, Predicates = [HasAVX512]) forms as
// avx512_fp_scalar, plus:
//   rrb_Int - maskable register-register form selected from SaeNode, printed
//             with a literal "{sae}" operand and tagged EVEX_B.
// The rr form sets isCommutable from the IsCommutable parameter.
// NOTE(review): the scheduling/closing lines following rr_Int and rr
// (embedded lines 5338-5339, 5352, 5354) are missing from this listing.
5330 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5331 SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5332 X86FoldableSchedWrite sched, bit IsCommutable> {
5333 let ExeDomain = _.ExeDomain in {
5334 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5335 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5336 "$src2, $src1", "$src1, $src2",
5337 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5340 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5341 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5342 "$src2, $src1", "$src1, $src2",
5343 (_.VT (VecNode _.RC:$src1,
5344 _.ScalarIntMemCPat:$src2))>,
5345 Sched<[sched.Folded, sched.ReadAfterFold]>;
5347 let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5348 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5349 (ins _.FRC:$src1, _.FRC:$src2),
5350 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5351 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5353 let isCommutable = IsCommutable;
5355 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5356 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5357 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5358 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5359 (_.ScalarLdFrag addr:$src2)))]>,
5360 Sched<[sched.Folded, sched.ReadAfterFold]>;
// {sae} form: no rounding-control operand, only the EVEX_B tag.
5363 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5364 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5365 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5366 (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5367 EVEX_B, Sched<[sched]>;
// Single- and double-precision scalar binary op with rounding-control forms.
//   SSZ - f32x_info, mnemonic OpcodeStr#"ss", sched.PS.Scl,
//         tagged XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>.
//   SDZ - f64x_info, mnemonic OpcodeStr#"sd", sched.PD.Scl,
//         tagged XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>.
// Each combines avx512_fp_scalar (OpNode/VecNode) with
// avx512_fp_scalar_round (RndNode).
5371 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
5372 SDNode VecNode, SDNode RndNode,
5373 X86SchedWriteSizes sched, bit IsCommutable> {
5374 defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5375 sched.PS.Scl, IsCommutable>,
5376 avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5377 sched.PS.Scl, IsCommutable>,
5378 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5379 defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5380 sched.PD.Scl, IsCommutable>,
5381 avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5382 sched.PD.Scl, IsCommutable>,
5383 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
// Single- and double-precision scalar binary op with {sae} forms.
//   SSZ - f32x_info, mnemonic OpcodeStr#"ss", sched.PS.Scl,
//         tagged XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>.
//   SDZ - f64x_info, mnemonic OpcodeStr#"sd", sched.PD.Scl,
//         tagged XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>.
// Both are built from avx512_fp_scalar_sae.
5386 multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5387 SDNode VecNode, SDNode SaeNode,
5388 X86SchedWriteSizes sched, bit IsCommutable> {
5389 defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5390 VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
5391 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5392 defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5393 VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
5394 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
// Scalar FP arithmetic instantiations. The final argument is IsCommutable:
// add and mul pass 1; sub, div, min and max pass 0.
// add/mul/sub/div use the rounding-control multiclass; min/max use the {sae}
// multiclass.
5396 defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86fadds, X86faddRnds,
5397 SchedWriteFAddSizes, 1>;
5398 defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmuls, X86fmulRnds,
5399 SchedWriteFMulSizes, 1>;
5400 defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubs, X86fsubRnds,
5401 SchedWriteFAddSizes, 0>;
5402 defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivs, X86fdivRnds,
5403 SchedWriteFDivSizes, 0>;
5404 defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5405 SchedWriteFCmpSizes, 0>;
5406 defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5407 SchedWriteFCmpSizes, 0>;
// Codegen-only scalar rr / rm forms on _.FRC for a commutable OpNode.
// Unlike avx512_fp_scalar, the rr form here sets isCommutable = 1
// unconditionally and no maskable (_Int) forms are defined.
// NOTE(review): the scheduling/closing lines following rr (embedded lines
// 5419, 5421) are missing from this listing.
5409 // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
5410 // X86fminc and X86fmaxc instead of X86fmin and X86fmax
5411 multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5412 X86VectorVTInfo _, SDNode OpNode,
5413 X86FoldableSchedWrite sched> {
5414 let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5415 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5416 (ins _.FRC:$src1, _.FRC:$src2),
5417 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5418 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5420 let isCommutable = 1;
5422 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5423 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5424 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5425 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5426 (_.ScalarLdFrag addr:$src2)))]>,
5427 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Commutable scalar min/max variants selected from X86fminc / X86fmaxc.
// They reuse the opcodes (0x5D / 0x5F) and mnemonics ("vminss", "vminsd",
// "vmaxss", "vmaxsd") of the VMIN / VMAX definitions and carry the same
// prefix and EVEX_CD8 tags as the corresponding SSZ / SDZ forms.
5430 defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5431 SchedWriteFCmp.Scl>, XS, EVEX_4V,
5432 VEX_LIG, EVEX_CD8<32, CD8VT1>;
5434 defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5435 SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
5436 VEX_LIG, EVEX_CD8<64, CD8VT1>;
5438 defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5439 SchedWriteFCmp.Scl>, XS, EVEX_4V,
5440 VEX_LIG, EVEX_CD8<32, CD8VT1>;
5442 defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5443 SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
5444 VEX_LIG, EVEX_CD8<64, CD8VT1>;
// Packed floating-point binary operation for one vector type `_`.
// Defines three maskable forms with mnemonic OpcodeStr##_.Suffix, all inside
// `ExeDomain = _.ExeDomain, hasSideEffects = 0`:
//   rr  - register-register; receives IsCommutable followed by IsKCommutable
//         twice as trailing AVX512_maskable arguments.
//   rm  - register-memory via _.LdFrag (mayLoad = 1).
//   rmb - register-broadcast via X86VBroadcast of _.ScalarLdFrag
//         (mayLoad = 1); asm operand carries _.BroadcastStr.
// IsKCommutable defaults to IsCommutable.
// NOTE(review): the IsCommutable parameter declaration (embedded line 5448)
// and the rmb encoding-class line (5469) are missing from this listing.
5446 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5447 X86VectorVTInfo _, X86FoldableSchedWrite sched,
5449 bit IsKCommutable = IsCommutable> {
5450 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
5451 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5452 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5453 "$src2, $src1", "$src1, $src2",
5454 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
5455 IsKCommutable, IsKCommutable>,
5456 EVEX_4V, Sched<[sched]>;
5457 let mayLoad = 1 in {
5458 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5459 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
5460 "$src2, $src1", "$src1, $src2",
5461 (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5462 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5463 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5464 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
5465 "${src2}"##_.BroadcastStr##", $src1",
5466 "$src1, ${src2}"##_.BroadcastStr,
5467 (OpNode _.RC:$src1, (_.VT (X86VBroadcast
5468 (_.ScalarLdFrag addr:$src2))))>,
5470 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Register-register packed FP form with an explicit rounding-control operand.
// Defines rrb, which takes an extra AVX512RC:$rc input that the pattern
// matches as (i32 timm:$rc); the record is tagged EVEX_B and EVEX_RC.
5475 multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5476 SDPatternOperator OpNodeRnd,
5477 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5478 let ExeDomain = _.ExeDomain in
5479 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5480 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
5481 "$rc, $src2, $src1", "$src1, $src2, $rc",
5482 (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>,
5483 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
// Register-register packed FP form printed with the "{sae}" modifier.
// Defines rrb with no extra operand (unlike the rounding-control variant);
// the record is tagged EVEX_B.
5486 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5487 SDPatternOperator OpNodeSAE,
5488 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5489 let ExeDomain = _.ExeDomain in
5490 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5491 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5492 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5493 (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5494 EVEX_4V, EVEX_B, Sched<[sched]>;
// Instantiates avx512_fp_packed for every packed PS/PD width.
// The 512-bit forms (PSZ, PDZ) require only `prd`; the 128/256-bit forms
// additionally require HasVLX. IsCommutable defaults to 0.
// IsPD128Commutable (defaults to IsCommutable) lets the PDZ128 form carry a
// different unmasked commutability flag than the other widths.
5497 multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5498 Predicate prd, X86SchedWriteSizes sched,
5499 bit IsCommutable = 0,
5500 bit IsPD128Commutable = IsCommutable> {
5501 let Predicates = [prd] in {
5502 defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
5503 sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
5504 EVEX_CD8<32, CD8VF>;
5505 defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
5506 sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
5507 EVEX_CD8<64, CD8VF>;
5510 // Define only if AVX512VL feature is present.
5511 let Predicates = [prd, HasVLX] in {
5512 defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
5513 sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
5514 EVEX_CD8<32, CD8VF>;
5515 defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
5516 sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
5517 EVEX_CD8<32, CD8VF>;
// PDZ128 passes IsPD128Commutable as the unmasked flag and IsCommutable as
// avx512_fp_packed's IsKCommutable argument.
5518 defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
5519 sched.PD.XMM, IsPD128Commutable,
5520 IsCommutable>, EVEX_V128, PD, VEX_W,
5521 EVEX_CD8<64, CD8VF>;
5522 defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
5523 sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
5524 EVEX_CD8<64, CD8VF>;
// Adds the rounding-control (avx512_fp_round_packed) forms for PS and PD.
// Only 512-bit (EVEX_V512) records named PSZ and PDZ are created here.
5528 multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5529 X86SchedWriteSizes sched> {
5530 defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5532 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5533 defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5535 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
// Adds the {sae} (avx512_fp_sae_packed) forms for PS and PD.
// Only 512-bit (EVEX_V512) records named PSZ and PDZ are created here.
// Note the node parameter is named OpNodeRnd although it is forwarded to the
// SAE multiclass.
5538 multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5539 X86SchedWriteSizes sched> {
5540 defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5542 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5543 defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5545 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
// Packed FP arithmetic. VADD and VMUL pass IsCommutable = 1; VSUB and VDIV
// rely on the default of 0. Each also gets the rounding-control forms.
5548 defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
5549 SchedWriteFAddSizes, 1>,
5550 avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5551 defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
5552 SchedWriteFMulSizes, 1>,
5553 avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5554 defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
5555 SchedWriteFAddSizes>,
5556 avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5557 defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
5558 SchedWriteFDivSizes>,
5559 avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
// VMIN/VMAX are explicitly non-commutable (0) and get {sae} forms instead of
// rounding-control forms.
5560 defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
5561 SchedWriteFCmpSizes, 0>,
5562 avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5563 defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
5564 SchedWriteFCmpSizes, 0>,
5565 avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
// Codegen-only commutable twins of VMIN/VMAX, sharing the same opcodes and
// mnemonics but matching X86fminc/X86fmaxc.
5566 let isCodeGenOnly = 1 in {
5567 defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
5568 SchedWriteFCmpSizes, 1>;
5569 defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
5570 SchedWriteFCmpSizes, 1>;
// FP-domain logic ops. They are predicated on HasDQI and use null_frag, so no
// selection patterns are attached here. VANDN is the only non-commutable one.
5572 defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
5573 SchedWriteFLogicSizes, 1>;
5574 defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
5575 SchedWriteFLogicSizes, 0>;
5576 defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
5577 SchedWriteFLogicSizes, 1>;
5578 defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
5579 SchedWriteFLogicSizes, 1>;
// Packed scalef-style op in three maskable forms: rr (register), rm
// (full-vector load) and rmb (broadcast scalar load, tagged EVEX_B).
// No commutability flags are passed to AVX512_maskable.
5581 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5582 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5583 let ExeDomain = _.ExeDomain in {
5584 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5585 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5586 "$src2, $src1", "$src1, $src2",
5587 (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5588 EVEX_4V, Sched<[sched]>;
5589 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5590 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
5591 "$src2, $src1", "$src1, $src2",
5592 (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5593 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5594 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5595 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
5596 "${src2}"##_.BroadcastStr##", $src1",
5597 "$src1, ${src2}"##_.BroadcastStr,
5598 (OpNode _.RC:$src1, (_.VT (X86VBroadcast
5599 (_.ScalarLdFrag addr:$src2))))>,
5600 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Scalar scalef-style op built on AVX512_maskable_scalar. Operands use the
// vector register class _.RC. The rm form takes an _.IntScalarMemOp and
// matches the memory source through _.ScalarIntMemCPat.
5604 multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5605 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5606 let ExeDomain = _.ExeDomain in {
5607 defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5608 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5609 "$src2, $src1", "$src1, $src2",
5610 (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5612 defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5613 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix,
5614 "$src2, $src1", "$src1, $src2",
5615 (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2)>,
5616 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Builds the full scalef family from two opcodes: `opc` for the packed forms
// and `opcScaler` for the scalar (SS/SD) forms.
//   PSZ/PDZ  - 512-bit packed, plus rounding-control forms.
//   SSZ/SDZ  - scalar, plus scalar rounding forms. The scalar round multiclass
//              is given OpcodeStr with an explicit "ss"/"sd" suffix, whereas
//              avx512_fp_scalef_scalar appends _.Suffix itself.
//   *Z128/*Z256 - packed forms gated on HasVLX, without rounding variants.
5620 multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5621 X86SchedWriteWidths sched> {
5622 defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
5623 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
5624 EVEX_V512, EVEX_CD8<32, CD8VF>;
5625 defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
5626 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
5627 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
5628 defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
5629 avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info,
5630 X86scalefsRnd, sched.Scl>,
5631 EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5632 defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
5633 avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info,
5634 X86scalefsRnd, sched.Scl>,
5635 EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W;
5637 // Define only if AVX512VL feature is present.
5638 let Predicates = [HasVLX] in {
5639 defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
5640 EVEX_V128, EVEX_CD8<32, CD8VF>;
5641 defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
5642 EVEX_V256, EVEX_CD8<32, CD8VF>;
5643 defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
5644 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
5645 defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
5646 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
// VSCALEF: packed opcode 0x2C, scalar opcode 0x2D, in the T8PD map. Marked
// NotEVEX2VEXConvertible.
5649 defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
5650 SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;
5652 //===----------------------------------------------------------------------===//
5653 // AVX-512 VPTESTM instructions
5654 //===----------------------------------------------------------------------===//
// Test-into-mask instruction: the result is written to the mask register
// class _.KRC. Defines rr (register) and rm (full-vector load) forms through
// AVX512_maskable_cmp, both with null_frag in place of selection patterns.
5656 multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
5657 X86FoldableSchedWrite sched, X86VectorVTInfo _,
5659 // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
5660 // There are just too many permutations due to commutability and bitcasts.
5661 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
// NOTE(review): the trailing 1 is presumably the commutable flag of
// AVX512_maskable_cmp -- confirm against its definition.
5662 defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5663 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5664 "$src2, $src1", "$src1, $src2",
5665 (null_frag), (null_frag), 1>,
5666 EVEX_4V, Sched<[sched]>;
5668 defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5669 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5670 "$src2, $src1", "$src1, $src2",
5671 (null_frag), (null_frag)>,
5672 EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5673 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast-memory (rmb) form of the test-into-mask instruction. The second
// source is a scalar memory operand printed with _.BroadcastStr; the record
// is tagged EVEX_B, marked mayLoad, and carries no selection pattern.
5677 multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
5678 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5679 let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
5680 defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5681 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5682 "${src2}"##_.BroadcastStr##", $src1",
5683 "$src1, ${src2}"##_.BroadcastStr,
5684 (null_frag), (null_frag)>,
5685 EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5686 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates the rr/rm and rmb test forms at all three vector widths for a
// dword/qword element type. Z requires HasAVX512; Z256 and Z128 additionally
// require HasVLX.
5689 multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
5690 X86SchedWriteWidths sched,
5691 AVX512VLVectorVTInfo _> {
5692 let Predicates = [HasAVX512] in
5693 defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512, NAME>,
5694 avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
5696 let Predicates = [HasAVX512, HasVLX] in {
5697 defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256, NAME>,
5698 avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
5699 defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128, NAME>,
5700 avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
// Dword ("d") and qword ("q") variants of the test instruction. The qword
// variant uses avx512vl_i64_info and adds VEX_W.
5704 multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
5705 X86SchedWriteWidths sched> {
5706 defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
5708 defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
5709 avx512vl_i64_info>, VEX_W;
// Word ("w") and byte ("b") variants of the test instruction. The 512-bit
// forms require HasBWI; the 128/256-bit forms require HasVLX and HasBWI.
// Word forms add VEX_W. Only avx512_vptest (rr/rm) is used here, so no
// broadcast (rmb) form is created for these element sizes.
5712 multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5713 X86SchedWriteWidths sched> {
5714 let Predicates = [HasBWI] in {
5715 defm WZ: avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
5716 v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
5717 defm BZ: avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
5718 v64i8_info, NAME#"B">, EVEX_V512;
5720 let Predicates = [HasVLX, HasBWI] in {
5722 defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
5723 v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
5724 defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
5725 v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
5726 defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
5727 v32i8x_info, NAME#"B">, EVEX_V256;
5728 defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
5729 v16i8x_info, NAME#"B">, EVEX_V128;
// Combines the byte/word forms (opcode opc_wb) and the dword/qword forms
// (opcode opc_dq) under one mnemonic stem.
5733 multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5734 X86SchedWriteWidths sched> :
5735 avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
5736 avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
// VPTESTM and VPTESTNM share opcodes 0x26 (byte/word) and 0x27 (dword/qword);
// they differ only in the prefix class: T8PD versus T8XS.
5738 defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
5739 SchedWriteVecLogic>, T8PD;
5740 defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
5741 SchedWriteVecLogic>, T8XS;
5743 //===----------------------------------------------------------------------===//
5744 // AVX-512 Shift instructions
5745 //===----------------------------------------------------------------------===//
// Vector op with an 8-bit immediate second operand (u8imm:$src2).
//   ri - source vector in a register, encoded with ImmFormR.
//   mi - source vector loaded from memory (_.LdFrag), encoded with ImmFormM.
5747 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5748 string OpcodeStr, SDNode OpNode,
5749 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5750 let ExeDomain = _.ExeDomain in {
5751 defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5752 (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5753 "$src2, $src1", "$src1, $src2",
5754 (_.VT (OpNode _.RC:$src1, (i8 imm:$src2)))>,
5756 defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5757 (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5758 "$src2, $src1", "$src1, $src2",
5759 (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
5761 Sched<[sched.Folded]>;
// Broadcast-memory form (mbi) of the immediate op: the source vector is a
// scalar load broadcast with X86VBroadcast, the second operand is an 8-bit
// immediate, and the record is tagged EVEX_B.
5765 multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5766 string OpcodeStr, SDNode OpNode,
5767 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5768 let ExeDomain = _.ExeDomain in
5769 defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5770 (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5771 "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
5772 (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2)))>,
5773 EVEX_B, Sched<[sched.Folded]>;
// Shift whose count comes from a 128-bit operand regardless of the width of
// the shifted vector: rr takes the count in VR128X, rm loads it from i128mem.
// SrcVT is the value type given to the count operand in the patterns.
5776 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5777 X86FoldableSchedWrite sched, ValueType SrcVT,
5778 X86VectorVTInfo _> {
5779 // src2 is always 128-bit
5780 let ExeDomain = _.ExeDomain in {
5781 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5782 (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5783 "$src2, $src1", "$src1, $src2",
5784 (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
5785 AVX512BIBase, EVEX_4V, Sched<[sched]>;
5786 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5787 (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5788 "$src2, $src1", "$src1, $src2",
5789 (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
5791 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates avx512_shift_rrm at 512/256/128 bits. Z requires `prd`; Z256
// and Z128 also require HasVLX. The EVEX_CD8 tuple type differs per width:
// CD8VQ for Z, CD8VH for Z256, CD8VF for Z128.
5795 multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5796 X86SchedWriteWidths sched, ValueType SrcVT,
5797 AVX512VLVectorVTInfo VTInfo,
5799 let Predicates = [prd] in
5800 defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
5801 VTInfo.info512>, EVEX_V512,
5802 EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5803 let Predicates = [prd, HasVLX] in {
5804 defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
5805 VTInfo.info256>, EVEX_V256,
5806 EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5807 defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
5808 VTInfo.info128>, EVEX_V128,
5809 EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
// Dword, qword and word variants of a register-count shift, each with its own
// opcode (opcd, opcq, opcw). D and Q are gated on HasAVX512, W on HasBWI.
// NotEVEX2VEXConvertibleQ (default 0) sets notEVEX2VEXConvertible on the
// qword variant only.
5813 multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5814 string OpcodeStr, SDNode OpNode,
5815 X86SchedWriteWidths sched,
5816 bit NotEVEX2VEXConvertibleQ = 0> {
5817 defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
5818 avx512vl_i32_info, HasAVX512>;
5819 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5820 defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
5821 avx512vl_i64_info, HasAVX512>, VEX_W;
5822 defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
5823 avx512vl_i16_info, HasBWI>;
// Instantiates the immediate forms (ri, mi) together with the broadcast form
// (mbi) at all three vector widths. Z requires HasAVX512; Z256 and Z128
// additionally require HasVLX.
5826 multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5827 string OpcodeStr, SDNode OpNode,
5828 X86SchedWriteWidths sched,
5829 AVX512VLVectorVTInfo VTInfo> {
5830 let Predicates = [HasAVX512] in
5831 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5832 sched.ZMM, VTInfo.info512>,
5833 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
5834 VTInfo.info512>, EVEX_V512;
5835 let Predicates = [HasAVX512, HasVLX] in {
5836 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5837 sched.YMM, VTInfo.info256>,
5838 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
5839 VTInfo.info256>, EVEX_V256;
5840 defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5841 sched.XMM, VTInfo.info128>,
5842 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
5843 VTInfo.info128>, EVEX_V128;
// Word-element immediate forms. WZ requires HasBWI; WZ256 and WZ128 require
// HasVLX and HasBWI. All are VEX_WIG. Only avx512_shift_rmi is used, so no
// broadcast (mbi) form is created for word elements.
5847 multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
5848 string OpcodeStr, SDNode OpNode,
5849 X86SchedWriteWidths sched> {
5850 let Predicates = [HasBWI] in
5851 defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5852 sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
5853 let Predicates = [HasVLX, HasBWI] in {
5854 defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5855 sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
5856 defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5857 sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
// Dword and qword immediate forms with separate opcodes (opcd, opcq). The
// qword variant adds VEX_W, and NotEVEX2VEXConvertibleQ (default 0) sets
// notEVEX2VEXConvertible on it alone.
5861 multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
5862 Format ImmFormR, Format ImmFormM,
5863 string OpcodeStr, SDNode OpNode,
5864 X86SchedWriteWidths sched,
5865 bit NotEVEX2VEXConvertibleQ = 0> {
5866 defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
5867 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
5868 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5869 defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
5870 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
// Shift-by-immediate forms. The operation is selected by the MRMnr/MRMnm
// format: MRM2 = logical right, MRM6 = left, MRM4 = arithmetic right. Word
// variants use opcode 0x71.
5873 defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
5874 SchedWriteVecShiftImm>,
5875 avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
5876 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5878 defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
5879 SchedWriteVecShiftImm>,
5880 avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
5881 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
// VPSRA uses the same opcode (0x72) for dword and qword, and passes 1 so the
// qword form is marked notEVEX2VEXConvertible.
5883 defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
5884 SchedWriteVecShiftImm, 1>,
5885 avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
5886 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
// Rotate-by-immediate forms (MRM0 = right, MRM1 = left); dword/qword only.
5888 defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
5889 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5890 defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
5891 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
// Shift-by-128-bit-count forms; arguments are the dword, qword and word
// opcodes in that order. VPSRA again marks its qword form
// notEVEX2VEXConvertible.
5893 defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
5894 SchedWriteVecShift>;
5895 defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
5896 SchedWriteVecShift, 1>;
5897 defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
5898 SchedWriteVecShift>;
5900 // Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// Each pattern places the narrow source in the low subregister of an
// otherwise undefined (IMPLICIT_DEF) v8i64 value, and then extracts the same
// subregister (sub_xmm or sub_ymm) from the wide result. The shift count
// (VR128X register or immediate) is passed through unchanged.
5901 let Predicates = [HasAVX512, NoVLX] in {
5902 def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
5903 (EXTRACT_SUBREG (v8i64
5905 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5906 VR128X:$src2)), sub_ymm)>;
5908 def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5909 (EXTRACT_SUBREG (v8i64
5911 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5912 VR128X:$src2)), sub_xmm)>;
5914 def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
5915 (EXTRACT_SUBREG (v8i64
5917 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5918 imm:$src2)), sub_ymm)>;
5920 def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
5921 (EXTRACT_SUBREG (v8i64
5923 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5924 imm:$src2)), sub_xmm)>;
5927 //===-------------------------------------------------------------------===//
5928 // Variable Bit Shifts
5929 //===-------------------------------------------------------------------===//
// Binary vector op whose second operand is a full vector of the same type
// (e.g. per-element shift counts). Defines rr (register) and rm (full-vector
// load) maskable forms in the AVX5128IBase group.
5931 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
5932 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5933 let ExeDomain = _.ExeDomain in {
5934 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5935 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5936 "$src2, $src1", "$src1, $src2",
5937 (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
5938 AVX5128IBase, EVEX_4V, Sched<[sched]>;
5939 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5940 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5941 "$src2, $src1", "$src1, $src2",
5942 (_.VT (OpNode _.RC:$src1,
5943 (_.VT (_.LdFrag addr:$src2))))>,
5944 AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5945 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast-memory (rmb) companion of avx512_var_shift: the second operand is
// a scalar load broadcast with X86VBroadcast, and the record is tagged EVEX_B.
5949 multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5950 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5951 let ExeDomain = _.ExeDomain in
5952 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5953 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5954 "${src2}"##_.BroadcastStr##", $src1",
5955 "$src1, ${src2}"##_.BroadcastStr,
5956 (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
5957 (_.ScalarLdFrag addr:$src2)))))>,
5958 AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5959 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates the rr/rm and rmb variable-shift forms at all three vector
// widths. Z requires HasAVX512; Z256 and Z128 additionally require HasVLX.
5962 multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5963 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
5964 let Predicates = [HasAVX512] in
5965 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
5966 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
5968 let Predicates = [HasAVX512, HasVLX] in {
5969 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
5970 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
5971 defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
5972 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
// Dword ("d") and qword ("q") variants of a variable shift, sharing one
// opcode. The qword variant uses avx512vl_i64_info and adds VEX_W.
5976 multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
5977 SDNode OpNode, X86SchedWriteWidths sched> {
5978 defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
5980 defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
5981 avx512vl_i64_info>, VEX_W;
5984 // Use 512bit version to implement 128/256 bit in case NoVLX.
// Emits two anonymous patterns, one for the 256-bit and one for the 128-bit
// type of `_`, active under predicate list `p`. Both operands are inserted
// into the low subregister of an undefined (IMPLICIT_DEF) 512-bit value, and
// the instruction record named OpcodeStr#"Zrr" is applied to them. OpcodeStr
// is therefore a record-name prefix here (e.g. "VPSRAVQ"), not a mnemonic.
5985 multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
5986 SDNode OpNode, list<Predicate> p> {
5987 let Predicates = p in {
5988 def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
5989 (_.info256.VT _.info256.RC:$src2))),
5991 (!cast<Instruction>(OpcodeStr#"Zrr")
5992 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5993 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5996 def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
5997 (_.info128.VT _.info128.RC:$src2))),
5999 (!cast<Instruction>(OpcodeStr#"Zrr")
6000 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6001 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
// Word-element variable shift. WZ requires HasBWI; WZ256 and WZ128 require
// HasVLX and HasBWI. Only avx512_var_shift (rr/rm) is used, so no broadcast
// form is created for word elements.
6005 multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6006 SDNode OpNode, X86SchedWriteWidths sched> {
6007 let Predicates = [HasBWI] in
6008 defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6010 let Predicates = [HasVLX, HasBWI] in {
6012 defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6014 defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
// Per-element variable shifts: dword/qword forms plus a word form with its
// own opcode.
6019 defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
6020 avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
6022 defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
6023 avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
6025 defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
6026 avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
// Per-element variable rotates, matched on the generic rotr/rotl nodes;
// dword/qword only.
6028 defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6029 defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
// Without VLX, select 128/256-bit variable shifts through the 512-bit
// register-register instruction. The string argument is the instruction
// record prefix to which "Zrr" is appended.
6031 defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
6032 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
6033 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
6034 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
6037 // Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// The first four patterns cover variable rotates (rotl with a vector count);
// the last four cover immediate rotates (X86vrotli with an i8 immediate).
// Narrow operands are inserted into the low subregister of an undefined
// (IMPLICIT_DEF) 512-bit value, and the matching low subregister is extracted
// from the wide result.
6038 let Predicates = [HasAVX512, NoVLX] in {
6039 def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6040 (EXTRACT_SUBREG (v8i64
6042 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6043 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6045 def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6046 (EXTRACT_SUBREG (v8i64
6048 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6049 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6052 def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6053 (EXTRACT_SUBREG (v16i32
6055 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6056 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6058 def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6059 (EXTRACT_SUBREG (v16i32
6061 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6062 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6065 def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
6066 (EXTRACT_SUBREG (v8i64
6068 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6069 imm:$src2)), sub_xmm)>;
6070 def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
6071 (EXTRACT_SUBREG (v8i64
6073 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6074 imm:$src2)), sub_ymm)>;
6076 def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
6077 (EXTRACT_SUBREG (v16i32
6079 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6080 imm:$src2)), sub_xmm)>;
6081 def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
6082 (EXTRACT_SUBREG (v16i32
6084 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6085 imm:$src2)), sub_ymm)>;
6088 // Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Mirror of the rotate-left patterns: the first four patterns cover variable
// rotates (rotr with a vector count); the last four cover immediate rotates
// (X86vrotri with an i8 immediate). Narrow operands are inserted into the low
// subregister of an undefined (IMPLICIT_DEF) 512-bit value, and the matching
// low subregister is extracted from the wide result.
6089 let Predicates = [HasAVX512, NoVLX] in {
6090 def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6091 (EXTRACT_SUBREG (v8i64
6093 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6094 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6096 def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6097 (EXTRACT_SUBREG (v8i64
6099 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6100 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6103 def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6104 (EXTRACT_SUBREG (v16i32
6106 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6107 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6109 def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6110 (EXTRACT_SUBREG (v16i32
6112 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6113 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6116 def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
6117 (EXTRACT_SUBREG (v8i64
6119 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6120 imm:$src2)), sub_xmm)>;
6121 def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
6122 (EXTRACT_SUBREG (v8i64
6124 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6125 imm:$src2)), sub_ymm)>;
6127 def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
6128 (EXTRACT_SUBREG (v16i32
6130 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6131 imm:$src2)), sub_xmm)>;
6132 def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
6133 (EXTRACT_SUBREG (v16i32
6135 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6136 imm:$src2)), sub_ymm)>;
6139 //===-------------------------------------------------------------------===//
6140 // 1-src variable permutation VPERMW/D/Q
6141 //===-------------------------------------------------------------------===//
// Variable permute for dword/qword-sized elements, reusing the
// avx512_var_shift and avx512_var_shift_mb instruction shapes. Only Z
// (HasAVX512) and Z256 (HasAVX512 + HasVLX) are created; there is no 128-bit
// form. A single scheduling class `sched` is used for both widths.
6143 multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6144 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6145 let Predicates = [HasAVX512] in
6146 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6147 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6149 let Predicates = [HasAVX512, HasVLX] in
6150 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6151 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
// Immediate-controlled permute, reusing the avx512_shift_rmi (ri/mi) and
// avx512_shift_rmbi (mbi) instruction shapes. Only Z (HasAVX512) and Z256
// (HasAVX512 + HasVLX) are created; there is no 128-bit form.
6154 multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6155 string OpcodeStr, SDNode OpNode,
6156 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6157 let Predicates = [HasAVX512] in
6158 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6159 sched, VTInfo.info512>,
6160 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6161 sched, VTInfo.info512>, EVEX_V512;
6162 let Predicates = [HasAVX512, HasVLX] in
6163 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6164 sched, VTInfo.info256>,
6165 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6166 sched, VTInfo.info256>, EVEX_V256;
// Variable permute for byte/word elements, gated on the feature predicate
// `prd`. Reuses avx512_var_shift (rr/rm only, no broadcast form) for Z, and
// for Z256/Z128 when HasVLX is also available.
6169 multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6170 Predicate prd, SDNode OpNode,
6171 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6172 let Predicates = [prd] in
6173 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6175 let Predicates = [HasVLX, prd] in {
6176 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6178 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
// Byte/word variable permutes share opcode 0x8D; VPERMW adds VEX_W and needs
// HasBWI, VPERMB needs HasVBMI.
6183 defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6184 WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
6185 defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6186 WriteVarShuffle256, avx512vl_i8_info>;
// Dword/qword and float/double variable permutes (vector control operand).
// The 64-bit element variants add VEX_W.
6188 defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6189 WriteVarShuffle256, avx512vl_i32_info>;
6190 defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6191 WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
6192 defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6193 WriteFVarShuffle256, avx512vl_f32_info>;
6194 defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6195 WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;
// Immediate-controlled forms of VPERMQ/VPERMPD (X86VPermi). They extend the
// same defm prefixes with ri/mi/mbi records.
6197 defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6198 X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6199 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6200 defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6201 X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6202 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6204 //===----------------------------------------------------------------------===//
6205 // AVX-512 - VPERMIL
6206 //===----------------------------------------------------------------------===//
// Variable-control permute where the data vector (`_`) and the control vector
// (`Ctrl`) may use different type infos. Defines rr (control in a register),
// rm (control loaded with Ctrl.LdFrag) and rmb (control broadcast from a
// scalar load, tagged EVEX_B). All forms are in the T8PD map.
// Note the rmb input operand is declared as _.ScalarMemOp, while its pattern
// loads through Ctrl.ScalarLdFrag.
6208 multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6209 X86FoldableSchedWrite sched, X86VectorVTInfo _,
6210 X86VectorVTInfo Ctrl> {
6211 defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6212 (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6213 "$src2, $src1", "$src1, $src2",
6214 (_.VT (OpNode _.RC:$src1,
6215 (Ctrl.VT Ctrl.RC:$src2)))>,
6216 T8PD, EVEX_4V, Sched<[sched]>;
6217 defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6218 (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6219 "$src2, $src1", "$src1, $src2",
6222 (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
6223 T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6224 Sched<[sched.Folded, sched.ReadAfterFold]>;
6225 defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6226 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6227 "${src2}"##_.BroadcastStr##", $src1",
6228 "$src1, ${src2}"##_.BroadcastStr,
6231 (Ctrl.VT (X86VBroadcast
6232 (Ctrl.ScalarLdFrag addr:$src2)))))>,
6233 T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6234 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates avx512_permil_vec (with X86VPermilpv as the node) at all three
// vector widths, picking the matching per-width entry of `sched`, `_` and
// `Ctrl`. The 512-bit form requires HasAVX512; the 128-bit and 256-bit forms
// additionally require HasVLX.
6237 multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6238 X86SchedWriteWidths sched,
6239 AVX512VLVectorVTInfo _,
6240 AVX512VLVectorVTInfo Ctrl> {
6241 let Predicates = [HasAVX512] in {
6242 defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6243 _.info512, Ctrl.info512>, EVEX_V512;
6245 let Predicates = [HasAVX512, HasVLX] in {
6246 defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6247 _.info128, Ctrl.info128>, EVEX_V128;
6248 defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6249 _.info256, Ctrl.info256>, EVEX_V256;
// Top-level VPERMIL multiclass. Under the same NAME it defines both:
//   - the variable-control forms (opcode OpcVar, SchedWriteFVarShuffle) via
//     avx512_permil_vec_common, and
//   - the immediate-control forms (opcode OpcImm, node X86VPermilpi,
//     SchedWriteFShuffle) via avx512_shift_rmi_sizes.
6253 multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6254 AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6255 defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6257 defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6258 X86VPermilpi, SchedWriteFShuffle, _>,
6259 EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
// VPERMILPS / VPERMILPD instantiations. Arguments are the immediate-form
// opcode followed by the variable-form opcode (0x04/0x0C for PS, 0x05/0x0D
// for PD). Each is assigned its packed single/double execution domain.
6262 let ExeDomain = SSEPackedSingle in
6263 defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6265 let ExeDomain = SSEPackedDouble in
6266 defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6267 avx512vl_i64_info>, VEX_W1X;
6269 //===----------------------------------------------------------------------===//
6270 // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6271 //===----------------------------------------------------------------------===//
// Immediate-controlled shuffles. All three share opcode 0x70 and are told
// apart by their base class (AVX512BIi8Base / AVX512XSIi8Base /
// AVX512XDIi8Base). Note the record names VPSHUFH / VPSHUFL carry the
// mnemonics "vpshufhw" / "vpshuflw".
6273 defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6274 X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6275 EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6276 defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6277 X86PShufhw, SchedWriteShuffle>,
6278 EVEX, AVX512XSIi8Base;
6279 defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6280 X86PShuflw, SchedWriteShuffle>,
6281 EVEX, AVX512XDIi8Base;
6283 //===----------------------------------------------------------------------===//
6284 // AVX-512 - VPSHUFB
6285 //===----------------------------------------------------------------------===//
// Byte shuffle at all three vector widths, built on avx512_var_shift with
// fixed i8 vector types (v64i8 / v32i8 / v16i8). The 512-bit form requires
// HasBWI; the 256-bit and 128-bit forms additionally require HasVLX.
6287 multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6288 X86SchedWriteWidths sched> {
6289 let Predicates = [HasBWI] in
6290 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6293 let Predicates = [HasVLX, HasBWI] in {
6294 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6296 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
// VPSHUFB: opcode 0x00, selected from X86pshufb, scheduled as a variable
// shuffle; marked VEX_WIG.
6301 defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6302 SchedWriteVarShuffle>, VEX_WIG;
6304 //===----------------------------------------------------------------------===//
6305 // Move Low to High and High to Low packed FP Instructions
6306 //===----------------------------------------------------------------------===//
// Register-register VMOVLHPS on 128-bit registers (VR128X), selected from
// X86Movlhps on v4f32.
6308 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6309 (ins VR128X:$src1, VR128X:$src2),
6310 "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6311 [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6312 Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
// Register-register VMOVHLPS, selected from X86Movhlps on v4f32. Flagged as
// commutable and marked NotMemoryFoldable.
6313 let isCommutable = 1 in
6314 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6315 (ins VR128X:$src1, VR128X:$src2),
6316 "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6317 [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6318 Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
6320 //===----------------------------------------------------------------------===//
6321 // VMOVHPS/PD VMOVLPS Instructions
6322 // All patterns were taken from the SSE implementation.
6323 //===----------------------------------------------------------------------===//
// Load form (`rm`) shared by the VMOVH*/VMOVL* instantiations below. The
// instruction takes a register $src1 of class _.RC and a 64-bit memory
// operand $src2 (f64mem). It is flagged mayLoad with no side effects and
// inherits the execution domain of `_`. OpNode is an SDPatternOperator, so
// callers may pass null_frag.
6325 multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6326 SDPatternOperator OpNode,
6327 X86VectorVTInfo _> {
6328 let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6329 def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6330 (ins _.RC:$src1, f64mem:$src2),
6331 !strconcat(OpcodeStr,
6332 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6336 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6337 Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
// 128-bit load forms of VMOVHPS/VMOVHPD/VMOVLPS/VMOVLPD. The MOVH* forms use
// opcode 0x16 and the MOVL* forms use 0x12.
// No patterns for MOVLPS/MOVHPS (null_frag is passed) as the Movlhps node
// should only be created in SSE1, and the MOVLPS pattern is even more complex.
// The PD forms are selected from X86Unpckl (high) and X86Movsd (low).
6342 defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6343 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6344 defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6345 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6346 defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6347 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6348 defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6349 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
// Additional selection patterns (HasAVX512) that fold a 64-bit load into
// VMOVHPDZ128rm / VMOVLPDZ128rm:
//   - X86Unpckl with a bitcast i64 scalar load, or with X86vzload -> VMOVHPD.
//   - X86Movsd with X86vzload                                    -> VMOVLPD.
6351 let Predicates = [HasAVX512] in {
6353 def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
6354 (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
6355 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6356 def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload addr:$src2))),
6357 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6360 def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload addr:$src2))),
6361 (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
// Store forms of VMOVHPS/VMOVHPD/VMOVLPS/VMOVLPD (MRMDestMem, f64mem
// destination). All four are scheduled as WriteFStore.
6364 let SchedRW = [WriteFStore] in {
// VMOVHPS store: no selection pattern, so mayStore is set explicitly.
6365 let mayStore = 1, hasSideEffects = 0 in
6366 def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6367 (ins f64mem:$dst, VR128X:$src),
6368 "vmovhps\t{$src, $dst|$dst, $src}",
6369 []>, EVEX, EVEX_CD8<32, CD8VT2>;
// VMOVHPD store: matches a store of element 0 of X86Unpckh($src, $src).
6370 def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6371 (ins f64mem:$dst, VR128X:$src),
6372 "vmovhpd\t{$src, $dst|$dst, $src}",
6373 [(store (f64 (extractelt
6374 (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6375 (iPTR 0))), addr:$dst)]>,
6376 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
// VMOVLPS store: no selection pattern, so mayStore is set explicitly.
6377 let mayStore = 1, hasSideEffects = 0 in
6378 def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6379 (ins f64mem:$dst, VR128X:$src),
6380 "vmovlps\t{$src, $dst|$dst, $src}",
6381 []>, EVEX, EVEX_CD8<32, CD8VT2>;
// VMOVLPD store: matches a store of element 0 of $src.
6382 def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6383 (ins f64mem:$dst, VR128X:$src),
6384 "vmovlpd\t{$src, $dst|$dst, $src}",
6385 [(store (f64 (extractelt (v2f64 VR128X:$src),
6386 (iPTR 0))), addr:$dst)]>,
6387 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
// Extra pattern (HasAVX512): a store of element 0 of
// X86VPermilpi($src, 1) on v2f64 is also selected as VMOVHPDZ128mr.
6390 let Predicates = [HasAVX512] in {
6392 def : Pat<(store (f64 (extractelt
6393 (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6394 (iPTR 0))), addr:$dst),
6395 (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6397 //===----------------------------------------------------------------------===//
6398 // FMA - Fused Multiply Operations
// Packed FMA, 213 operand order, for one vector width. $src1 is tied to $dst.
// Three maskable forms are defined:
//   r  - all-register form; pattern is OpNode($src2, $src1, $src3).
//   m  - $src3 is a vector load; pattern is OpNode($src2, $src1, load).
//   mb - $src3 is an X86VBroadcast of a scalar load; encoded with EVEX_B.
// The memory forms use the folded scheduling classes of `sched`.
6401 multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6402 X86FoldableSchedWrite sched,
6403 X86VectorVTInfo _, string Suff> {
6404 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6405 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6406 (ins _.RC:$src2, _.RC:$src3),
6407 OpcodeStr, "$src3, $src2", "$src2, $src3",
6408 (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6409 AVX512FMA3Base, Sched<[sched]>;
6411 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6412 (ins _.RC:$src2, _.MemOp:$src3),
6413 OpcodeStr, "$src3, $src2", "$src2, $src3",
6414 (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6415 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6417 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6418 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6419 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
6420 !strconcat("$src2, ${src3}", _.BroadcastStr ),
6422 _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>,
6423 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Packed FMA, 213 operand order, register form with an explicit rounding
// control operand (AVX512RC:$rc). $src1 is tied to $dst. The pattern is
// OpNode($src2, $src1, $src3, $rc) and the instruction is encoded with
// EVEX_B and EVEX_RC.
6427 multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6428 X86FoldableSchedWrite sched,
6429 X86VectorVTInfo _, string Suff> {
6430 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6431 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6432 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6433 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6434 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
6435 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
// Instantiates the 213 packed FMA forms at all three vector widths.
//   Z    - 512-bit, requires HasAVX512; also pulls in the rounding form
//          (avx512_fma3_213_round) using OpNodeRnd.
//   Z256 - 256-bit, requires HasVLX and HasAVX512.
//   Z128 - 128-bit, requires HasVLX and HasAVX512.
6438 multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6439 SDNode OpNodeRnd, X86SchedWriteWidths sched,
6440 AVX512VLVectorVTInfo _, string Suff> {
6441 let Predicates = [HasAVX512] in {
6442 defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.ZMM,
6444 avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6446 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6448 let Predicates = [HasVLX, HasAVX512] in {
6449 defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.YMM,
6451 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6452 defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.XMM,
6454 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// Defines the packed-single (PS) and packed-double (PD) variants of a 213 FMA
// instruction. The mnemonic gets a "ps" / "pd" suffix appended and both
// variants are scheduled with SchedWriteFMA.
6458 multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6460 defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6461 SchedWriteFMA, avx512vl_f32_info, "PS">;
6462 defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6463 SchedWriteFMA, avx512vl_f64_info, "PD">,
// 213-form packed FMA instructions. Each line supplies the opcode, the
// mnemonic stem, the plain SDNode and its rounding-mode counterpart.
6467 defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
6468 defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
6469 defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
6470 defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
6471 defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
6472 defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
// Packed FMA, 231 operand order, for one vector width. $src1 is tied to $dst
// and appears as the last OpNode operand.
//   r  - all-register form; pattern is OpNode($src2, $src3, $src1). This form
//        also passes `vselect, 1` as extra AVX512_maskable_3src arguments.
//   m  - $src3 is a vector load; pattern is OpNode($src2, load, $src1).
//   mb - $src3 is an X86VBroadcast of a scalar load; encoded with EVEX_B.
6475 multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6476 X86FoldableSchedWrite sched,
6477 X86VectorVTInfo _, string Suff> {
6478 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6479 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6480 (ins _.RC:$src2, _.RC:$src3),
6481 OpcodeStr, "$src3, $src2", "$src2, $src3",
6482 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1,
6483 vselect, 1>, AVX512FMA3Base, Sched<[sched]>;
6485 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6486 (ins _.RC:$src2, _.MemOp:$src3),
6487 OpcodeStr, "$src3, $src2", "$src2, $src3",
6488 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6489 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6491 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6492 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6493 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6494 "$src2, ${src3}"##_.BroadcastStr,
6495 (_.VT (OpNode _.RC:$src2,
6496 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
6497 _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
6498 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Packed FMA, 231 operand order, register form with an explicit rounding
// control operand (AVX512RC:$rc). $src1 is tied to $dst. The pattern is
// OpNode($src2, $src3, $src1, $rc) and the instruction is encoded with
// EVEX_B and EVEX_RC.
6502 multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6503 X86FoldableSchedWrite sched,
6504 X86VectorVTInfo _, string Suff> {
6505 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6506 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6507 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6508 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6509 (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
6511 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
// Instantiates the 231 packed FMA forms at all three vector widths.
//   Z    - 512-bit, requires HasAVX512; also pulls in the rounding form
//          (avx512_fma3_231_round) using OpNodeRnd.
//   Z256 - 256-bit, requires HasVLX and HasAVX512.
//   Z128 - 128-bit, requires HasVLX and HasAVX512.
6514 multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6515 SDNode OpNodeRnd, X86SchedWriteWidths sched,
6516 AVX512VLVectorVTInfo _, string Suff> {
6517 let Predicates = [HasAVX512] in {
6518 defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.ZMM,
6520 avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6522 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6524 let Predicates = [HasVLX, HasAVX512] in {
6525 defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.YMM,
6527 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6528 defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.XMM,
6530 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// Defines the packed-single (PS) and packed-double (PD) variants of a 231 FMA
// instruction. The mnemonic gets a "ps" / "pd" suffix appended and both
// variants are scheduled with SchedWriteFMA.
6534 multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6535 SDNode OpNodeRnd > {
6536 defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6537 SchedWriteFMA, avx512vl_f32_info, "PS">;
6538 defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6539 SchedWriteFMA, avx512vl_f64_info, "PD">,
// 231-form packed FMA instructions. Each line supplies the opcode, the
// mnemonic stem, the plain SDNode and its rounding-mode counterpart.
6543 defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
6544 defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
6545 defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
6546 defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
6547 defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
6548 defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
// Packed FMA, 132 operand order, for one vector width. $src1 is tied to $dst.
//   r  - all-register form; pattern is OpNode($src1, $src3, $src2). This form
//        also passes `vselect, 1` as extra AVX512_maskable_3src arguments.
//   m  - $src3 is a vector load; pattern is OpNode(load, $src1, $src2).
//   mb - $src3 is an X86VBroadcast of a scalar load; encoded with EVEX_B.
6550 multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6551 X86FoldableSchedWrite sched,
6552 X86VectorVTInfo _, string Suff> {
6553 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6554 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6555 (ins _.RC:$src2, _.RC:$src3),
6556 OpcodeStr, "$src3, $src2", "$src2, $src3",
6557 (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1, vselect, 1>,
6558 AVX512FMA3Base, Sched<[sched]>;
6560 // The pattern is written in 312 order so that the load is in a different
6561 // place from the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6562 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6563 (ins _.RC:$src2, _.MemOp:$src3),
6564 OpcodeStr, "$src3, $src2", "$src2, $src3",
6565 (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
6566 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6568 // The pattern is written in 312 order so that the load is in a different
6569 // place from the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6570 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6571 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6572 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6573 "$src2, ${src3}"##_.BroadcastStr,
6574 (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
6575 _.RC:$src1, _.RC:$src2)), 1, 0>,
6576 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Packed FMA, 132 operand order, register form with an explicit rounding
// control operand (AVX512RC:$rc). $src1 is tied to $dst. The pattern is
// OpNode($src1, $src3, $src2, $rc) and the instruction is encoded with
// EVEX_B and EVEX_RC.
6580 multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6581 X86FoldableSchedWrite sched,
6582 X86VectorVTInfo _, string Suff> {
6583 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6584 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6585 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6586 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6587 (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
6589 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
// Instantiates the 132 packed FMA forms at all three vector widths.
//   Z    - 512-bit, requires HasAVX512; also pulls in the rounding form
//          (avx512_fma3_132_round) using OpNodeRnd.
//   Z256 - 256-bit, requires HasVLX and HasAVX512.
//   Z128 - 128-bit, requires HasVLX and HasAVX512.
6592 multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6593 SDNode OpNodeRnd, X86SchedWriteWidths sched,
6594 AVX512VLVectorVTInfo _, string Suff> {
6595 let Predicates = [HasAVX512] in {
6596 defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.ZMM,
6598 avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6600 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6602 let Predicates = [HasVLX, HasAVX512] in {
6603 defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.YMM,
6605 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6606 defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.XMM,
6608 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// Defines the packed-single (PS) and packed-double (PD) variants of a 132 FMA
// instruction. The mnemonic gets a "ps" / "pd" suffix appended and both
// variants are scheduled with SchedWriteFMA.
6612 multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6613 SDNode OpNodeRnd > {
6614 defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6615 SchedWriteFMA, avx512vl_f32_info, "PS">;
6616 defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6617 SchedWriteFMA, avx512vl_f64_info, "PD">,
// 132-form packed FMA instructions. Each line supplies the opcode, the
// mnemonic stem, the plain SDNode and its rounding-mode counterpart.
6621 defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
6622 defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
6623 defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
6624 defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
6625 defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
6626 defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
// Scalar FMA definitions for one operand order. $src1 is tied to $dst.
// Two families are produced:
//   - r_Int / m_Int / rb_Int: maskable forms on the vector register class
//     (_.RC). They are defined with null_frag, i.e. without a selection
//     pattern here.
//   - r / m / rb: isCodeGenOnly, commutable forms on the scalar FP register
//     class (_.FRC), selected through the caller-supplied patterns RHS_r,
//     RHS_m and RHS_b.
// When MaskOnlyReg is set, the `r` and `rb` forms get an empty pattern list;
// the `m` form always uses RHS_m.
6629 multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6630 dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
6631 let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
6632 defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6633 (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
6634 "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6635 AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>;
6638 defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6639 (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
6640 "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6641 AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;
6643 defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6644 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6645 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
6646 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
6648 let isCodeGenOnly = 1, isCommutable = 1 in {
6649 def r : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6650 (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
6651 !strconcat(OpcodeStr,
6652 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6653 !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>;
6654 def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
6655 (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
6656 !strconcat(OpcodeStr,
6657 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6658 [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;
6660 def rb : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6661 (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
6662 !strconcat(OpcodeStr,
6663 "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
6664 !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
6665 Sched<[SchedWriteFMA.Scl]>;
6666 }// isCodeGenOnly = 1
6667 }// Constraints = "$src1 = $dst"
// Instantiates avx512_fma3s_common once per operand order (213, 231, 132) for
// one scalar type. Record names are NAME # <order> # SUFF # "Z" and mnemonics
// are OpcodeStr # <order> # _.Suffix. Each instantiation passes the register,
// memory and rounding patterns for that order; the final argument
// (MaskOnlyReg) is 0 for the 213 form and 1 for the 231 and 132 forms.
6670 multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6671 string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
6672 X86VectorVTInfo _, string SUFF> {
6673 let ExeDomain = _.ExeDomain in {
6674 defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
6675 // Operands for intrinsic are in 123 order to preserve passthru
6677 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6679 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6680 (_.ScalarLdFrag addr:$src3)))),
6681 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
6682 _.FRC:$src3, (i32 timm:$rc)))), 0>;
6684 defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
6685 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6687 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
6688 (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
6689 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
6690 _.FRC:$src1, (i32 timm:$rc)))), 1>;
6692 // One pattern is in 312 order so that the load is in a different place from
6693 // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6694 defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
6695 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6697 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6698 _.FRC:$src1, _.FRC:$src2))),
6699 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
6700 _.FRC:$src2, (i32 timm:$rc)))), 1>;
// Defines the scalar single ("SS", f32x_info) and scalar double ("SD",
// f64x_info) variants of a scalar FMA instruction under HasAVX512. Both are
// VEX_LIG; the double variant additionally sets VEX_W and uses a 64-bit
// EVEX_CD8 element size.
6704 multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6705 string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> {
6706 let Predicates = [HasAVX512] in {
6707 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6708 OpNodeRnd, f32x_info, "SS">,
6709 EVEX_CD8<32, CD8VT1>, VEX_LIG;
6710 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6711 OpNodeRnd, f64x_info, "SD">,
6712 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
// Scalar FMA instructions. The three opcode arguments are given in
// 213, 231, 132 order, followed by the mnemonic stem, the plain SDNode and
// its rounding-mode counterpart.
6716 defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>;
6717 defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>;
6718 defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
6719 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
// Selection patterns that map a scalar FMA whose result is re-inserted into
// element 0 of $src1 (via `Move` + scalar_to_vector) onto the *_Int scalar
// FMA instructions, looked up by name as Prefix # <order> # Suffix # "Z...".
// In every pattern one Op operand is element 0 of $src1, and $src2 / $src3
// are copied from the scalar register class into VR128X for the instruction.
// Pattern groups, in order:
//   - unmasked                      -> Zr_Int / Zm_Int
//   - X86selects with element 0 of
//     $src1 as the false value      -> Zr_Intk / Zm_Intk
//   - X86selects with ZeroFP as the
//     false value                   -> Zr_Intkz / Zm_Intkz
//   - RndOp with a rounding operand -> Zrb_Int / Zrb_Intk / Zrb_Intkz
6721 multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
6722 string Suffix, SDNode Move,
6723 X86VectorVTInfo _, PatLeaf ZeroFP> {
6724 let Predicates = [HasAVX512] in {
6725 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6727 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6729 (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
6730 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6731 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6733 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6734 (Op _.FRC:$src2, _.FRC:$src3,
6735 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6736 (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
6737 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6738 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6740 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6742 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6743 (_.ScalarLdFrag addr:$src3)))))),
6744 (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
6745 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6748 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6749 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6750 (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
6751 (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
6752 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6755 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6756 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6757 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6758 (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
6759 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6762 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6763 (X86selects VK1WM:$mask,
6765 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6767 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6768 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
6769 VR128X:$src1, VK1WM:$mask,
6770 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6771 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6773 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6774 (X86selects VK1WM:$mask,
6776 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6777 (_.ScalarLdFrag addr:$src3)),
6778 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6779 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
6780 VR128X:$src1, VK1WM:$mask,
6781 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6783 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6784 (X86selects VK1WM:$mask,
6785 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6786 (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
6787 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6788 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
6789 VR128X:$src1, VK1WM:$mask,
6790 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6792 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6793 (X86selects VK1WM:$mask,
6794 (Op _.FRC:$src2, _.FRC:$src3,
6795 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6796 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6797 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
6798 VR128X:$src1, VK1WM:$mask,
6799 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6800 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6802 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6803 (X86selects VK1WM:$mask,
6804 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6805 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6806 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6807 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
6808 VR128X:$src1, VK1WM:$mask,
6809 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
// Zero-masking patterns: the false value of X86selects is ZeroFP.
6811 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6812 (X86selects VK1WM:$mask,
6814 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6816 (_.EltVT ZeroFP)))))),
6817 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
6818 VR128X:$src1, VK1WM:$mask,
6819 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6820 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6822 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6823 (X86selects VK1WM:$mask,
6824 (Op _.FRC:$src2, _.FRC:$src3,
6825 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6826 (_.EltVT ZeroFP)))))),
6827 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
6828 VR128X:$src1, VK1WM:$mask,
6829 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6830 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6832 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6833 (X86selects VK1WM:$mask,
6835 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6836 (_.ScalarLdFrag addr:$src3)),
6837 (_.EltVT ZeroFP)))))),
6838 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
6839 VR128X:$src1, VK1WM:$mask,
6840 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
// NOTE(review): in this 132 memory pattern the load is the third Op operand
// and $src2 the second, whereas the other 132 memory patterns in this
// multiclass (Zm_Int, Zm_Intk) have the load second and $src2 third.
// Confirm that this operand order is intended.
6842 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6843 (X86selects VK1WM:$mask,
6844 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6845 _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
6846 (_.EltVT ZeroFP)))))),
6847 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
6848 VR128X:$src1, VK1WM:$mask,
6849 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6851 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6852 (X86selects VK1WM:$mask,
6853 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6854 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6855 (_.EltVT ZeroFP)))))),
6856 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
6857 VR128X:$src1, VK1WM:$mask,
6858 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6860 // Patterns with rounding mode.
6861 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6863 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6864 _.FRC:$src3, (i32 timm:$rc)))))),
6865 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
6866 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6867 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6869 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6870 (RndOp _.FRC:$src2, _.FRC:$src3,
6871 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6872 (i32 timm:$rc)))))),
6873 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
6874 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6875 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6877 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6878 (X86selects VK1WM:$mask,
6880 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6881 _.FRC:$src3, (i32 timm:$rc)),
6882 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6883 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
6884 VR128X:$src1, VK1WM:$mask,
6885 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6886 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6888 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6889 (X86selects VK1WM:$mask,
6890 (RndOp _.FRC:$src2, _.FRC:$src3,
6891 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6893 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6894 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
6895 VR128X:$src1, VK1WM:$mask,
6896 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6897 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6899 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6900 (X86selects VK1WM:$mask,
6902 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6903 _.FRC:$src3, (i32 timm:$rc)),
6904 (_.EltVT ZeroFP)))))),
6905 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
6906 VR128X:$src1, VK1WM:$mask,
6907 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6908 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6910 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6911 (X86selects VK1WM:$mask,
6912 (RndOp _.FRC:$src2, _.FRC:$src3,
6913 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6915 (_.EltVT ZeroFP)))))),
6916 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
6917 VR128X:$src1, VK1WM:$mask,
6918 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6919 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
// Scalar FMA pattern instantiations for the "SS" instructions: X86Movss as
// the move node, v4f32x_info as the vector type and fp32imm0 as the zero.
6923 defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SS",
6924 X86Movss, v4f32x_info, fp32imm0>;
6925 defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SS",
6926 X86Movss, v4f32x_info, fp32imm0>;
6927 defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SS",
6928 X86Movss, v4f32x_info, fp32imm0>;
6929 defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS",
6930 X86Movss, v4f32x_info, fp32imm0>;
// Same for the "SD" instructions: X86Movsd, v2f64x_info and fp64imm0.
6932 defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SD",
6933 X86Movsd, v2f64x_info, fp64imm0>;
6934 defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SD",
6935 X86Movsd, v2f64x_info, fp64imm0>;
6936 defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SD",
6937 X86Movsd, v2f64x_info, fp64imm0>;
6938 defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD",
6939 X86Movsd, v2f64x_info, fp64imm0>;
6941 //===----------------------------------------------------------------------===//
6942 // AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
6943 //===----------------------------------------------------------------------===//
// IFMA multiply-add forms for one vector width. $src1 is tied to $dst for
// every definition in the multiclass. Three maskable forms are defined:
//   r  - all-register form; pattern is OpNode($src2, $src3, $src1).
//   m  - $src3 is a vector load; pattern is OpNode($src2, load, $src1).
//   mb - $src3 is an X86VBroadcast of a scalar load; encoded with EVEX_B.
6944 let Constraints = "$src1 = $dst" in {
6945 multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6946 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6947 // NOTE: The SDNode has the multiply operands first with the add last.
6948 // This enables commuted load patterns to be autogenerated by tablegen.
6949 let ExeDomain = _.ExeDomain in {
6950 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6951 (ins _.RC:$src2, _.RC:$src3),
6952 OpcodeStr, "$src3, $src2", "$src2, $src3",
6953 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6954 AVX512FMA3Base, Sched<[sched]>;
6956 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6957 (ins _.RC:$src2, _.MemOp:$src3),
6958 OpcodeStr, "$src3, $src2", "$src2, $src3",
6959 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
6960 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6962 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6963 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6964 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
6965 !strconcat("$src2, ${src3}", _.BroadcastStr ),
6967 (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
6969 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6972 } // Constraints = "$src1 = $dst"
// avx512_pmadd52_common: instantiates avx512_pmadd52_rm for each vector width
// described by the AVX512VLVectorVTInfo argument. The 512-bit (Z) variant
// requires HasIFMA; the 256-bit (Z256) and 128-bit (Z128) variants require
// HasVLX and HasIFMA. Each width uses the matching member of `sched` and a
// full-vector (CD8VF) compressed displacement based on its element size.
6974 multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6975 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
6976 let Predicates = [HasIFMA] in {
6977 defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
6978 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6980 let Predicates = [HasVLX, HasIFMA] in {
6981 defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
6982 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6983 defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
6984 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// vpmadd52luq (opcode 0xb4, node x86vpmadd52l) and vpmadd52huq (opcode 0xb5,
// node x86vpmadd52h), both over avx512vl_i64_info with SchedWriteVecIMul.
// NOTE(review): each defm ends in ',' and its trailing modifier line is not
// visible in this excerpt.
6988 defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
6989 SchedWriteVecIMul, avx512vl_i64_info>,
6991 defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
6992 SchedWriteVecIMul, avx512vl_i64_info>,
6995 //===----------------------------------------------------------------------===//
6996 // AVX-512 Scalar convert from signed/unsigned integer to float/double
6997 //===----------------------------------------------------------------------===//
// avx512_vcvtsi: scalar integer-to-FP conversion, non-rounding forms.
//   opc      - opcode byte.
//   OpNode   - pattern operator used by the _Int forms (may be null_frag).
//   sched    - scheduling class.
//   SrcRC    - integer source register class.
//   DstVT    - destination vector type info (FRC for the scalar forms, RC/VT
//              for the _Int forms).
//   x86memop - memory operand type for the load forms.
//   ld_frag  - load fragment matched by rm_Int.
//   asm      - mnemonic without the leading "v" used by the alias below.
//   mem      - size suffix, emitted as "{mem}" in the memory-form asm strings.
6999 multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
7000 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7001 X86MemOperand x86memop, PatFrag ld_frag, string asm,
// Codegen-only scalar (FRC) forms. Their pattern lists are empty, so they are
// only reachable through standalone Pat definitions.
7003 let hasSideEffects = 0, isCodeGenOnly = 1 in {
7004 def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7005 (ins DstVT.FRC:$src1, SrcRC:$src),
7006 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7007 EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7009 def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7010 (ins DstVT.FRC:$src1, x86memop:$src),
7011 asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
7012 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7013 } // hasSideEffects = 0
// Vector-register (_Int) forms: OpNode takes the DstVT.VT value in $src1 and
// the integer source in $src2.
7014 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7015 (ins DstVT.RC:$src1, SrcRC:$src2),
7016 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7017 [(set DstVT.RC:$dst,
7018 (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
7019 EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7021 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7022 (ins DstVT.RC:$src1, x86memop:$src2),
7023 asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7024 [(set DstVT.RC:$dst,
7025 (OpNode (DstVT.VT DstVT.RC:$src1),
7026 (ld_frag addr:$src2)))]>,
7027 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
// AT&T-only alias: "v" + asm + mem (suffix appended without braces) maps to
// the register _Int form.
7028 def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7029 (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
7030 DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
// avx512_vcvtsi_round: register form of a scalar integer-to-FP conversion with
// an explicit rounding-control operand (AVX512RC:$rc), marked EVEX_B and
// EVEX_RC. Also defines an AT&T-only alias that appends the `mem` size suffix
// to the mnemonic.
// NOTE(review): the end of the parameter list and parts of the asm string and
// pattern are not visible in this excerpt.
7033 multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7034 X86FoldableSchedWrite sched, RegisterClass SrcRC,
7035 X86VectorVTInfo DstVT, string asm,
7037 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7038 (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7040 "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7041 [(set DstVT.RC:$dst,
7042 (OpNode (DstVT.VT DstVT.RC:$src1),
7045 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7046 def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
7047 (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
7048 DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
// avx512_vcvtsi_common: combines avx512_vcvtsi_round (using OpNodeRnd) and
// avx512_vcvtsi (using OpNode) under the same NAME, and marks the result
// VEX_LIG. The remaining parameters are forwarded unchanged.
7051 multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
7052 X86FoldableSchedWrite sched,
7053 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7054 X86MemOperand x86memop, PatFrag ld_frag,
7055 string asm, string mem> {
7056 defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
7057 avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7058 ld_frag, asm, mem>, VEX_LIG;
// Signed integer to scalar FP conversions (opcode 0x2A). The "l" variants use
// i32mem/loadi32; the "q" variants use i64mem/loadi64 and add VEX_W.
7061 let Predicates = [HasAVX512] in {
7062 defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7064 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7065 XS, EVEX_CD8<32, CD8VT1>;
7066 defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7068 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7069 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
// VCVTSI2SDZ is built from avx512_vcvtsi directly, so it has no rrb_Int
// (rounding-control) form, and its OpNode is null_frag.
7070 defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7071 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l">,
7072 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7073 defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7075 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7076 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// AT&T-only aliases: the unsuffixed mnemonic with a memory operand maps to the
// i32mem rm_Int form.
7078 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7079 (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7080 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7081 (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
// Select plain scalar sint_to_fp onto the codegen-only rm/rr forms. The first
// instruction operand ($src1) is not supplied by the pattern, so it is filled
// with IMPLICIT_DEF of the result type.
// Load-folding forms:
7083 def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
7084 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7085 def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
7086 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7087 def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
7088 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7089 def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
7090 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
// Register forms:
7092 def : Pat<(f32 (sint_to_fp GR32:$src)),
7093 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7094 def : Pat<(f32 (sint_to_fp GR64:$src)),
7095 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7096 def : Pat<(f64 (sint_to_fp GR32:$src)),
7097 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7098 def : Pat<(f64 (sint_to_fp GR64:$src)),
7099 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
// Unsigned integer to scalar FP conversions (opcode 0x7B), mirroring the
// signed definitions: "l" variants use i32mem/loadi32, "q" variants use
// i64mem/loadi64 and add VEX_W.
7101 defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7103 v4f32x_info, i32mem, loadi32,
7104 "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
7105 defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7107 v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7108 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
// VCVTUSI2SDZ is built from avx512_vcvtsi directly, so it has no rrb_Int
// (rounding-control) form, and its OpNode is null_frag.
7109 defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7110 i32mem, loadi32, "cvtusi2sd", "l">,
7111 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7112 defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7114 v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7115 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// AT&T-only aliases: the unsuffixed mnemonic with a memory operand maps to the
// i32mem rm_Int form.
7117 def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7118 (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7119 def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7120 (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
// Select plain scalar uint_to_fp onto the codegen-only rm/rr forms. The first
// instruction operand ($src1) is not supplied by the pattern, so it is filled
// with IMPLICIT_DEF of the result type.
// Load-folding forms:
7122 def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
7123 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7124 def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
7125 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7126 def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
7127 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7128 def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
7129 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
// Register forms:
7131 def : Pat<(f32 (uint_to_fp GR32:$src)),
7132 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7133 def : Pat<(f32 (uint_to_fp GR64:$src)),
7134 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7135 def : Pat<(f64 (uint_to_fp GR32:$src)),
7136 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7137 def : Pat<(f64 (uint_to_fp GR64:$src)),
7138 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7141 //===----------------------------------------------------------------------===//
7142 // AVX-512 Scalar convert from float/double to integer
7143 //===----------------------------------------------------------------------===//
// avx512_cvt_s_int_round: scalar FP-to-integer conversion taking the source as
// a vector register (_Int forms only).
//   rr_Int  - register source, matched with OpNode.
//   rrb_Int - register source plus AVX512RC:$rc, matched with OpNodeRnd;
//             marked EVEX_B and EVEX_RC.
//   rm_Int  - memory source matched through SrcVT.ScalarIntMemCPat.
// The three AT&T-only aliases accept "v" + asm + aliasStr for each form.
// NOTE(review): two lines of the parameter list and the Sched line of rrb_Int
// are not visible in this excerpt.
7145 multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7146 X86VectorVTInfo DstVT, SDNode OpNode,
7148 X86FoldableSchedWrite sched, string asm,
7150 let Predicates = [HasAVX512] in {
7151 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7152 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7153 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7154 EVEX, VEX_LIG, Sched<[sched]>;
7155 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7156 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7157 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
7158 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7160 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7161 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7162 [(set DstVT.RC:$dst, (OpNode
7163 (SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>,
7164 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7165 } // Predicates = [HasAVX512]
7167 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7168 (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7169 def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7170 (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7171 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7172 (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7173 SrcVT.IntScalarMemOp:$src), 0, "att">;
7176 // Convert float/double to signed/unsigned int 32/64
// Signed conversions use opcode 0x2D with X86cvts2si/X86cvts2siRnd; unsigned
// ones use opcode 0x79 with X86cvts2usi/X86cvts2usiRnd. The 64-bit destination
// variants (i64x_info, "{q}") add VEX_W. f32 sources use XS and
// EVEX_CD8<32, CD8VT1>; f64 sources use XD and EVEX_CD8<64, CD8VT1>.
7177 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7178 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7179 XS, EVEX_CD8<32, CD8VT1>;
7180 defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7181 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7182 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7183 defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7184 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7185 XS, EVEX_CD8<32, CD8VT1>;
7186 defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7187 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7188 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7189 defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7190 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7191 XD, EVEX_CD8<64, CD8VT1>;
7192 defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7193 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7194 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7195 defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7196 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7197 XD, EVEX_CD8<64, CD8VT1>;
7198 defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7199 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7200 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7202 // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
7203 // which produce unnecessary vmovs{s,d} instructions
// Each pattern folds (X86Movss/X86Movsd $dst, (scalar_to_vector (int-to-fp x)))
// into the corresponding rr_Int / rm_Int instruction, with $dst as the
// pass-through vector operand.
7204 let Predicates = [HasAVX512] in {
// Signed source, f32 result.
7205 def : Pat<(v4f32 (X86Movss
7206 (v4f32 VR128X:$dst),
7207 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
7208 (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7210 def : Pat<(v4f32 (X86Movss
7211 (v4f32 VR128X:$dst),
7212 (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
7213 (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7215 def : Pat<(v4f32 (X86Movss
7216 (v4f32 VR128X:$dst),
7217 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
7218 (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7220 def : Pat<(v4f32 (X86Movss
7221 (v4f32 VR128X:$dst),
7222 (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
7223 (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
// Signed source, f64 result.
7225 def : Pat<(v2f64 (X86Movsd
7226 (v2f64 VR128X:$dst),
7227 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
7228 (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7230 def : Pat<(v2f64 (X86Movsd
7231 (v2f64 VR128X:$dst),
7232 (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
7233 (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7235 def : Pat<(v2f64 (X86Movsd
7236 (v2f64 VR128X:$dst),
7237 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
7238 (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7240 def : Pat<(v2f64 (X86Movsd
7241 (v2f64 VR128X:$dst),
7242 (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
7243 (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
// Unsigned source, f32 result.
7245 def : Pat<(v4f32 (X86Movss
7246 (v4f32 VR128X:$dst),
7247 (v4f32 (scalar_to_vector (f32 (uint_to_fp GR64:$src)))))),
7248 (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7250 def : Pat<(v4f32 (X86Movss
7251 (v4f32 VR128X:$dst),
7252 (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi64 addr:$src))))))),
7253 (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7255 def : Pat<(v4f32 (X86Movss
7256 (v4f32 VR128X:$dst),
7257 (v4f32 (scalar_to_vector (f32 (uint_to_fp GR32:$src)))))),
7258 (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7260 def : Pat<(v4f32 (X86Movss
7261 (v4f32 VR128X:$dst),
7262 (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi32 addr:$src))))))),
7263 (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
// Unsigned source, f64 result.
7265 def : Pat<(v2f64 (X86Movsd
7266 (v2f64 VR128X:$dst),
7267 (v2f64 (scalar_to_vector (f64 (uint_to_fp GR64:$src)))))),
7268 (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7270 def : Pat<(v2f64 (X86Movsd
7271 (v2f64 VR128X:$dst),
7272 (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi64 addr:$src))))))),
7273 (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7275 def : Pat<(v2f64 (X86Movsd
7276 (v2f64 VR128X:$dst),
7277 (v2f64 (scalar_to_vector (f64 (uint_to_fp GR32:$src)))))),
7278 (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7280 def : Pat<(v2f64 (X86Movsd
7281 (v2f64 VR128X:$dst),
7282 (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi32 addr:$src))))))),
7283 (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7284 } // Predicates = [HasAVX512]
7286 // Convert float/double to signed/unsigned int 32/64 with truncation
//   OpNode    - node matched by the codegen-only scalar (FRC) rr/rm forms.
//   OpNodeInt - node matched by the vector-register rr_Int/rm_Int forms.
//   OpNodeSAE - node matched by rrb_Int, whose asm string carries {sae} and
//               which is marked EVEX_B.
//   aliasStr  - suffix appended to asm by the AT&T-only aliases at the end.
7287 multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7288 X86VectorVTInfo _DstRC, SDNode OpNode,
7289 SDNode OpNodeInt, SDNode OpNodeSAE,
7290 X86FoldableSchedWrite sched, string aliasStr>{
7291 let Predicates = [HasAVX512] in {
// Scalar register-class forms, codegen only.
7292 let isCodeGenOnly = 1 in {
7293 def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7294 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7295 [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7296 EVEX, VEX_LIG, Sched<[sched]>;
7297 def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7298 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7299 [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7300 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Vector-register (_Int) forms.
7303 def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7304 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7305 [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7306 EVEX, VEX_LIG, Sched<[sched]>;
7307 def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7308 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7309 [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7310 EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
7311 def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7312 (ins _SrcRC.IntScalarMemOp:$src),
7313 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7314 [(set _DstRC.RC:$dst,
7315 (OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>,
7316 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
// AT&T-only aliases. Unlike avx512_cvt_s_int_round, no "v" is prepended here:
// asm is expected to already hold the full mnemonic.
7319 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7320 (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7321 def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7322 (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7323 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7324 (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7325 _SrcRC.IntScalarMemOp:$src), 0, "att">;
// Truncating FP-to-signed-integer conversions (opcode 0x2C), matched with
// fp_to_sint, X86cvtts2Int and X86cvtts2IntSAE. The 64-bit destination
// variants (i64x_info, "{q}") add VEX_W.
7328 defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7329 fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7330 "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7331 defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7332 fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7333 "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7334 defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7335 fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7336 "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7337 defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7338 fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7339 "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
// Truncating FP-to-unsigned-integer conversions (opcode 0x78), matched with
// fp_to_uint, X86cvtts2UInt and X86cvtts2UIntSAE.
7341 defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7342 fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7343 "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7344 defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7345 fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7346 "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
7347 defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7348 fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7349 "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7350 defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7351 fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7352 "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7354 //===----------------------------------------------------------------------===//
7355 // AVX-512 Convert from float to double and back
7356 //===----------------------------------------------------------------------===//
// avx512_cvt_fp_scalar: scalar FP-to-FP conversion between two element types.
//   _    - destination type info; $src1 (of the destination type) is the
//          first operand passed to OpNode.
//   _Src - source type info for $src2.
// rr_Int/rm_Int are maskable vector-register forms matched with OpNode.
// rr/rm are codegen-only FRC forms with empty pattern lists.
7358 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7359 X86VectorVTInfo _Src, SDNode OpNode,
7360 X86FoldableSchedWrite sched> {
7361 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7362 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7363 "$src2, $src1", "$src1, $src2",
7364 (_.VT (OpNode (_.VT _.RC:$src1),
7365 (_Src.VT _Src.RC:$src2)))>,
7366 EVEX_4V, VEX_LIG, Sched<[sched]>;
7367 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7368 (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7369 "$src2, $src1", "$src1, $src2",
7370 (_.VT (OpNode (_.VT _.RC:$src1),
7371 (_Src.VT _Src.ScalarIntMemCPat:$src2)))>,
7373 Sched<[sched.Folded, sched.ReadAfterFold]>;
7375 let isCodeGenOnly = 1, hasSideEffects = 0 in {
7376 def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7377 (ins _.FRC:$src1, _Src.FRC:$src2),
7378 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7379 EVEX_4V, VEX_LIG, Sched<[sched]>;
7381 def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7382 (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7383 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7384 EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7388 // Scalar Conversion with SAE - suppress all exceptions
// Defines only rrb_Int: a maskable register form matched with OpNodeSAE, whose
// asm operands include the literal {sae} and which is marked EVEX_B.
7389 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7390 X86VectorVTInfo _Src, SDNode OpNodeSAE,
7391 X86FoldableSchedWrite sched> {
7392 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7393 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7394 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7395 (_.VT (OpNodeSAE (_.VT _.RC:$src1),
7396 (_Src.VT _Src.RC:$src2)))>,
7397 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
7400 // Scalar Conversion with rounding control (RC)
// Defines only rrb_Int: a maskable register form taking AVX512RC:$rc, which is
// passed to OpNodeRnd as an i32 timm.
// NOTE(review): the modifier line following Sched<[sched]>, is not visible in
// this excerpt.
7401 multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7402 X86VectorVTInfo _Src, SDNode OpNodeRnd,
7403 X86FoldableSchedWrite sched> {
7404 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7405 (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7406 "$rc, $src2, $src1", "$src1, $src2, $rc",
7407 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7408 (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
7409 EVEX_4V, VEX_LIG, Sched<[sched]>,
// Combines the plain forms (avx512_cvt_fp_scalar, OpNode) with the
// rounding-control form (avx512_cvt_fp_rc_scalar, OpNodeRnd) into a single "Z"
// instantiation under HasAVX512, encoded with VEX_W, EVEX_CD8<64, CD8VT1>
// and XD. Note the argument order: _dst is passed before _src.
7412 multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
7413 SDNode OpNode, SDNode OpNodeRnd,
7414 X86FoldableSchedWrite sched,
7415 X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7416 let Predicates = [HasAVX512] in {
7417 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7418 avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7419 OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
// Combines the plain forms (avx512_cvt_fp_scalar, OpNode) with the SAE form
// (avx512_cvt_fp_sae_scalar, OpNodeSAE) into a single "Z" instantiation under
// HasAVX512, encoded with EVEX_CD8<32, CD8VT1> and XS. Note the argument
// order: _dst is passed before _src.
7423 multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
7424 SDNode OpNode, SDNode OpNodeSAE,
7425 X86FoldableSchedWrite sched,
7426 X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7427 let Predicates = [HasAVX512] in {
7428 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7429 avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
7430 EVEX_CD8<32, CD8VT1>, XS;
// vcvtsd2ss and vcvtss2sd share opcode 0x5A. VCVTSD2SS passes f64x_info as the
// source and adds a rounding-control node (X86froundsRnd); VCVTSS2SD passes
// f32x_info as the source and adds an SAE node (X86fpextsSAE).
// NOTE(review): the final argument line of each defm (the destination type
// info) is not visible in this excerpt.
7433 defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86frounds,
7434 X86froundsRnd, WriteCvtSD2SS, f64x_info,
7436 defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts,
7437 X86fpextsSAE, WriteCvtSS2SD, f32x_info,
// Plain scalar fpextend / fpround select the codegen-only rr/rm forms, with
// IMPLICIT_DEF supplying the first ($src1) operand.
7440 def : Pat<(f64 (fpextend FR32X:$src)),
7441 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7442 Requires<[HasAVX512]>;
// The load-folding fpextend form is additionally gated on OptForSize.
7443 def : Pat<(f64 (fpextend (loadf32 addr:$src))),
7444 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7445 Requires<[HasAVX512, OptForSize]>;
7447 def : Pat<(f32 (fpround FR64X:$src)),
7448 (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7449 Requires<[HasAVX512]>;
// Fold "convert element 0 of $src and insert it into $dst" (expressed with
// X86Movss/X86Movsd over scalar_to_vector) into the rr_Int forms, which take
// $dst as the pass-through operand.
7451 def : Pat<(v4f32 (X86Movss
7452 (v4f32 VR128X:$dst),
7453 (v4f32 (scalar_to_vector
7454 (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
7455 (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
7456 Requires<[HasAVX512]>;
7458 def : Pat<(v2f64 (X86Movsd
7459 (v2f64 VR128X:$dst),
7460 (v2f64 (scalar_to_vector
7461 (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
7462 (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
7463 Requires<[HasAVX512]>;
7465 //===----------------------------------------------------------------------===//
7466 // AVX-512 Vector convert from signed/unsigned integer to float/double
7467 // and from float/double to signed/unsigned integer
7468 //===----------------------------------------------------------------------===//
// avx512_vcvt_fp: packed conversion with register (rr), memory (rm) and
// broadcast-memory (rmb) forms, each built on AVX512_maskable_common with an
// explicit vselect-based masked pattern and a "$src0 = $dst" constraint.
//   _         - destination vector type info.
//   _Src      - source vector type info.
//   OpNode    - conversion node.
//   Broadcast - broadcast suffix appended to the rmb asm operand; defaults to
//               _.BroadcastStr.
//   Alias     - suffix appended to OpcodeStr in the rm form; defaults to "".
//   MemOp     - memory operand type for rm; defaults to _Src.MemOp.
//   MaskRC    - write-mask register class; defaults to _.KRCWM.
//   LdDAG     - DAG matched by the masked rm form; defaults to OpNode applied
//               to a _Src.LdFrag load.
// NOTE(review): several operand lines of rr/rm/rmb are not visible in this
// excerpt.
7470 multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7471 X86VectorVTInfo _Src, SDNode OpNode,
7472 X86FoldableSchedWrite sched,
7473 string Broadcast = _.BroadcastStr,
7474 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7475 RegisterClass MaskRC = _.KRCWM,
7476 dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
7478 defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
7480 (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
7481 (ins MaskRC:$mask, _Src.RC:$src),
7482 OpcodeStr, "$src", "$src",
7483 (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7484 (vselect MaskRC:$mask,
7485 (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7487 vselect, "$src0 = $dst">,
7488 EVEX, Sched<[sched]>;
7490 defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
7492 (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
7493 (ins MaskRC:$mask, MemOp:$src),
7494 OpcodeStr#Alias, "$src", "$src",
7496 (vselect MaskRC:$mask, LdDAG, _.RC:$src0),
7497 vselect, "$src0 = $dst">,
7498 EVEX, Sched<[sched.Folded]>;
7500 defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
7501 (ins _Src.ScalarMemOp:$src),
7502 (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
7503 (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
7505 "${src}"##Broadcast, "${src}"##Broadcast,
7506 (_.VT (OpNode (_Src.VT
7507 (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
7509 (vselect MaskRC:$mask,
7514 (_Src.ScalarLdFrag addr:$src))))),
7516 vselect, "$src0 = $dst">,
7517 EVEX, EVEX_B, Sched<[sched.Folded]>;
7519 // Conversion with SAE - suppress all exceptions
// Defines only rrb: a maskable register form matched with OpNodeSAE, whose asm
// operands include the literal {sae} and which is marked EVEX_B.
7520 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7521 X86VectorVTInfo _Src, SDNode OpNodeSAE,
7522 X86FoldableSchedWrite sched> {
7523 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7524 (ins _Src.RC:$src), OpcodeStr,
7525 "{sae}, $src", "$src, {sae}",
7526 (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
7527 EVEX, EVEX_B, Sched<[sched]>;
7530 // Conversion with rounding control (RC)
// Defines only rrb: a maskable register form taking AVX512RC:$rc, which is
// passed to OpNodeRnd as an i32 timm; marked EVEX_B and EVEX_RC.
7531 multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7532 X86VectorVTInfo _Src, SDNode OpNodeRnd,
7533 X86FoldableSchedWrite sched> {
7534 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7535 (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
7536 "$rc, $src", "$src, $rc",
7537 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
7538 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
7541 // Similar to avx512_vcvt_fp, but uses an extload for the memory form.
// Forwards its arguments to avx512_vcvt_fp and replaces LdDAG with the PatFrag
// named "extload" # _Src.VTName applied to addr:$src.
// NOTE(review): one line of the forwarded argument list is not visible in this
// excerpt.
7542 multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7543 X86VectorVTInfo _Src, SDNode OpNode,
7544 X86FoldableSchedWrite sched,
7545 string Broadcast = _.BroadcastStr,
7546 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7547 RegisterClass MaskRC = _.KRCWM>
7548 : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, sched, Broadcast, Alias,
7550 (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
7552 // Extend Float to Double
// Z (512-bit, HasAVX512) adds an SAE form matched with X86vfpextSAE.
// Z128 and Z256 require HasVLX. Z128 uses X86vfpext rather than fpextend and
// overrides the broadcast string ("{1to2}") and memory operand (f64mem),
// while still taking v4f32x_info as its source type.
7553 multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
7554 X86SchedWriteWidths sched> {
7555 let Predicates = [HasAVX512] in {
7556 defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
7557 fpextend, sched.ZMM>,
7558 avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
7559 X86vfpextSAE, sched.ZMM>, EVEX_V512;
7561 let Predicates = [HasVLX] in {
7562 defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
7563 X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
7564 defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
7565 sched.YMM>, EVEX_V256;
7569 // Truncate Double to Float
// Z (512-bit, HasAVX512) adds a rounding-control form matched with
// X86vfproundRnd. Z128 and Z256 require HasVLX. Z128 passes null_frag as
// OpNode and overrides the mask class to VK2WM; Z256 uses X86vfpround. The
// "{x}" / "{y}" Alias arguments are appended to the mnemonic of the rm forms.
7570 multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
7571 let Predicates = [HasAVX512] in {
7572 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, X86vfpround, sched.ZMM>,
7573 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
7574 X86vfproundRnd, sched.ZMM>, EVEX_V512;
7576 let Predicates = [HasVLX] in {
7577 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
7578 null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>,
7580 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, X86vfpround,
7581 sched.YMM, "{1to4}", "{y}">, EVEX_V256;
// AT&T-only aliases with an explicit "x" suffix for the Z128 register and
// broadcast forms (unmasked, merge-masked, zero-masked). These take a VR128X
// source, or an f64mem source broadcast {1to2}.
7584 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7585 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7586 def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7587 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7588 VK2WM:$mask, VR128X:$src), 0, "att">;
7589 def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|"
7590 "$dst {${mask}} {z}, $src}",
7591 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7592 VK2WM:$mask, VR128X:$src), 0, "att">;
7593 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7594 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7595 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
7596 "$dst {${mask}}, ${src}{1to2}}",
7597 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7598 VK2WM:$mask, f64mem:$src), 0, "att">;
7599 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7600 "$dst {${mask}} {z}, ${src}{1to2}}",
7601 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7602 VK2WM:$mask, f64mem:$src), 0, "att">;
// Same set of aliases with a "y" suffix for the Z256 forms. These take a
// VR256X source, or an f64mem source broadcast {1to4}, and also write
// VR128X:$dst.
7604 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7605 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7606 def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7607 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7608 VK4WM:$mask, VR256X:$src), 0, "att">;
7609 def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
7610 "$dst {${mask}} {z}, $src}",
7611 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7612 VK4WM:$mask, VR256X:$src), 0, "att">;
7613 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7614 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7615 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
7616 "$dst {${mask}}, ${src}{1to4}}",
7617 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7618 VK4WM:$mask, f64mem:$src), 0, "att">;
7619 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7620 "$dst {${mask}} {z}, ${src}{1to4}}",
7621 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7622 VK4WM:$mask, f64mem:$src), 0, "att">;
// Packed double<->float conversions share opcode 0x5A. VCVTPD2PS uses VEX_W,
// PD and a full-vector displacement (CD8VF); VCVTPS2PD uses PS and a
// half-vector displacement (CD8VH).
7625 defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
7626 VEX_W, PD, EVEX_CD8<64, CD8VF>;
7627 defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
7628 PS, EVEX_CD8<32, CD8VH>;
// Select generic fpround (v8f64 -> v8f32) onto VCVTPD2PSZ. Each source kind
// has three patterns: unmasked, merge-masked (vselect with $src0 -> "k"
// variant) and zero-masked (vselect with ImmAllZerosV -> "kz" variant).
// NOTE(review): the pass-through operand line of some merge-masked patterns is
// not visible in this excerpt.
7630 let Predicates = [HasAVX512] in {
// Register source.
7631 def : Pat<(v8f32 (fpround (v8f64 VR512:$src))),
7632 (VCVTPD2PSZrr VR512:$src)>;
7633 def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
7635 (VCVTPD2PSZrrk VR256X:$src0, VK8WM:$mask, VR512:$src)>;
7636 def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
7637 v8f32x_info.ImmAllZerosV),
7638 (VCVTPD2PSZrrkz VK8WM:$mask, VR512:$src)>;
// Full-vector load source.
7640 def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
7641 (VCVTPD2PSZrm addr:$src)>;
7642 def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
7644 (VCVTPD2PSZrmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
7645 def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
7646 v8f32x_info.ImmAllZerosV),
7647 (VCVTPD2PSZrmkz VK8WM:$mask, addr:$src)>;
// Broadcast scalar load source.
7649 def : Pat<(v8f32 (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src))))),
7650 (VCVTPD2PSZrmb addr:$src)>;
7651 def : Pat<(vselect VK8WM:$mask,
7652 (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))),
7653 (v8f32 VR256X:$src0)),
7654 (VCVTPD2PSZrmbk VR256X:$src0, VK8WM:$mask, addr:$src)>;
7655 def : Pat<(vselect VK8WM:$mask,
7656 (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))),
7657 v8f32x_info.ImmAllZerosV),
7658 (VCVTPD2PSZrmbkz VK8WM:$mask, addr:$src)>;
// VLX patterns. First group: generic fpround (v4f64 -> v4f32) onto
// VCVTPD2PSZ256, with unmasked, merge-masked ("k") and zero-masked ("kz")
// variants for register, full-vector load and broadcast-load sources.
// NOTE(review): the pass-through or mask operand line of some masked patterns
// is not visible in this excerpt.
7661 let Predicates = [HasVLX] in {
7662 def : Pat<(v4f32 (fpround (v4f64 VR256X:$src))),
7663 (VCVTPD2PSZ256rr VR256X:$src)>;
7664 def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
7666 (VCVTPD2PSZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
7667 def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
7668 v4f32x_info.ImmAllZerosV),
7669 (VCVTPD2PSZ256rrkz VK4WM:$mask, VR256X:$src)>;
7671 def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
7672 (VCVTPD2PSZ256rm addr:$src)>;
7673 def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
7675 (VCVTPD2PSZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
7676 def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
7677 v4f32x_info.ImmAllZerosV),
7678 (VCVTPD2PSZ256rmkz VK4WM:$mask, addr:$src)>;
7680 def : Pat<(v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
7681 (VCVTPD2PSZ256rmb addr:$src)>;
7682 def : Pat<(vselect VK4WM:$mask,
7683 (v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
7685 (VCVTPD2PSZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
7686 def : Pat<(vselect VK4WM:$mask,
7687 (v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
7688 v4f32x_info.ImmAllZerosV),
7689 (VCVTPD2PSZ256rmbkz VK4WM:$mask, addr:$src)>;
7691 // Special patterns to allow use of X86vmfpround for masking. Instruction
7692 // patterns have been disabled with null_frag.
// Second group: the 128-bit forms. Unmasked cases match X86vfpround; masked
// cases match X86vmfpround, which takes (source, pass-through, mask) as seen
// in the broadcast patterns below.
7693 def : Pat<(X86vfpround (v2f64 VR128X:$src)),
7694 (VCVTPD2PSZ128rr VR128X:$src)>;
7695 def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
7697 (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
7698 def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV,
7700 (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
7702 def : Pat<(X86vfpround (loadv2f64 addr:$src)),
7703 (VCVTPD2PSZ128rm addr:$src)>;
7704 def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
7706 (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
7707 def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV,
7709 (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;
7711 def : Pat<(X86vfpround (v2f64 (X86VBroadcast (loadf64 addr:$src)))),
7712 (VCVTPD2PSZ128rmb addr:$src)>;
7713 def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))),
7714 (v4f32 VR128X:$src0), VK2WM:$mask),
7715 (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
7716 def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))),
7717 v4f32x_info.ImmAllZerosV, VK2WM:$mask),
7718 (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
7721 // Convert Signed/Unsigned Doubleword to Double
7722 multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
7723 SDNode OpNode128, X86SchedWriteWidths sched> {
7724 // No rounding in this op
7725 let Predicates = [HasAVX512] in
7726 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
7727 sched.ZMM>, EVEX_V512;
7729 let Predicates = [HasVLX] in {
7730 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
7731 OpNode128, sched.XMM, "{1to2}", "", i64mem, VK2WM,
7732 (v2f64 (OpNode128 (bc_v4i32
7734 (scalar_to_vector (loadi64 addr:$src))))))>,
7736 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
7737 sched.YMM>, EVEX_V256;
7741 // Convert Signed/Unsigned Doubleword to Float
7742 multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
7743 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7744 let Predicates = [HasAVX512] in
7745 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
7747 avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
7748 OpNodeRnd, sched.ZMM>, EVEX_V512;
7750 let Predicates = [HasVLX] in {
7751 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
7752 sched.XMM>, EVEX_V128;
7753 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
7754 sched.YMM>, EVEX_V256;
7758 // Convert Float to Signed/Unsigned Doubleword with truncation
7759 multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7760 SDNode OpNodeSAE, X86SchedWriteWidths sched> {
7761 let Predicates = [HasAVX512] in {
7762 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7764 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
7765 OpNodeSAE, sched.ZMM>, EVEX_V512;
7767 let Predicates = [HasVLX] in {
7768 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7769 sched.XMM>, EVEX_V128;
7770 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7771 sched.YMM>, EVEX_V256;
7775 // Convert Float to Signed/Unsigned Doubleword
7776 multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7777 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7778 let Predicates = [HasAVX512] in {
7779 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7781 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
7782 OpNodeRnd, sched.ZMM>, EVEX_V512;
7784 let Predicates = [HasVLX] in {
7785 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7786 sched.XMM>, EVEX_V128;
7787 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7788 sched.YMM>, EVEX_V256;
7792 // Convert Double to Signed/Unsigned Doubleword with truncation
7793 multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7794 SDNode OpNodeSAE, X86SchedWriteWidths sched> {
7795 let Predicates = [HasAVX512] in {
7796 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7798 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
7799 OpNodeSAE, sched.ZMM>, EVEX_V512;
7801 let Predicates = [HasVLX] in {
7802 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7803 // memory forms of these instructions in Asm Parser. They have the same
7804 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7805 // due to the same reason.
7806 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
7807 null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
7809 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7810 sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7813 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7814 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
7815 VR128X:$src), 0, "att">;
7816 def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7817 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7818 VK2WM:$mask, VR128X:$src), 0, "att">;
7819 def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7820 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7821 VK2WM:$mask, VR128X:$src), 0, "att">;
7822 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7823 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
7824 f64mem:$src), 0, "att">;
7825 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
7826 "$dst {${mask}}, ${src}{1to2}}",
7827 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7828 VK2WM:$mask, f64mem:$src), 0, "att">;
7829 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7830 "$dst {${mask}} {z}, ${src}{1to2}}",
7831 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7832 VK2WM:$mask, f64mem:$src), 0, "att">;
7834 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7835 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
7836 VR256X:$src), 0, "att">;
7837 def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7838 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7839 VK4WM:$mask, VR256X:$src), 0, "att">;
7840 def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7841 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7842 VK4WM:$mask, VR256X:$src), 0, "att">;
7843 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7844 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
7845 f64mem:$src), 0, "att">;
7846 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
7847 "$dst {${mask}}, ${src}{1to4}}",
7848 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7849 VK4WM:$mask, f64mem:$src), 0, "att">;
7850 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7851 "$dst {${mask}} {z}, ${src}{1to4}}",
7852 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7853 VK4WM:$mask, f64mem:$src), 0, "att">;
7856 // Convert Double to Signed/Unsigned Doubleword
7857 multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7858 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7859 let Predicates = [HasAVX512] in {
7860 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7862 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
7863 OpNodeRnd, sched.ZMM>, EVEX_V512;
7865 let Predicates = [HasVLX] in {
7866 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7867 // memory forms of these instructions in Asm Parser. They have the same
7868 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7869 // due to the same reason.
7870 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
7871 null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
7873 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7874 sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7877 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7878 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7879 def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7880 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7881 VK2WM:$mask, VR128X:$src), 0, "att">;
7882 def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7883 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7884 VK2WM:$mask, VR128X:$src), 0, "att">;
7885 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7886 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
7887 f64mem:$src), 0, "att">;
7888 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
7889 "$dst {${mask}}, ${src}{1to2}}",
7890 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7891 VK2WM:$mask, f64mem:$src), 0, "att">;
7892 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7893 "$dst {${mask}} {z}, ${src}{1to2}}",
7894 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7895 VK2WM:$mask, f64mem:$src), 0, "att">;
7897 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7898 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7899 def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7900 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7901 VK4WM:$mask, VR256X:$src), 0, "att">;
7902 def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7903 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7904 VK4WM:$mask, VR256X:$src), 0, "att">;
7905 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7906 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
7907 f64mem:$src), 0, "att">;
7908 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
7909 "$dst {${mask}}, ${src}{1to4}}",
7910 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7911 VK4WM:$mask, f64mem:$src), 0, "att">;
7912 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7913 "$dst {${mask}} {z}, ${src}{1to4}}",
7914 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7915 VK4WM:$mask, f64mem:$src), 0, "att">;
7918 // Convert Double to Signed/Unsigned Quadword
7919 multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7920 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7921 let Predicates = [HasDQI] in {
7922 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7924 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
7925 OpNodeRnd, sched.ZMM>, EVEX_V512;
7927 let Predicates = [HasDQI, HasVLX] in {
7928 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7929 sched.XMM>, EVEX_V128;
7930 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7931 sched.YMM>, EVEX_V256;
7935 // Convert Double to Signed/Unsigned Quadword with truncation
7936 multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7937 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7938 let Predicates = [HasDQI] in {
7939 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7941 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
7942 OpNodeRnd, sched.ZMM>, EVEX_V512;
7944 let Predicates = [HasDQI, HasVLX] in {
7945 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7946 sched.XMM>, EVEX_V128;
7947 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7948 sched.YMM>, EVEX_V256;
7952 // Convert Signed/Unsigned Quadword to Double
7953 multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
7954 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7955 let Predicates = [HasDQI] in {
7956 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
7958 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
7959 OpNodeRnd, sched.ZMM>, EVEX_V512;
7961 let Predicates = [HasDQI, HasVLX] in {
7962 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
7963 sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
7964 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
7965 sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
7969 // Convert Float to Signed/Unsigned Quadword
7970 multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7971 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7972 let Predicates = [HasDQI] in {
7973 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
7975 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
7976 OpNodeRnd, sched.ZMM>, EVEX_V512;
7978 let Predicates = [HasDQI, HasVLX] in {
7979 // Explicitly specified broadcast string, since we take only 2 elements
7980 // from v4f32x_info source
7981 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
7982 sched.XMM, "{1to2}", "", f64mem, VK2WM,
7983 (v2i64 (OpNode (bc_v4f32
7985 (scalar_to_vector (loadf64 addr:$src))))))>,
7987 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
7988 sched.YMM>, EVEX_V256;
7992 // Convert Float to Signed/Unsigned Quadword with truncation
7993 multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7994 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7995 let Predicates = [HasDQI] in {
7996 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, sched.ZMM>,
7997 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
7998 OpNodeRnd, sched.ZMM>, EVEX_V512;
8000 let Predicates = [HasDQI, HasVLX] in {
8001 // Explicitly specified broadcast string, since we take only 2 elements
8002 // from v4f32x_info source
8003 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8004 sched.XMM, "{1to2}", "", f64mem, VK2WM,
8005 (v2i64 (OpNode (bc_v4f32
8007 (scalar_to_vector (loadf64 addr:$src))))))>,
8009 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8010 sched.YMM>, EVEX_V256;
8014 // Convert Signed/Unsigned Quadword to Float
8015 multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
8016 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
8017 let Predicates = [HasDQI] in {
8018 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
8020 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
8021 OpNodeRnd, sched.ZMM>, EVEX_V512;
8023 let Predicates = [HasDQI, HasVLX] in {
8024 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8025 // memory forms of these instructions in Asm Parser. They have the same
8026 // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
8027 // due to the same reason.
8028 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag,
8029 sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
8030 EVEX_V128, NotEVEX2VEXConvertible;
8031 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
8032 sched.YMM, "{1to4}", "{y}">, EVEX_V256,
8033 NotEVEX2VEXConvertible;
8036 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
8037 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8038 VR128X:$src), 0, "att">;
8039 def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8040 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8041 VK2WM:$mask, VR128X:$src), 0, "att">;
8042 def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8043 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8044 VK2WM:$mask, VR128X:$src), 0, "att">;
8045 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8046 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8047 i64mem:$src), 0, "att">;
8048 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
8049 "$dst {${mask}}, ${src}{1to2}}",
8050 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8051 VK2WM:$mask, i64mem:$src), 0, "att">;
8052 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8053 "$dst {${mask}} {z}, ${src}{1to2}}",
8054 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8055 VK2WM:$mask, i64mem:$src), 0, "att">;
8057 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
8058 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8059 VR256X:$src), 0, "att">;
8060 def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|"
8061 "$dst {${mask}}, $src}",
8062 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8063 VK4WM:$mask, VR256X:$src), 0, "att">;
8064 def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
8065 "$dst {${mask}} {z}, $src}",
8066 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8067 VK4WM:$mask, VR256X:$src), 0, "att">;
8068 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8069 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8070 i64mem:$src), 0, "att">;
8071 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
8072 "$dst {${mask}}, ${src}{1to4}}",
8073 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8074 VK4WM:$mask, i64mem:$src), 0, "att">;
8075 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8076 "$dst {${mask}} {z}, ${src}{1to4}}",
8077 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8078 VK4WM:$mask, i64mem:$src), 0, "att">;
// Instantiations of the packed conversion multiclasses defined earlier in
// this file. Each defm passes, in order: the opcode byte (opc), the assembly
// mnemonic (OpcodeStr), the two SDNode arguments of the multiclass, and the
// X86SchedWriteWidths scheduling group. The classes listed after the closing
// '>' (XS/XD/PS/PD, VEX_W, EVEX_CD8<...>) are mixed into every record the
// multiclass produces.

// Signed doubleword source (sint_to_fp) to double / float.
8081 defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
8082 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8084 defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
8085 X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8086 PS, EVEX_CD8<32, CD8VF>;
// Float / double to signed or unsigned doubleword with truncation. The second
// node is passed as the multiclass's OpNodeSAE argument.
8088 defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86cvttp2si,
8089 X86cvttp2siSAE, SchedWriteCvtPS2DQ>,
8090 XS, EVEX_CD8<32, CD8VF>;
8092 defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86cvttp2si,
8093 X86cvttp2siSAE, SchedWriteCvtPD2DQ>,
8094 PD, VEX_W, EVEX_CD8<64, CD8VF>;
8096 defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86cvttp2ui,
8097 X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PS,
8098 EVEX_CD8<32, CD8VF>;
8100 defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui,
8101 X86cvttp2uiSAE, SchedWriteCvtPD2DQ>,
8102 PS, VEX_W, EVEX_CD8<64, CD8VF>;
// Unsigned doubleword source (uint_to_fp) to double / float.
8104 defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
8105 X86VUintToFP, SchedWriteCvtDQ2PD>, XS,
8106 EVEX_CD8<32, CD8VH>;
8108 defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
8109 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD,
8110 EVEX_CD8<32, CD8VF>;
// Float / double to signed or unsigned doubleword. The second node is passed
// as the multiclass's OpNodeRnd argument.
8112 defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
8113 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8114 EVEX_CD8<32, CD8VF>;
8116 defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
8117 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
8118 VEX_W, EVEX_CD8<64, CD8VF>;
8120 defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
8121 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8122 PS, EVEX_CD8<32, CD8VF>;
8124 defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
8125 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8126 PS, EVEX_CD8<64, CD8VF>;
// Float / double to signed or unsigned quadword. The multiclasses used from
// here to the end of this run place their records under HasDQI
// (plus HasVLX for the 128/256-bit forms).
8128 defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
8129 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8130 PD, EVEX_CD8<64, CD8VF>;
8132 defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
8133 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8134 EVEX_CD8<32, CD8VH>;
8136 defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
8137 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8138 PD, EVEX_CD8<64, CD8VF>;
8140 defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
8141 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
8142 EVEX_CD8<32, CD8VH>;
// Float / double to signed or unsigned quadword with truncation. The SAE node
// is bound to the multiclass parameter that is named OpNodeRnd in
// avx512_cvttpd2qq / avx512_cvttps2qq.
8144 defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86cvttp2si,
8145 X86cvttp2siSAE, SchedWriteCvtPD2DQ>, VEX_W,
8146 PD, EVEX_CD8<64, CD8VF>;
8148 defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86cvttp2si,
8149 X86cvttp2siSAE, SchedWriteCvtPS2DQ>, PD,
8150 EVEX_CD8<32, CD8VH>;
8152 defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86cvttp2ui,
8153 X86cvttp2uiSAE, SchedWriteCvtPD2DQ>, VEX_W,
8154 PD, EVEX_CD8<64, CD8VF>;
8156 defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui,
8157 X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PD,
8158 EVEX_CD8<32, CD8VH>;
// Signed / unsigned quadword source to double / float.
8160 defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
8161 X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
8162 EVEX_CD8<64, CD8VF>;
8164 defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
8165 X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
8166 EVEX_CD8<64, CD8VF>;
8168 defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp,
8169 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS,
8170 EVEX_CD8<64, CD8VF>;
8172 defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp,
8173 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD,
8174 EVEX_CD8<64, CD8VF>;
8176 let Predicates = [HasVLX] in {
8177 // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8178 // patterns have been disabled with null_frag.
8179 def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8180 (VCVTPD2DQZ128rr VR128X:$src)>;
8181 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8183 (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8184 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8186 (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8188 def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8189 (VCVTPD2DQZ128rm addr:$src)>;
8190 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8192 (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8193 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8195 (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8197 def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
8198 (VCVTPD2DQZ128rmb addr:$src)>;
8199 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8200 (v4i32 VR128X:$src0), VK2WM:$mask),
8201 (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8202 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8203 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8204 (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8206 // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8207 // patterns have been disabled with null_frag.
8208 def : Pat<(v4i32 (X86cvttp2si (v2f64 VR128X:$src))),
8209 (VCVTTPD2DQZ128rr VR128X:$src)>;
8210 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8212 (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8213 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8215 (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8217 def : Pat<(v4i32 (X86cvttp2si (loadv2f64 addr:$src))),
8218 (VCVTTPD2DQZ128rm addr:$src)>;
8219 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8221 (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8222 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8224 (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8226 def : Pat<(v4i32 (X86cvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
8227 (VCVTTPD2DQZ128rmb addr:$src)>;
8228 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8229 (v4i32 VR128X:$src0), VK2WM:$mask),
8230 (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8231 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8232 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8233 (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8235 // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8236 // patterns have been disabled with null_frag.
8237 def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8238 (VCVTPD2UDQZ128rr VR128X:$src)>;
8239 def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8241 (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8242 def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8244 (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8246 def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8247 (VCVTPD2UDQZ128rm addr:$src)>;
8248 def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8250 (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8251 def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8253 (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8255 def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
8256 (VCVTPD2UDQZ128rmb addr:$src)>;
8257 def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8258 (v4i32 VR128X:$src0), VK2WM:$mask),
8259 (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8260 def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8261 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8262 (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8264 // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
8265 // patterns have been disabled with null_frag.
8266 def : Pat<(v4i32 (X86cvttp2ui (v2f64 VR128X:$src))),
8267 (VCVTTPD2UDQZ128rr VR128X:$src)>;
8268 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8270 (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8271 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8273 (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8275 def : Pat<(v4i32 (X86cvttp2ui (loadv2f64 addr:$src))),
8276 (VCVTTPD2UDQZ128rm addr:$src)>;
8277 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8279 (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8280 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8282 (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8284 def : Pat<(v4i32 (X86cvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
8285 (VCVTTPD2UDQZ128rmb addr:$src)>;
8286 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8287 (v4i32 VR128X:$src0), VK2WM:$mask),
8288 (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8289 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8290 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8291 (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8294 let Predicates = [HasDQI, HasVLX] in {
8295 def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload addr:$src))))),
8296 (VCVTPS2QQZ128rm addr:$src)>;
8297 def : Pat<(v2i64 (vselect VK2WM:$mask,
8298 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
8300 (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8301 def : Pat<(v2i64 (vselect VK2WM:$mask,
8302 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
8303 v2i64x_info.ImmAllZerosV)),
8304 (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8306 def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload addr:$src))))),
8307 (VCVTPS2UQQZ128rm addr:$src)>;
8308 def : Pat<(v2i64 (vselect VK2WM:$mask,
8309 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
8311 (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8312 def : Pat<(v2i64 (vselect VK2WM:$mask,
8313 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
8314 v2i64x_info.ImmAllZerosV)),
8315 (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8317 def : Pat<(v2i64 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload addr:$src))))),
8318 (VCVTTPS2QQZ128rm addr:$src)>;
8319 def : Pat<(v2i64 (vselect VK2WM:$mask,
8320 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
8322 (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8323 def : Pat<(v2i64 (vselect VK2WM:$mask,
8324 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
8325 v2i64x_info.ImmAllZerosV)),
8326 (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8328 def : Pat<(v2i64 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload addr:$src))))),
8329 (VCVTTPS2UQQZ128rm addr:$src)>;
8330 def : Pat<(v2i64 (vselect VK2WM:$mask,
8331 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
8333 (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8334 def : Pat<(v2i64 (vselect VK2WM:$mask,
8335 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
8336 v2i64x_info.ImmAllZerosV)),
8337 (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8340 let Predicates = [HasAVX512, NoVLX] in {
8341 def : Pat<(v8i32 (X86cvttp2ui (v8f32 VR256X:$src1))),
8342 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
8343 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
8344 VR256X:$src1, sub_ymm)))), sub_ymm)>;
8346 def : Pat<(v4i32 (X86cvttp2ui (v4f32 VR128X:$src1))),
8347 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
8348 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
8349 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8351 def : Pat<(v4i32 (X86cvttp2ui (v4f64 VR256X:$src1))),
8352 (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
8353 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8354 VR256X:$src1, sub_ymm)))), sub_xmm)>;
8356 def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
8357 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
8358 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
8359 VR256X:$src1, sub_ymm)))), sub_ymm)>;
8361 def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
8362 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
8363 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
8364 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8366 def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
8367 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
8368 (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
8369 VR128X:$src1, sub_xmm)))), sub_ymm)>;
8371 def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
8372 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
8373 (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
8374 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8377 let Predicates = [HasVLX] in {
8378 def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
8379 (VCVTDQ2PDZ128rm addr:$src)>;
8380 def : Pat<(v2f64 (vselect VK2WM:$mask,
8381 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src)))),
8383 (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8384 def : Pat<(v2f64 (vselect VK2WM:$mask,
8385 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src)))),
8386 v2f64x_info.ImmAllZerosV)),
8387 (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8389 def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
8390 (VCVTUDQ2PDZ128rm addr:$src)>;
8391 def : Pat<(v2f64 (vselect VK2WM:$mask,
8392 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src)))),
8394 (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8395 def : Pat<(v2f64 (vselect VK2WM:$mask,
8396 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src)))),
8397 v2f64x_info.ImmAllZerosV)),
8398 (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
// Predicated on HasDQI and HasVLX: selection patterns for the 128-bit
// v2i64 -> v4f32 conversions VCVTQQ2PS (signed) and VCVTUQQ2PS (unsigned).
// For each signedness the source may be a register (rr), a full v2i64 load
// (rm) or a broadcast of an i64 load (rmb); each source kind has an unmasked
// pattern plus merge-masked (k) and zero-masked (kz) patterns that match the
// dedicated masked nodes X86VMSintToFP / X86VMUintToFP.
// NOTE(review): some original lines (e.g. 8407, 8410, 8416) are not present in
// this listing; they presumably carry the VK2WM:$mask operand of the masked
// source patterns -- confirm against the full file.
8401 let Predicates = [HasDQI, HasVLX] in {
8402 // Special patterns to allow use of X86VMSintToFP for masking. Instruction
8403 // patterns have been disabled with null_frag.
// Register source.
8404 def : Pat<(v4f32 (X86VSintToFP (v2i64 VR128X:$src))),
8405 (VCVTQQ2PSZ128rr VR128X:$src)>;
8406 def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
8408 (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8409 def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
8411 (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
// Full-vector load source.
8413 def : Pat<(v4f32 (X86VSintToFP (loadv2i64 addr:$src))),
8414 (VCVTQQ2PSZ128rm addr:$src)>;
8415 def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
8417 (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8418 def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
8420 (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
// Broadcast-of-i64-load source.
8422 def : Pat<(v4f32 (X86VSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))))),
8423 (VCVTQQ2PSZ128rmb addr:$src)>;
8424 def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
8425 (v4f32 VR128X:$src0), VK2WM:$mask),
8426 (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8427 def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
8428 v4f32x_info.ImmAllZerosV, VK2WM:$mask),
8429 (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
8431 // Special patterns to allow use of X86VMUintToFP for masking. Instruction
8432 // patterns have been disabled with null_frag.
// Register source.
8433 def : Pat<(v4f32 (X86VUintToFP (v2i64 VR128X:$src))),
8434 (VCVTUQQ2PSZ128rr VR128X:$src)>;
8435 def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
8437 (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8438 def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
8440 (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
// Full-vector load source.
8442 def : Pat<(v4f32 (X86VUintToFP (loadv2i64 addr:$src))),
8443 (VCVTUQQ2PSZ128rm addr:$src)>;
8444 def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
8446 (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8447 def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
8449 (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
// Broadcast-of-i64-load source.
8451 def : Pat<(v4f32 (X86VUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))))),
8452 (VCVTUQQ2PSZ128rmb addr:$src)>;
8453 def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
8454 (v4f32 VR128X:$src0), VK2WM:$mask),
8455 (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8456 def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
8457 v4f32x_info.ImmAllZerosV, VK2WM:$mask),
8458 (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
// Predicated on HasDQI together with NoVLX: 128-bit and 256-bit quadword
// conversions are selected using the 512-bit (Z) instruction forms. Every
// pattern follows the same recipe:
//   1. INSERT_SUBREG the narrow source into an IMPLICIT_DEF wider value,
//   2. apply the Z-suffixed rr instruction,
//   3. EXTRACT_SUBREG the xmm/ymm part that holds the narrow result.
8461 let Predicates = [HasDQI, NoVLX] in {
// Truncating fp -> signed i64 (X86cvttp2si).
8462 def : Pat<(v2i64 (X86cvttp2si (v2f64 VR128X:$src1))),
8463 (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
8464 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8465 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8467 def : Pat<(v4i64 (X86cvttp2si (v4f32 VR128X:$src1))),
8468 (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
8469 (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
8470 VR128X:$src1, sub_xmm)))), sub_ymm)>;
8472 def : Pat<(v4i64 (X86cvttp2si (v4f64 VR256X:$src1))),
8473 (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
8474 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8475 VR256X:$src1, sub_ymm)))), sub_ymm)>;
// Truncating fp -> unsigned i64 (X86cvttp2ui).
8477 def : Pat<(v2i64 (X86cvttp2ui (v2f64 VR128X:$src1))),
8478 (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
8479 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8480 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8482 def : Pat<(v4i64 (X86cvttp2ui (v4f32 VR128X:$src1))),
8483 (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
8484 (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
8485 VR128X:$src1, sub_xmm)))), sub_ymm)>;
8487 def : Pat<(v4i64 (X86cvttp2ui (v4f64 VR256X:$src1))),
8488 (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
8489 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8490 VR256X:$src1, sub_ymm)))), sub_ymm)>;
// Signed i64 -> fp (sint_to_fp).
8492 def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
8493 (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
8494 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8495 VR256X:$src1, sub_ymm)))), sub_xmm)>;
8497 def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
8498 (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
8499 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8500 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8502 def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
8503 (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
8504 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8505 VR256X:$src1, sub_ymm)))), sub_ymm)>;
// Unsigned i64 -> fp (uint_to_fp).
8507 def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
8508 (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
8509 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8510 VR256X:$src1, sub_ymm)))), sub_xmm)>;
8512 def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
8513 (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
8514 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8515 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8517 def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
8518 (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
8519 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8520 VR256X:$src1, sub_ymm)))), sub_ymm)>;
8523 //===----------------------------------------------------------------------===//
8524 // Half precision conversion instructions
8525 //===----------------------------------------------------------------------===//
// Defines the register (rr) and memory (rm) forms of vcvtph2ps (opcode 0x13),
// both instantiated through AVX512_maskable and matching X86cvtph2ps.
//   _dest    - type info of the result vector.
//   _src     - type info of the source vector.
//   x86memop - memory operand class used by the rm form.
//   ld_frag  - load fragment matched by the rm pattern.
//   sched    - scheduling info; rr uses sched, rm uses sched.Folded.
8527 multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8528 X86MemOperand x86memop, PatFrag ld_frag,
8529 X86FoldableSchedWrite sched> {
8530 defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
8531 (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
8532 (X86cvtph2ps (_src.VT _src.RC:$src))>,
8533 T8PD, Sched<[sched]>;
8534 defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
8535 (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
8536 (X86cvtph2ps (_src.VT
8537 (ld_frag addr:$src)))>,
8538 T8PD, Sched<[sched.Folded]>;
// Defines the rrb form of vcvtph2ps: a register-source variant whose assembly
// string carries {sae}, which matches X86cvtph2psSAE and is tagged EVEX_B.
//   _dest / _src - type info of the result and source vectors.
//   sched        - scheduling class of the instruction.
8541 multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8542 X86FoldableSchedWrite sched> {
8543 defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
8544 (ins _src.RC:$src), "vcvtph2ps",
8545 "{sae}, $src", "$src, {sae}",
8546 (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
8547 T8PD, EVEX_B, Sched<[sched]>;
// 512-bit vcvtph2ps (HasAVX512): v16i16 source, v16f32 result, f256mem memory
// operand; also instantiates the {sae} register form.
// NOTE(review): original line 8552 is missing from this listing; it presumably
// holds the scheduling argument of avx512_cvtph2ps -- confirm.
8550 let Predicates = [HasAVX512] in
8551 defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, load,
8553 avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
8554 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
// HasVLX-only forms of vcvtph2ps: the 256-bit variant (v8i16 -> v8f32,
// f128mem) and the 128-bit variant (v8i16 -> v4f32, f64mem), followed by
// extra patterns that fold a 64-bit load into the 128-bit rm form.
8556 let Predicates = [HasVLX] in {
8557 defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
8558 load, WriteCvtPH2PSY>, EVEX, EVEX_V256,
8559 EVEX_CD8<32, CD8VH>;
8560 defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
8561 load, WriteCvtPH2PS>, EVEX, EVEX_V128,
8562 EVEX_CD8<32, CD8VH>;
8564 // Pattern match vcvtph2ps of a scalar i64 load.
// The load may appear either as vzload_v2i64 or as a bitconvert of
// scalar_to_vector(loadi64); both select VCVTPH2PSZ128rm.
8565 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
8566 (VCVTPH2PSZ128rm addr:$src)>;
8567 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
8568 (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
8569 (VCVTPH2PSZ128rm addr:$src)>;
// Defines the vcvtps2ph instruction forms (opcode 0x1D), all taking the
// source vector in $src1 and an 8-bit immediate in $src2:
//   rr   - register destination, matches X86cvtps2ph.
//   rrk  - merge-masked register destination ($src0 tied to $dst), matches
//          X86mcvtps2ph with $src0 as pass-through.
//   rrkz - zero-masked register destination, matches X86mcvtps2ph with
//          _dest.ImmAllZerosV as pass-through.
//   mr   - memory destination, no pattern.
//   mrk  - masked memory destination, no pattern, NotMemoryFoldable.
// Parameters:
//   _dest / _src - type info of the result and source vectors.
//   x86memop     - memory operand class for the store forms.
//   RR / MR      - scheduling classes for register and memory destinations.
// NOTE(review): mrk takes its mask as _dest.KRCWM while rrk/rrkz use
// _src.KRCWM -- confirm this difference is intended.
// NOTE(review): original lines 8580 and 8600 are missing from this listing;
// they presumably carry the Sched<> annotations of rr and mr -- confirm.
8572 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8573 X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
8574 let ExeDomain = GenericDomain in {
8575 def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8576 (ins _src.RC:$src1, i32u8imm:$src2),
8577 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8578 [(set _dest.RC:$dst,
8579 (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2)))]>,
8581 let Constraints = "$src0 = $dst" in
8582 def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8583 (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8584 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
8585 [(set _dest.RC:$dst,
8586 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
8587 _dest.RC:$src0, _src.KRCWM:$mask))]>,
8588 Sched<[RR]>, EVEX_K;
8589 def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8590 (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8591 "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
8592 [(set _dest.RC:$dst,
8593 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
8594 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
8595 Sched<[RR]>, EVEX_KZ;
// Store forms carry no selection pattern, so mayStore is set explicitly.
8596 let hasSideEffects = 0, mayStore = 1 in {
8597 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
8598 (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
8599 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8601 def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
8602 (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8603 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
8604 EVEX_K, Sched<[MR]>, NotMemoryFoldable;
// Defines the rrb form of vcvtps2ph: a register-destination variant whose
// assembly string carries {sae}. It is built with AVX512_maskable_in_asm, has
// an empty pattern list, and is tagged EVEX_B.
// NOTE(review): original line 8610 is missing from this listing; it presumably
// declares the `Sched` parameter used below -- confirm.
8609 multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8611 let hasSideEffects = 0 in
8612 defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
8613 (outs _dest.RC:$dst),
8614 (ins _src.RC:$src1, i32u8imm:$src2),
8615 "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
8616 EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
// Instantiations of vcvtps2ph and the patterns that fold a store of its result
// into the memory-destination (mr) forms.
// NOTE(review): the closing braces of the two `let` scopes are not visible in
// this listing, so which predicate governs the store patterns cannot be told
// from here -- confirm against the full file.
8619 let Predicates = [HasAVX512] in {
// 512-bit form: v16f32 source, v16i16 result, plus the {sae} register form.
8620 defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
8621 WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
8622 avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
8623 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
// 256-bit and 128-bit forms additionally require VLX.
8624 let Predicates = [HasVLX] in {
8625 defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
8626 WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
8627 EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
8628 defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
8629 WriteCvtPS2PH, WriteCvtPS2PHSt>,
8630 EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
// 128-bit source: a store of element 0 of the result, viewed as f64 or as
// i64, becomes VCVTPS2PHZ128mr.
8633 def : Pat<(store (f64 (extractelt
8634 (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
8635 (iPTR 0))), addr:$dst),
8636 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
8637 def : Pat<(store (i64 (extractelt
8638 (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
8639 (iPTR 0))), addr:$dst),
8640 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
// 256-bit and 512-bit sources: a store of the whole result vector.
8641 def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
8642 (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
8643 def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),
8644 (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
8647 // Patterns for matching conversions from float to half-float and vice versa.
8648 let Predicates = [HasVLX] in {
8649 // Use MXCSR.RC for rounding instead of explicitly specifying the default
8650 // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
8651 // configurations we support (the default). However, falling back to MXCSR is
8652 // more consistent with other instructions, which are always controlled by it.
8653 // It's encoded as 0b100.
// fp_to_f16: copy the FR32X scalar into VR128X, convert with VCVTPS2PHZ128rr
// (immediate 4), move the result out with VMOVPDI2DIZrr and take its low
// 16 bits (sub_16bit).
8654 def : Pat<(fp_to_f16 FR32X:$src),
8655 (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (v8i16 (VCVTPS2PHZ128rr
8656 (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4))), sub_16bit))>;
// f16_to_fp: apply MOVSX32rr16 to the GR16 input, copy it into VR128X,
// convert with VCVTPH2PSZ128rr and copy the result to FR32X.
8658 def : Pat<(f16_to_fp GR16:$src),
8659 (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
8660 (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)))), FR32X)) >;
// Round trip f32 -> f16 -> f32: chain the two vector conversions directly,
// without the intermediate moves used by the two patterns above.
8662 def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
8663 (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
8664 (v8i16 (VCVTPS2PHZ128rr
8665 (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4)))), FR32X)) >;
8668 // Unordered/Ordered scalar fp compare with Sae and set EFLAGS
// Defines a single rrb instruction: a register-register compare with no
// outputs and no selection pattern, whose assembly string includes {sae}.
//   opc       - opcode byte.
//   _         - type info supplying the register class of both operands.
//   OpcodeStr - mnemonic.
//   sched     - scheduling class.
8669 multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
8670 string OpcodeStr, X86FoldableSchedWrite sched> {
8671 let hasSideEffects = 0 in
8672 def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
8673 !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
8674 EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
// {sae} register-register forms of vucomiss/vucomisd (opcode 0x2E) and
// vcomiss/vcomisd (opcode 0x2F). All of them are declared to define EFLAGS
// and are predicated on HasAVX512.
8677 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
8678 defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>,
8679 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
8680 defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>,
8681 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
8682 defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>,
8683 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
8684 defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>,
8685 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
// EVEX-encoded (u)comiss / (u)comisd built from the sse12_ord_cmp and
// sse12_ord_cmp_int multiclasses (defined elsewhere). All define EFLAGS and
// are predicated on HasAVX512.
// NOTE(review): the closing braces of the nested `let` scopes are not visible
// in this listing, so their exact extent should be confirmed in the full file.
8688 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
// Scalar-register (FR32X/FR64X) unordered compares, matched via X86cmp.
8689 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
8690 "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
8691 EVEX_CD8<32, CD8VT1>;
8692 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
8693 "ucomisd", WriteFCom>, PD, EVEX,
8694 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
// Scalar-register ordered compares: the operator is undef and the pattern
// list is forced empty.
8695 let Pattern = []<dag> in {
8696 defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
8697 "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
8698 EVEX_CD8<32, CD8VT1>;
8699 defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
8700 "comisd", WriteFCom>, PD, EVEX,
8701 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
// VR128X variants matched via X86ucomi / X86comi; marked isCodeGenOnly.
8703 let isCodeGenOnly = 1 in {
8704 defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
8705 sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
8706 EVEX_CD8<32, CD8VT1>;
8707 defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
8708 sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX,
8709 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8711 defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
8712 sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
8713 EVEX_CD8<32, CD8VT1>;
8714 defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
8715 sse_load_f64, "comisd", WriteFCom>, PD, EVEX,
8716 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8720 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
// Two-source scalar forms built with AVX512_maskable_scalar, predicated on
// HasAVX512:
//   rr - both sources in registers.
//   rm - second source from memory (_.IntScalarMemOp / _.ScalarIntMemCPat).
// Parameters:
//   opc, OpcodeStr - opcode byte and mnemonic.
//   OpNode         - DAG node matched by both forms.
//   sched          - scheduling info (rm uses Folded and ReadAfterFold).
//   _              - type info of the operands.
8721 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8722 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8723 let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
8724 defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8725 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8726 "$src2, $src1", "$src1, $src2",
8727 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8728 EVEX_4V, VEX_LIG, Sched<[sched]>;
8729 defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8730 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8731 "$src2, $src1", "$src1, $src2",
8732 (OpNode (_.VT _.RC:$src1),
8733 _.ScalarIntMemCPat:$src2)>, EVEX_4V, VEX_LIG,
8734 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Scalar rcp14 (opcode 0x4D) and rsqrt14 (opcode 0x4F) instantiations for f32
// and f64; the f64 variants add VEX_W.
// NOTE(review): original lines 8740 and 8743 are missing from this listing;
// they presumably terminate the two VRCP14 definitions -- confirm.
8738 defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
8739 f32x_info>, EVEX_CD8<32, CD8VT1>,
8741 defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
8742 f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
8744 defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
8745 SchedWriteFRsqrt.Scl, f32x_info>,
8746 EVEX_CD8<32, CD8VT1>, T8PD;
8747 defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
8748 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
8749 EVEX_CD8<64, CD8VT1>, T8PD;
8751 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
// Single-source packed forms built with AVX512_maskable:
//   r  - register source.
//   m  - full-vector memory source (_.MemOp, loaded with _.LdFrag).
//   mb - broadcast of a scalar memory operand (_.ScalarMemOp, X86VBroadcast of
//        _.ScalarLdFrag), tagged EVEX_B.
// Parameters: opcode, mnemonic, matched DAG node, scheduling info, type info.
// NOTE(review): original lines 8758, 8761 and 8767 are missing from this
// listing (presumably a Sched<> annotation and the opening of the m/mb
// patterns) -- confirm.
8752 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
8753 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8754 let ExeDomain = _.ExeDomain in {
8755 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8756 (ins _.RC:$src), OpcodeStr, "$src", "$src",
8757 (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
8759 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8760 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8762 (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
8763 Sched<[sched.Folded, sched.ReadAfterFold]>;
8764 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8765 (ins _.ScalarMemOp:$src), OpcodeStr,
8766 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8768 (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
8769 EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates avx512_fp14_p for every vector length and element type:
// PSZ/PDZ (512-bit) unconditionally here, and the 128/256-bit PS/PD variants
// under HasVLX. The mnemonic is OpcodeStr with "ps" or "pd" appended; the
// scheduling class is picked from sched by width (XMM/YMM/ZMM); PD variants
// add VEX_W.
8773 multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
8774 X86SchedWriteWidths sched> {
8775 defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
8776 v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
8777 defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM,
8778 v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
8780 // Define only if AVX512VL feature is present.
8781 let Predicates = [HasVLX] in {
8782 defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8783 OpNode, sched.XMM, v4f32x_info>,
8784 EVEX_V128, EVEX_CD8<32, CD8VF>;
8785 defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8786 OpNode, sched.YMM, v8f32x_info>,
8787 EVEX_V256, EVEX_CD8<32, CD8VF>;
8788 defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8789 OpNode, sched.XMM, v2f64x_info>,
8790 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
8791 defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8792 OpNode, sched.YMM, v4f64x_info>,
8793 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
// Packed rsqrt14 (opcode 0x4E) and rcp14 (opcode 0x4C) for all vector lengths.
8797 defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
8798 defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
8800 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
// Two-source scalar forms built with AVX512_maskable_scalar:
//   r  - register sources, matches OpNode.
//   rb - register sources with {sae} in the asm string, matches OpNodeSAE and
//        is tagged EVEX_B.
//   m  - second source from memory (_.IntScalarMemOp / _.ScalarIntMemCPat),
//        matches OpNode.
// NOTE(review): original lines 8809-8810 are missing from this listing
// (presumably the Sched<> annotation of `r`) -- confirm.
8801 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
8802 SDNode OpNode, SDNode OpNodeSAE,
8803 X86FoldableSchedWrite sched> {
8804 let ExeDomain = _.ExeDomain in {
8805 defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8806 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8807 "$src2, $src1", "$src1, $src2",
8808 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8811 defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8812 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8813 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
8814 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8815 EVEX_B, Sched<[sched]>;
8817 defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8818 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8819 "$src2, $src1", "$src1, $src2",
8820 (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2)>,
8821 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates avx512_fp28_s for f32 (suffix SSZ, mnemonic + "ss") and f64
// (suffix SDZ, mnemonic + "sd", adds VEX_W); both are tagged VEX_LIG.
8825 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8826 SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
8827 defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
8828 sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG;
8829 defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
8830 sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
// Scalar instantiations of avx512_eri_s: vrcp28 (0xCB) and vrsqrt28 (0xCD)
// under HasERI, followed by vgetexp (0x43).
// NOTE(review): the closing brace of the HasERI scope is not visible in this
// listing, so whether VGETEXP is inside it cannot be told from here -- confirm.
8833 let Predicates = [HasERI] in {
8834 defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
8835 SchedWriteFRcp.Scl>, T8PD, EVEX_4V;
8836 defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
8837 SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
8840 defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
8841 SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
8842 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
// Single-source packed forms built with AVX512_maskable:
//   r  - register source.
//   m  - full-vector memory source (_.MemOp, loaded with _.LdFrag).
//   mb - broadcast of a scalar memory operand (X86VBroadcast of
//        _.ScalarLdFrag), tagged EVEX_B.
// NOTE(review): original lines 8850-8851, 8854 and 8861 are missing from this
// listing (presumably a Sched<> annotation and the opening of the m/mb
// patterns) -- confirm.
8844 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8845 SDNode OpNode, X86FoldableSchedWrite sched> {
8846 let ExeDomain = _.ExeDomain in {
8847 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8848 (ins _.RC:$src), OpcodeStr, "$src", "$src",
8849 (OpNode (_.VT _.RC:$src))>,
8852 defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8853 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8855 (bitconvert (_.LdFrag addr:$src))))>,
8856 Sched<[sched.Folded, sched.ReadAfterFold]>;
8858 defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8859 (ins _.ScalarMemOp:$src), OpcodeStr,
8860 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8862 (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
8863 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Defines the rb form of a single-source packed operation: register source,
// {sae} in the assembly string, tagged EVEX_B, matching the given OpNode
// (callers in this file pass the SAE variant of the node).
8866 multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8867 SDNode OpNode, X86FoldableSchedWrite sched> {
8868 let ExeDomain = _.ExeDomain in
8869 defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8870 (ins _.RC:$src), OpcodeStr,
8871 "{sae}, $src", "$src, {sae}",
8872 (OpNode (_.VT _.RC:$src))>,
8873 EVEX_B, Sched<[sched]>;
// 512-bit packed instantiations: PSZ (v16f32) and PDZ (v8f64, adds VEX_W).
// Each combines avx512_fp28_p (using OpNode) with avx512_fp28_p_sae (using
// OpNodeSAE) and takes the ZMM scheduling class from sched.
8876 multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
8877 SDNode OpNodeSAE, X86SchedWriteWidths sched> {
8878 defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
8879 avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
8880 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
8881 defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
8882 avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
8883 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// 128-bit and 256-bit packed instantiations of avx512_fp28_p for PS and PD,
// all under HasVLX; PD variants add VEX_W.
// NOTE(review): original lines 8891, 8894, 8897 and 8900 are missing from this
// listing; they presumably pass the per-width scheduling class -- confirm.
8886 multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
8887 SDNode OpNode, X86SchedWriteWidths sched> {
8888 // Define only if AVX512VL feature is present.
8889 let Predicates = [HasVLX] in {
8890 defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
8892 EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
8893 defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
8895 EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
8896 defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
8898 EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
8899 defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
8901 EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
// Packed instantiations of avx512_eri: vrsqrt28 (0xCC), vrcp28 (0xCA) and
// vexp2 (0xC8) under HasERI, followed by vgetexp (0x42), which additionally
// gets the 128/256-bit forms from avx512_fp_unaryop_packed.
// NOTE(review): the closing brace of the HasERI scope and original line 8914
// are not visible in this listing; whether VGETEXP lies inside the HasERI
// scope cannot be told from here -- confirm.
8905 let Predicates = [HasERI] in {
8906 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
8907 SchedWriteFRsqrt>, EVEX;
8908 defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
8909 SchedWriteFRcp>, EVEX;
8910 defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
8911 SchedWriteFAdd>, EVEX;
8913 defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
8915 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
8916 SchedWriteFRnd>, EVEX;
// Defines the rb form of packed sqrt: register source plus an AVX512RC:$rc
// rounding-control operand, matching X86fsqrtRnd with $rc as a target
// immediate; tagged EVEX_B and EVEX_RC.
8918 multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
8919 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8920 let ExeDomain = _.ExeDomain in
8921 defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8922 (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
8923 (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
8924 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
// Packed sqrt forms built with AVX512_maskable:
//   r  - register source, matches fsqrt.
//   m  - full-vector memory source (_.MemOp, loaded with _.LdFrag).
//   mb - broadcast of a scalar memory operand (X86VBroadcast of
//        _.ScalarLdFrag), tagged EVEX_B.
// NOTE(review): original lines 8933, 8936 and 8942 are missing from this
// listing (presumably a Sched<> annotation and the opening of the m/mb
// patterns) -- confirm.
8927 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
8928 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8929 let ExeDomain = _.ExeDomain in {
8930 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8931 (ins _.RC:$src), OpcodeStr, "$src", "$src",
8932 (_.VT (fsqrt _.RC:$src))>, EVEX,
8934 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8935 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8937 (bitconvert (_.LdFrag addr:$src))))>, EVEX,
8938 Sched<[sched.Folded, sched.ReadAfterFold]>;
8939 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8940 (ins _.ScalarMemOp:$src), OpcodeStr,
8941 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8943 (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
8944 EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates avx512_sqrt_packed for every vector length and element type:
// PSZ/PDZ (512-bit) here, and the 128/256-bit variants under HasVLX. The
// scheduling class comes from sched.PS or sched.PD, indexed by width; PS
// variants use the PS prefix class, PD variants use PD and add VEX_W.
8948 multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
8949 X86SchedWriteSizes sched> {
8950 defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8951 sched.PS.ZMM, v16f32_info>,
8952 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8953 defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8954 sched.PD.ZMM, v8f64_info>,
8955 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8956 // Define only if AVX512VL feature is present.
8957 let Predicates = [HasVLX] in {
8958 defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8959 sched.PS.XMM, v4f32x_info>,
8960 EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
8961 defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8962 sched.PS.YMM, v8f32x_info>,
8963 EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
8964 defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8965 sched.PD.XMM, v2f64x_info>,
8966 EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8967 defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8968 sched.PD.YMM, v4f64x_info>,
8969 EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
// Instantiates the rounding-control form (avx512_sqrt_packed_round) for the
// 512-bit PS and PD variants only.
8973 multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
8974 X86SchedWriteSizes sched> {
8975 defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
8976 sched.PS.ZMM, v16f32_info>,
8977 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8978 defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
8979 sched.PD.ZMM, v8f64_info>,
8980 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
// Scalar sqrt instruction forms and selection patterns.
//   r_Int / m_Int - vector-register forms (second source in a register or in
//                   memory) matching X86fsqrts.
//   rb_Int        - register form with an AVX512RC:$rc rounding operand,
//                   matching X86fsqrtRnds; tagged EVEX_B and EVEX_RC.
//   r / m         - isCodeGenOnly forms on the scalar register class _.FRC,
//                   defined without patterns.
// The trailing patterns select plain fsqrt on a scalar to the r form, and
// fsqrt of a load to the m form only under OptForSize; in both, the first
// source operand is an IMPLICIT_DEF.
// Parameters:
//   opc, OpcodeStr - opcode byte and mnemonic.
//   sched          - scheduling info.
//   _              - type info of the operands.
//   Name           - instruction-name prefix used to look up Name#Zr / Name#Zm.
// NOTE(review): several original lines (e.g. 8991, 9002-9003, 9010-9011) are
// missing from this listing -- confirm details against the full file.
8983 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
8984 X86VectorVTInfo _, string Name> {
8985 let ExeDomain = _.ExeDomain in {
8986 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8987 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8988 "$src2, $src1", "$src1, $src2",
8989 (X86fsqrts (_.VT _.RC:$src1),
8990 (_.VT _.RC:$src2))>,
8992 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8993 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8994 "$src2, $src1", "$src1, $src2",
8995 (X86fsqrts (_.VT _.RC:$src1),
8996 _.ScalarIntMemCPat:$src2)>,
8997 Sched<[sched.Folded, sched.ReadAfterFold]>;
8998 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8999 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
9000 "$rc, $src2, $src1", "$src1, $src2, $rc",
9001 (X86fsqrtRnds (_.VT _.RC:$src1),
9004 EVEX_B, EVEX_RC, Sched<[sched]>;
9006 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
9007 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9008 (ins _.FRC:$src1, _.FRC:$src2),
9009 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9012 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9013 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
9014 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9015 Sched<[sched.Folded, sched.ReadAfterFold]>;
9019 let Predicates = [HasAVX512] in {
9020 def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
9021 (!cast<Instruction>(Name#Zr)
9022 (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
9025 let Predicates = [HasAVX512, OptForSize] in {
9026 def : Pat<(_.EltVT (fsqrt (load addr:$src))),
9027 (!cast<Instruction>(Name#Zm)
9028 (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
// Instantiates avx512_sqrt_scalar for f32 (SSZ, XS prefix) and f64 (SDZ, XD
// prefix plus VEX_W). NAME#"SS" / NAME#"SD" is passed as the Name prefix that
// avx512_sqrt_scalar uses to reference the generated instructions.
9032 multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
9033 X86SchedWriteSizes sched> {
9034 defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
9035 EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
9036 defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
9037 EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
// vsqrt (opcode 0x51): packed forms for all vector lengths plus the 512-bit
// rounding-control forms, then the scalar forms (tagged VEX_LIG).
9040 defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
9041 avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
9043 defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
// Scalar rndscale instruction forms and selection patterns. Every form takes
// an 8-bit immediate as its last operand.
//   r_Int  - vector-register form, matches X86RndScales.
//   rb_Int - vector-register form with {sae} in the asm string, matches
//            X86RndScalesSAE; tagged EVEX_B.
//   m_Int  - second source from memory, matches X86RndScales.
//   r / m  - isCodeGenOnly forms on the scalar register class _.FRC, defined
//            without patterns.
// The trailing patterns select X86VRndScale on a scalar register to NAME##r,
// and on a scalar load to NAME##m only under OptForSize; in both, the first
// source operand is an IMPLICIT_DEF.
// NOTE(review): several original lines (e.g. 9052-9054, 9060-9061, 9064) are
// missing from this listing -- confirm details against the full file.
9045 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
9046 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9047 let ExeDomain = _.ExeDomain in {
9048 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9049 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9050 "$src3, $src2, $src1", "$src1, $src2, $src3",
9051 (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9055 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9056 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9057 "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
9058 (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9059 (i32 imm:$src3)))>, EVEX_B,
9062 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9063 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
9065 "$src3, $src2, $src1", "$src1, $src2, $src3",
9066 (_.VT (X86RndScales _.RC:$src1,
9067 _.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>,
9068 Sched<[sched.Folded, sched.ReadAfterFold]>;
9070 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
9071 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9072 (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
9073 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9074 []>, Sched<[sched]>;
9077 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9078 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9079 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9080 []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
9084 let Predicates = [HasAVX512] in {
9085 def : Pat<(X86VRndScale _.FRC:$src1, imm:$src2),
9086 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
9087 _.FRC:$src1, imm:$src2))>;
9090 let Predicates = [HasAVX512, OptForSize] in {
9091 def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), imm:$src2),
9092 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
9093 addr:$src1, imm:$src2))>;
// Scalar vrndscaless (opcode 0x0A, f32) and vrndscalesd (opcode 0x0B, f64,
// adds VEX_W).
9097 defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
9098 SchedWriteFRnd.Scl, f32x_info>,
9099 AVX512AIi8Base, EVEX_4V, VEX_LIG,
9100 EVEX_CD8<32, CD8VT1>;
9102 defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
9103 SchedWriteFRnd.Scl, f64x_info>,
9104 VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
9105 EVEX_CD8<64, CD8VT1>;
// Patterns that fold a masked scalar operation expressed as
//   Move($src1, scalar_to_vector(X86selects(Mask, OpNode(elt0 of $src2), X)))
// into the masked intrinsic instruction forms "V"#OpcPrefix#r_Intk (merge
// masking, X = element 0 of $dst) and "V"#OpcPrefix#r_Intkz (zero masking).
// Parameters:
//   OpNode        - scalar operation being masked.
//   OpcPrefix     - instruction name without the leading "V".
//   Move          - node that moves the low element into $src1.
//   Mask/OutMask  - mask as it appears in the DAG, and the operand to emit.
//   _             - vector type info.
//   ZeroFP        - floating-point zero leaf for the zero-masking pattern.
//   BasePredicate - predicate guarding both patterns.
// NOTE(review): original line 9119 is missing from this listing; it
// presumably supplies ZeroFP as the false operand of the second X86selects --
// confirm.
9107 multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
9108 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
9109 dag OutMask, Predicate BasePredicate> {
9110 let Predicates = [BasePredicate] in {
9111 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
9112 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9113 (extractelt _.VT:$dst, (iPTR 0))))),
9114 (!cast<Instruction>("V"#OpcPrefix#r_Intk)
9115 _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
9117 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
9118 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9120 (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
9121 OutMask, _.VT:$src2, _.VT:$src1)>;
// Masked scalar fsqrt for f32 (X86Movss, v4f32) and f64 (X86Movsd, v2f64).
// The mask is matched as the truncated low byte of a GR32 value turned into
// v1i1, and is emitted as a copy of $mask into VK1WM.
9125 defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
9126 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
9127 fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
9128 defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
9129 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
9130 fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
9133 //-------------------------------------------------
9134 // Integer truncate and extend operations
9135 //-------------------------------------------------
9137 // PatFrags that contain a select and a truncate op. They take operands in the
9138 // same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
9139 // either to the multiclasses.
// Each fragment takes (src, src0, mask) and expands to
// vselect(mask, <trunc-op>(src), src0), where <trunc-op> is trunc,
// X86vtruncs or X86vtruncus respectively.
9140 def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
9141 (vselect node:$mask,
9142 (trunc node:$src), node:$src0)>;
9143 def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
9144 (vselect node:$mask,
9145 (X86vtruncs node:$src), node:$src0)>;
9146 def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
9147 (vselect node:$mask,
9148 (X86vtruncus node:$src), node:$src0)>;
// Defines the instruction forms of one truncate operation at one vector width:
//   rr   - register destination, matches OpNode.
//   rrk  - merge-masked register destination ($src0 tied to $dst), matches
//          MaskNode with $src0 as pass-through.
//   rrkz - zero-masked register destination, matches MaskNode with
//          DestInfo.ImmAllZerosV as pass-through.
//   mr   - memory destination, no pattern.
//   mrk  - masked memory destination, no pattern, NotMemoryFoldable.
// Parameters:
//   opc, OpcodeStr    - opcode byte and mnemonic.
//   OpNode / MaskNode - unmasked and masked DAG operators.
//   sched             - scheduling info (store forms use sched.Folded).
//   SrcInfo, DestInfo - type info of source and result; the mask register
//                       class is taken from SrcInfo.
//   x86memop          - memory operand class for the store forms.
9150 multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9151 SDPatternOperator MaskNode,
9152 X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
9153 X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
9154 let ExeDomain = DestInfo.ExeDomain in {
9155 def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9156 (ins SrcInfo.RC:$src),
9157 OpcodeStr # "\t{$src, $dst|$dst, $src}",
9158 [(set DestInfo.RC:$dst,
9159 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
9160 EVEX, Sched<[sched]>;
9161 let Constraints = "$src0 = $dst" in
9162 def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9163 (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9164 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9165 [(set DestInfo.RC:$dst,
9166 (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9167 (DestInfo.VT DestInfo.RC:$src0),
9168 SrcInfo.KRCWM:$mask))]>,
9169 EVEX, EVEX_K, Sched<[sched]>;
9170 def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9171 (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9172 OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
9173 [(set DestInfo.RC:$dst,
9174 (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9175 DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
9176 EVEX, EVEX_KZ, Sched<[sched]>;
// Store forms carry no selection pattern, so mayStore is set explicitly.
9179 let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
9180 def mr : AVX512XS8I<opc, MRMDestMem, (outs),
9181 (ins x86memop:$dst, SrcInfo.RC:$src),
9182 OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
9183 EVEX, Sched<[sched.Folded]>;
9185 def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
9186 (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9187 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
9188 EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
9189 }//mayStore = 1, hasSideEffects = 0
// Selection patterns for truncating stores.
//   truncFrag  - (value, address) fragment; selected to the "mr" instruction.
//   mtruncFrag - (value, address, mask) fragment; selected to "mrk".
// The target instruction is looked up by name as
// Name # SrcInfo.ZSuffix # "mr" / "mrk".
9192 multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9193 X86VectorVTInfo DestInfo,
9194 PatFrag truncFrag, PatFrag mtruncFrag,
// Unmasked truncating store.
9197 def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9198 (!cast<Instruction>(Name#SrcInfo.ZSuffix##mr)
9199 addr:$dst, SrcInfo.RC:$src)>;
// Masked truncating store; note the operand order changes from
// (src, addr, mask) in the fragment to (addr, mask, src) on the instruction.
9201 def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
9202 SrcInfo.KRCWM:$mask),
9203 (!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk)
9204 addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
// Instantiates the Z128, Z256 and Z variants of one truncating instruction.
// Each variant combines avx512_trunc_common (instruction definitions) with
// avx512_trunc_mr_lowering (store patterns) and receives its own
// OpNode/MaskNode, destination type info and memory operand; the source type
// info comes from VTSrcInfo.info128/info256/info512 respectively.
// Z128 and Z256 are predicated on [HasVLX, prd]; Z is predicated on [prd].
// prd defaults to HasAVX512.
9207 multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9208 SDNode OpNode256, SDNode OpNode512,
9209 SDPatternOperator MaskNode128,
9210 SDPatternOperator MaskNode256,
9211 SDPatternOperator MaskNode512,
9212 X86FoldableSchedWrite sched,
9213 AVX512VLVectorVTInfo VTSrcInfo,
9214 X86VectorVTInfo DestInfoZ128,
9215 X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9216 X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9217 X86MemOperand x86memopZ, PatFrag truncFrag,
9218 PatFrag mtruncFrag, Predicate prd = HasAVX512>{
9220 let Predicates = [HasVLX, prd] in {
// NAME is forwarded so the lowering patterns can !cast the instruction names.
9221 defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
9222 VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9223 avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
9224 truncFrag, mtruncFrag, NAME>, EVEX_V128;
9226 defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
9227 VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9228 avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
9229 truncFrag, mtruncFrag, NAME>, EVEX_V256;
9231 let Predicates = [prd] in
9232 defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
9233 VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9234 avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
9235 truncFrag, mtruncFrag, NAME>, EVEX_V512;
// i64-element source (avx512vl_i64_info) truncated into a v16i8x_info
// destination at every vector width. All three widths use InVecNode and
// InVecMaskNode; OpNode and MaskNode are accepted but not referenced in the
// lines below. Store operands are i16mem / i32mem / i64mem for
// Z128 / Z256 / Z.
9238 multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9239 SDPatternOperator MaskNode,
9240 X86FoldableSchedWrite sched, PatFrag StoreNode,
9241 PatFrag MaskedStoreNode, SDNode InVecNode,
9242 SDPatternOperator InVecMaskNode> {
9243 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
9244 InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
9245 avx512vl_i64_info, v16i8x_info, v16i8x_info,
9246 v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9247 MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
// i64-element source (avx512vl_i64_info) truncated into a v8i16x_info
// destination at every vector width. Z128 and Z256 use InVecNode /
// InVecMaskNode; Z uses OpNode / MaskNode. Store operands are
// i32mem / i64mem / i128mem for Z128 / Z256 / Z.
9250 multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9251 SDPatternOperator MaskNode,
9252 X86FoldableSchedWrite sched, PatFrag StoreNode,
9253 PatFrag MaskedStoreNode, SDNode InVecNode,
9254 SDPatternOperator InVecMaskNode> {
9255 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9256 InVecMaskNode, InVecMaskNode, MaskNode, sched,
9257 avx512vl_i64_info, v8i16x_info, v8i16x_info,
9258 v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9259 MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
// i64-element source (avx512vl_i64_info) truncated to i32 elements.
// Destinations are v4i32x_info (Z128), v4i32x_info (Z256) and v8i32x_info (Z).
// Only Z128 uses InVecNode / InVecMaskNode; Z256 and Z use OpNode / MaskNode.
// Store operands are i64mem / i128mem / i256mem.
9262 multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9263 SDPatternOperator MaskNode,
9264 X86FoldableSchedWrite sched, PatFrag StoreNode,
9265 PatFrag MaskedStoreNode, SDNode InVecNode,
9266 SDPatternOperator InVecMaskNode> {
9267 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9268 InVecMaskNode, MaskNode, MaskNode, sched,
9269 avx512vl_i64_info, v4i32x_info, v4i32x_info,
9270 v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9271 MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
// i32-element source (avx512vl_i32_info) truncated into a v16i8x_info
// destination at every vector width. Z128 and Z256 use InVecNode /
// InVecMaskNode; Z uses OpNode / MaskNode. Store operands are
// i32mem / i64mem / i128mem for Z128 / Z256 / Z.
9274 multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
9275 SDPatternOperator MaskNode,
9276 X86FoldableSchedWrite sched, PatFrag StoreNode,
9277 PatFrag MaskedStoreNode, SDNode InVecNode,
9278 SDPatternOperator InVecMaskNode> {
9279 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9280 InVecMaskNode, InVecMaskNode, MaskNode, sched,
9281 avx512vl_i32_info, v16i8x_info, v16i8x_info,
9282 v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
9283 MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
// i32-element source (avx512vl_i32_info) truncated to i16 elements.
// Destinations are v8i16x_info (Z128), v8i16x_info (Z256) and v16i16x_info (Z).
// Only Z128 uses InVecNode / InVecMaskNode; Z256 and Z use OpNode / MaskNode.
// Store operands are i64mem / i128mem / i256mem.
9286 multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9287 SDPatternOperator MaskNode,
9288 X86FoldableSchedWrite sched, PatFrag StoreNode,
9289 PatFrag MaskedStoreNode, SDNode InVecNode,
9290 SDPatternOperator InVecMaskNode> {
9291 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9292 InVecMaskNode, MaskNode, MaskNode, sched,
9293 avx512vl_i32_info, v8i16x_info, v8i16x_info,
9294 v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
9295 MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
// i16-element source (avx512vl_i16_info) truncated to i8 elements.
// Destinations are v16i8x_info (Z128), v16i8x_info (Z256) and v32i8x_info (Z).
// Only Z128 uses InVecNode / InVecMaskNode; Z256 and Z use OpNode / MaskNode.
// Store operands are i64mem / i128mem / i256mem. Unlike the other width
// wrappers, this one passes HasBWI as the predicate instead of relying on the
// HasAVX512 default of avx512_trunc.
9298 multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9299 SDPatternOperator MaskNode,
9300 X86FoldableSchedWrite sched, PatFrag StoreNode,
9301 PatFrag MaskedStoreNode, SDNode InVecNode,
9302 SDPatternOperator InVecMaskNode> {
9303 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9304 InVecMaskNode, MaskNode, MaskNode, sched,
9305 avx512vl_i16_info, v16i8x_info, v16i8x_info,
9306 v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
9307 MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
// Instantiations of the truncating families. Each source/destination width
// pair comes in three flavours built on trunc / X86vtruncs / X86vtruncus, with
// the matching select_* fragment, truncating-store fragments and in-vector
// nodes. All of them use the WriteShuffle256 scheduling class.

// i64 -> i8.
9310 defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, select_trunc,
9311 WriteShuffle256, truncstorevi8,
9312 masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9313 defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, select_truncs,
9314 WriteShuffle256, truncstore_s_vi8,
9315 masked_truncstore_s_vi8, X86vtruncs,
9317 defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
9318 select_truncus, WriteShuffle256,
9319 truncstore_us_vi8, masked_truncstore_us_vi8,
9320 X86vtruncus, X86vmtruncus>;
// i64 -> i16.
9322 defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
9323 WriteShuffle256, truncstorevi16,
9324 masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9325 defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs,
9326 WriteShuffle256, truncstore_s_vi16,
9327 masked_truncstore_s_vi16, X86vtruncs,
9329 defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
9330 select_truncus, WriteShuffle256,
9331 truncstore_us_vi16, masked_truncstore_us_vi16,
9332 X86vtruncus, X86vmtruncus>;
// i64 -> i32.
9334 defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
9335 WriteShuffle256, truncstorevi32,
9336 masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
9337 defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs,
9338 WriteShuffle256, truncstore_s_vi32,
9339 masked_truncstore_s_vi32, X86vtruncs,
9341 defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
9342 select_truncus, WriteShuffle256,
9343 truncstore_us_vi32, masked_truncstore_us_vi32,
9344 X86vtruncus, X86vmtruncus>;
// i32 -> i8.
9346 defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
9347 WriteShuffle256, truncstorevi8,
9348 masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9349 defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
9350 WriteShuffle256, truncstore_s_vi8,
9351 masked_truncstore_s_vi8, X86vtruncs,
9353 defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
9354 select_truncus, WriteShuffle256,
9355 truncstore_us_vi8, masked_truncstore_us_vi8,
9356 X86vtruncus, X86vmtruncus>;
// i32 -> i16.
9358 defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
9359 WriteShuffle256, truncstorevi16,
9360 masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9361 defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
9362 WriteShuffle256, truncstore_s_vi16,
9363 masked_truncstore_s_vi16, X86vtruncs,
9365 defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
9366 select_truncus, WriteShuffle256,
9367 truncstore_us_vi16, masked_truncstore_us_vi16,
9368 X86vtruncus, X86vmtruncus>;
// i16 -> i8.
9370 defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
9371 WriteShuffle256, truncstorevi8,
9372 masked_truncstorevi8, X86vtrunc,
9374 defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
9375 WriteShuffle256, truncstore_s_vi8,
9376 masked_truncstore_s_vi8, X86vtruncs,
9378 defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
9379 select_truncus, WriteShuffle256,
9380 truncstore_us_vi8, masked_truncstore_us_vi8,
9381 X86vtruncus, X86vmtruncus>;
// 256-bit truncates when VLX is unavailable: the 256-bit source is inserted at
// sub_ymm into an otherwise undefined (IMPLICIT_DEF) 512-bit value, the 512-bit
// "Zrr" instruction is applied, and the 128-bit result is taken from sub_xmm.
9383 let Predicates = [HasAVX512, NoVLX] in {
// v8i32 -> v8i16 through VPMOVDWZrr.
9384 def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
9385 (v8i16 (EXTRACT_SUBREG
9386 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
9387 VR256X:$src, sub_ymm)))), sub_xmm))>;
// v4i64 -> v4i32 through VPMOVQDZrr.
9388 def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
9389 (v4i32 (EXTRACT_SUBREG
9390 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
9391 VR256X:$src, sub_ymm)))), sub_xmm))>;
// Same widening trick for v16i16 -> v16i8, which needs BWI for VPMOVWBZrr.
9394 let Predicates = [HasBWI, NoVLX] in {
9395 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9396 (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
9397 VR256X:$src, sub_ymm))), sub_xmm))>;
9400 // Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
// Maps the masked-truncate node OpNode onto the instruction named by
// InstrName: the first pattern selects its "rrk" form (with $src0 as the
// pass-through register), the second selects "rrkz" when the pass-through
// operand is DestInfo.ImmAllZerosV.
9401 multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
9402 X86VectorVTInfo DestInfo,
9403 X86VectorVTInfo SrcInfo> {
9404 def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9406 SrcInfo.KRCWM:$mask)),
9407 (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
9408 SrcInfo.KRCWM:$mask,
9411 def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9412 DestInfo.ImmAllZerosV,
9413 SrcInfo.KRCWM:$mask)),
9414 (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
// Masked-truncate lowering for the 256-bit v8i32 -> v8i16 instructions; these
// name the Z256 instructions and therefore require VLX.
9418 let Predicates = [HasVLX] in {
9419 defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
9420 defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
9421 defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
// Masked-truncate lowering for the 512-bit source instructions.
9424 let Predicates = [HasAVX512] in {
// v16i32 -> v16i16.
9425 defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
9426 defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
9427 defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
// v16i32 -> v16i8.
9429 defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
9430 defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
9431 defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
// v8i64 -> v8i16.
9433 defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
9434 defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
9435 defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
// Register (rr) and load (rm) forms shared by the VPMOVZX*/VPMOVSX* families
// instantiated later in this file. Both forms are built with AVX512_maskable
// on DestInfo.
//   rr - matches OpNode applied to a SrcInfo register.
//   rm - matches LdFrag applied to the address; OpNode is not part of the
//        load pattern.
9438 multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9439 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
9440 X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
9441 let ExeDomain = DestInfo.ExeDomain in {
9442 defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
9443 (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
9444 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
9445 EVEX, Sched<[sched]>;
9447 defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
9448 (ins x86memop:$src), OpcodeStr ,"$src", "$src",
9449 (DestInfo.VT (LdFrag addr:$src))>,
9450 EVEX, Sched<[sched.Folded]>;
// i8 -> i16 extension. LdFrag defaults to the fragment named
// ExtTy # "extloadvi8".
//   Z128: v16i8x_info -> v8i16x_info,  i64mem,  InVecNode  [HasVLX, HasBWI]
//   Z256: v16i8x_info -> v16i16x_info, i128mem, OpNode     [HasVLX, HasBWI]
//   Z   : v32i8x_info -> v32i16_info,  i256mem, OpNode     [HasBWI]
9454 multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
9455 SDNode OpNode, SDNode InVecNode, string ExtTy,
9456 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9457 let Predicates = [HasVLX, HasBWI] in {
9458 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
9459 v16i8x_info, i64mem, LdFrag, InVecNode>,
9460 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
9462 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
9463 v16i8x_info, i128mem, LdFrag, OpNode>,
9464 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
9466 let Predicates = [HasBWI] in {
9467 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
9468 v32i8x_info, i256mem, LdFrag, OpNode>,
9469 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
// i8 -> i32 extension. LdFrag defaults to the fragment named
// ExtTy # "extloadvi8". The source info is v16i8x_info at every width.
//   Z128: -> v4i32x_info, i32mem,  InVecNode  [HasVLX, HasAVX512]
//   Z256: -> v8i32x_info, i64mem,  InVecNode  [HasVLX, HasAVX512]
//   Z   : -> v16i32_info, i128mem, OpNode     [HasAVX512]
9473 multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
9474 SDNode OpNode, SDNode InVecNode, string ExtTy,
9475 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9476 let Predicates = [HasVLX, HasAVX512] in {
9477 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
9478 v16i8x_info, i32mem, LdFrag, InVecNode>,
9479 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
9481 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
9482 v16i8x_info, i64mem, LdFrag, InVecNode>,
9483 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
9485 let Predicates = [HasAVX512] in {
9486 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
9487 v16i8x_info, i128mem, LdFrag, OpNode>,
9488 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
// i8 -> i64 extension. LdFrag defaults to the fragment named
// ExtTy # "extloadvi8". The source info is v16i8x_info at every width and all
// three widths use InVecNode; OpNode is accepted but not referenced in the
// lines below.
//   Z128: -> v2i64x_info, i16mem  [HasVLX, HasAVX512]
//   Z256: -> v4i64x_info, i32mem  [HasVLX, HasAVX512]
//   Z   : -> v8i64_info,  i64mem  [HasAVX512]
9492 multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
9493 SDNode OpNode, SDNode InVecNode, string ExtTy,
9494 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9495 let Predicates = [HasVLX, HasAVX512] in {
9496 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9497 v16i8x_info, i16mem, LdFrag, InVecNode>,
9498 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
9500 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9501 v16i8x_info, i32mem, LdFrag, InVecNode>,
9502 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
9504 let Predicates = [HasAVX512] in {
9505 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9506 v16i8x_info, i64mem, LdFrag, InVecNode>,
9507 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
// i16 -> i32 extension. LdFrag defaults to the fragment named
// ExtTy # "extloadvi16".
//   Z128: v8i16x_info  -> v4i32x_info, i64mem,  InVecNode  [HasVLX, HasAVX512]
//   Z256: v8i16x_info  -> v8i32x_info, i128mem, OpNode     [HasVLX, HasAVX512]
//   Z   : v16i16x_info -> v16i32_info, i256mem, OpNode     [HasAVX512]
9511 multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
9512 SDNode OpNode, SDNode InVecNode, string ExtTy,
9513 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9514 let Predicates = [HasVLX, HasAVX512] in {
9515 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
9516 v8i16x_info, i64mem, LdFrag, InVecNode>,
9517 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
9519 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
9520 v8i16x_info, i128mem, LdFrag, OpNode>,
9521 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
9523 let Predicates = [HasAVX512] in {
9524 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
9525 v16i16x_info, i256mem, LdFrag, OpNode>,
9526 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
// i16 -> i64 extension. LdFrag defaults to the fragment named
// ExtTy # "extloadvi16". The source info is v8i16x_info at every width.
//   Z128: -> v2i64x_info, i32mem,  InVecNode  [HasVLX, HasAVX512]
//   Z256: -> v4i64x_info, i64mem,  InVecNode  [HasVLX, HasAVX512]
//   Z   : -> v8i64_info,  i128mem, OpNode     [HasAVX512]
9530 multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
9531 SDNode OpNode, SDNode InVecNode, string ExtTy,
9532 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9533 let Predicates = [HasVLX, HasAVX512] in {
9534 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9535 v8i16x_info, i32mem, LdFrag, InVecNode>,
9536 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
9538 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9539 v8i16x_info, i64mem, LdFrag, InVecNode>,
9540 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
9542 let Predicates = [HasAVX512] in {
9543 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9544 v8i16x_info, i128mem, LdFrag, OpNode>,
9545 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
// i32 -> i64 extension. LdFrag defaults to the fragment named
// ExtTy # "extloadvi32". Unlike the byte/word source variants, these
// definitions do not carry VEX_WIG.
//   Z128: v4i32x_info -> v2i64x_info, i64mem,  InVecNode  [HasVLX, HasAVX512]
//   Z256: v4i32x_info -> v4i64x_info, i128mem, OpNode     [HasVLX, HasAVX512]
//   Z   : v8i32x_info -> v8i64_info,  i256mem, OpNode     [HasAVX512]
9549 multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
9550 SDNode OpNode, SDNode InVecNode, string ExtTy,
9551 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
9553 let Predicates = [HasVLX, HasAVX512] in {
9554 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9555 v4i32x_info, i64mem, LdFrag, InVecNode>,
9556 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
9558 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9559 v4i32x_info, i128mem, LdFrag, OpNode>,
9560 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
9562 let Predicates = [HasAVX512] in {
9563 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9564 v8i32x_info, i256mem, LdFrag, OpNode>,
9565 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
// Zero-extending moves: zext / zext_invec nodes, ExtTy "z" (selects the
// "zextloadvi*" load fragments), opcodes 0x30-0x35.
9569 defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
9570 defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
9571 defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>;
9572 defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
9573 defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
9574 defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;
// Sign-extending moves: sext / sext_invec nodes, ExtTy "s" (selects the
// "sextloadvi*" load fragments), opcodes 0x20-0x25.
9576 defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
9577 defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
9578 defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>;
9579 defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
9580 defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
9581 defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;
9584 // Patterns that we also need any extend versions of. aext_vector_inreg
9585 // is currently legalized to zext_vector_inreg.
// Each pattern folds a full vector load (loadvNiM) extended by ExtOp into the
// "rm" form of the instruction named OpcPrefix # <width suffix>.
9586 multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
// 256-bit destination, byte source.
9588 let Predicates = [HasVLX, HasBWI] in {
9589 def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
9590 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
// 256-bit destination, word and dword sources.
9593 let Predicates = [HasVLX] in {
9594 def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
9595 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
9597 def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
9598 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
// 512-bit destination, byte -> word.
9602 let Predicates = [HasBWI] in {
9603 def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
9604 (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
// Remaining 512-bit destinations.
9606 let Predicates = [HasAVX512] in {
9607 def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
9608 (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
9609 def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
9610 (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
9612 def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
9613 (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
9615 def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
9616 (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
// Extends AVX512_pmovx_patterns_base with load-folding patterns for the forms
// whose memory operand is narrower than a full vector. The source is either a
// scalar load placed in a vector (scalar_to_vector, then bitcast with bc_*) or
// a vzload_* fragment, extended by InVecOp, and is folded into the matching
// "rm" instruction.
9620 multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
9622 AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
// 128-bit byte -> word; 64 bits are loaded.
9624 let Predicates = [HasVLX, HasBWI] in {
9625 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9626 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9627 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9628 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9629 def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
9630 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
// Remaining 128-bit destinations.
9632 let Predicates = [HasVLX] in {
9633 def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9634 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9635 def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v4i32 addr:$src)))),
9636 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
// Byte -> qword at 128 bits matches a 16-bit load extended to i32
// (extloadi32i16).
9638 def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
9639 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
9641 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9642 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9643 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9644 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9645 def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
9646 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9648 def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9649 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9650 def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v4i32 addr:$src)))),
9651 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9653 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9654 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9655 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9656 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9657 def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
9658 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
// 256-bit destinations whose source occupies only part of a 128-bit vector.
9660 let Predicates = [HasVLX] in {
9661 def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9662 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9663 def : Pat<(v8i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
9664 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9666 def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9667 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9668 def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v4i32 addr:$src)))),
9669 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9671 def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9672 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
9673 def : Pat<(v4i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
9674 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
// 512-bit byte -> qword; 64 bits are loaded.
9677 let Predicates = [HasAVX512] in {
9678 def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9679 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
// Instantiate the load-folding patterns for the sign- and zero-extending
// families.
9683 defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
9684 defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
9686 // Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
9687 // ext+trunc aggressively making it impossible to legalize the DAG to this
9688 // pattern directly.
// v16i16 -> v16i8 truncate in two steps: widen to v16i32 with VPMOVZXWDZ, then
// truncate with VPMOVDBZrr. The second pattern folds a v16i16 load into the
// widening step (VPMOVZXWDZrm).
9689 let Predicates = [HasAVX512, NoBWI] in {
9690 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9691 (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
9692 def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
9693 (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
9696 //===----------------------------------------------------------------------===//
9697 // GATHER - SCATTER Operations
9699 // FIXME: Improve scheduling of gather/scatter instructions.
// One gather instruction (rm).
//   _          - type info of the gathered vector; also supplies the mnemonic
//                suffix (_.Suffix) and the EVEX_CD8 element size.
//   memop      - vector memory operand holding the addresses.
//   GatherNode - fragment matched as (pass-through, mask, address).
//   MaskRC     - mask register class; defaults to _.KRCWM.
// Operand constraints: $dst is early-clobber, $src1 is tied to $dst and $mask
// is tied to the second result $mask_wb.
9700 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9701 X86MemOperand memop, PatFrag GatherNode,
9702 RegisterClass MaskRC = _.KRCWM> {
9703 let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
9704 ExeDomain = _.ExeDomain in
9705 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
9706 (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
9707 !strconcat(OpcodeStr#_.Suffix,
9708 "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
9709 [(set _.RC:$dst, MaskRC:$mask_wb,
9710 (GatherNode (_.VT _.RC:$src1), MaskRC:$mask,
9711 vectoraddr:$src2))]>, EVEX, EVEX_K,
9712 EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
// Gathers for the 64-bit element families instantiated below (suffixes "PD"
// and "Q"). For each of the Z, Z256 and Z128 widths there is a "d"-mnemonic
// variant using opcode dopc and a "q"-mnemonic variant using opcode qopc; the
// two differ in memory operand and gather fragment. Every definition carries
// VEX_W, and the 256-/128-bit ones are predicated on HasVLX.
9715 multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
9716 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9717 defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
9718 vy512xmem, mgatherv8i32>, EVEX_V512, VEX_W;
9719 defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
9720 vz512mem, mgatherv8i64>, EVEX_V512, VEX_W;
9721 let Predicates = [HasVLX] in {
9722 defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
9723 vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
9724 defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
9725 vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
9726 defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
9727 vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
9728 defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
9729 vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
// Gathers for the 32-bit element families instantiated below (suffixes "PS"
// and "D"). The "d"-mnemonic variants use the vector info of their own width.
// The "q"-mnemonic variants use a narrower data vector than their EVEX length:
// _.info256 for Z and _.info128 for both Z256 and Z128. The Z128 "q" variant
// also overrides the mask register class with VK2WM. No definition here
// carries VEX_W.
9733 multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
9734 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9735 defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
9736 mgatherv16i32>, EVEX_V512;
9737 defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem,
9738 mgatherv8i64>, EVEX_V512;
9739 let Predicates = [HasVLX] in {
9740 defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
9741 vy256xmem, mgatherv8i32>, EVEX_V256;
9742 defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
9743 vy128xmem, mgatherv4i64>, EVEX_V256;
9744 defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
9745 vx128xmem, mgatherv4i32>, EVEX_V128;
9746 defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
9747 vx64xmem, mgatherv2i64, VK2WM>,
// Floating-point gathers: "PD" variants from avx512vl_f64_info and "PS"
// variants from avx512vl_f32_info, opcodes 0x92 (d) / 0x93 (q).
9753 defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
9754 avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
// Integer gathers: "Q" variants from avx512vl_i64_info and "D" variants from
// avx512vl_i32_info, opcodes 0x90 (d) / 0x91 (q).
9756 defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
9757 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
// One scatter instruction (mr).
//   _           - type info of the stored vector; also supplies the mnemonic
//                 suffix (_.Suffix) and the EVEX_CD8 element size.
//   memop       - vector memory operand holding the addresses.
//   ScatterNode - fragment matched as (value, mask, address).
//   MaskRC      - mask register class; defaults to _.KRCWM.
// The only result is $mask_wb, which is tied to the $mask input.
9759 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9760 X86MemOperand memop, PatFrag ScatterNode,
9761 RegisterClass MaskRC = _.KRCWM> {
9763 let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in
9765 def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
9766 (ins memop:$dst, MaskRC:$mask, _.RC:$src),
9767 !strconcat(OpcodeStr#_.Suffix,
9768 "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
9769 [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
9770 MaskRC:$mask, vectoraddr:$dst))]>,
9771 EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
9772 Sched<[WriteStore]>;
// Scatters for the 64-bit element families instantiated below (suffixes "PD"
// and "Q"). For each of the Z, Z256 and Z128 widths there is a "d"-mnemonic
// variant using opcode dopc and a "q"-mnemonic variant using opcode qopc.
// Every definition carries VEX_W, and the 256-/128-bit ones are predicated on
// HasVLX. The memory operands mirror those used by avx512_gather_q_pd.
9775 multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
9776 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9777 defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
9778 vy512xmem, mscatterv8i32>, EVEX_V512, VEX_W;
9779 defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
9780 vz512mem, mscatterv8i64>, EVEX_V512, VEX_W;
9781 let Predicates = [HasVLX] in {
9782 defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
9783 vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
9784 defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
9785 vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
9786 defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
9787 vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
9788 defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
9789 vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
// Scatters for the 32-bit element families instantiated below (suffixes "PS"
// and "D"). The "d"-mnemonic variants use the vector info of their own width.
// The "q"-mnemonic variants use a narrower data vector than their EVEX length:
// _.info256 for Z and _.info128 for both Z256 and Z128. The Z128 "q" variant
// also overrides the mask register class with VK2WM. No definition here
// carries VEX_W.
9793 multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
9794 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9795 defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
9796 mscatterv16i32>, EVEX_V512;
9797 defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem,
9798 mscatterv8i64>, EVEX_V512;
9799 let Predicates = [HasVLX] in {
9800 defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
9801 vy256xmem, mscatterv8i32>, EVEX_V256;
9802 defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
9803 vy128xmem, mscatterv4i64>, EVEX_V256;
9804 defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
9805 vx128xmem, mscatterv4i32>, EVEX_V128;
9806 defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
9807 vx64xmem, mscatterv2i64, VK2WM>,
// Floating-point scatters: "PD" variants from avx512vl_f64_info and "PS"
// variants from avx512vl_f32_info, opcodes 0xA2 (d) / 0xA3 (q).
9812 defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
9813 avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
// Integer scatters: "Q" variants from avx512vl_i64_info and "D" variants from
// avx512vl_i32_info, opcodes 0xA0 (d) / 0xA1 (q).
9815 defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
9816 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
// One gather/scatter prefetch instruction (m), predicated on HasPFI.
//   F     - instruction format; the instantiations pass an MRMNm form.
//   KRC   - mask register class of the $mask operand.
//   memop - vector memory operand.
// The instruction has no selection pattern ([]), is flagged both mayLoad and
// mayStore, and is scheduled as WriteLoad.
9819 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
9820 RegisterClass KRC, X86MemOperand memop> {
9821 let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
9822 def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
9823 !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
9824 EVEX, EVEX_K, Sched<[WriteLoad]>;
// Prefetch instantiations. Opcode 0xC6 is used by the "d" mnemonics and 0xC7
// by the "q" mnemonics; the format selects the operation: MRM1m = gatherpf0,
// MRM2m = gatherpf1, MRM5m = scatterpf0, MRM6m = scatterpf1. The *PD forms add
// VEX_W. All are EVEX_V512.

// vgatherpf0*
9827 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
9828 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9830 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
9831 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9833 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
9834 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9836 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
9837 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
// vgatherpf1*
9839 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
9840 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9842 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
9843 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9845 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
9846 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9848 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
9849 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
// vscatterpf0*
9851 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
9852 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9854 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
9855 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9857 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
9858 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9860 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
9861 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
// vscatterpf1*
9863 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
9864 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9866 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
9867 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9869 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
9870 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9872 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
9873 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
// Mask-register-to-vector move (rr) for one vector width. The instruction
// pattern matches a sext of the mask register (Vec.KRC) to Vec.VT; the
// mnemonic is OpcodeStr followed by Vec.Suffix.
9875 multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
9876 def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
9877 !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
9878 [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
9879 EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
9881 // Also need a pattern for anyextend.
// anyext is selected to the same rr instruction as sext.
9882 def : Pat<(Vec.VT (anyext Vec.KRC:$src)),
9883 (!cast<Instruction>(NAME#"rr") Vec.KRC:$src)>;
// Instantiates cvt_by_vec_width for the 512-bit (Z) variant under [prd] and
// for the 256-/128-bit (Z256, Z128) variants under [prd, HasVLX].
9886 multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
9887 string OpcodeStr, Predicate prd> {
9888 let Predicates = [prd] in
9889 defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
9891 let Predicates = [prd, HasVLX] in {
9892 defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
9893 defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
// Mask-to-vector moves. All share the "vpmovm2" mnemonic stem; the element
// suffix is appended from the vector type info by cvt_by_vec_width. The byte
// and word forms use opcode 0x28 and require BWI; the dword and qword forms
// use opcode 0x38 and require DQI. The W and Q forms add VEX_W.
9897 defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
9898 defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
9899 defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
9900 defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
// Vector-to-mask conversion for a single vector width. The "rr" instruction
// reads a vector register (_.RC) and writes a mask register (_.KRC); it is
// selected for X86pcmpgtm with an all-zeros first operand and the source
// vector as second operand.
9902 multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
9903 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
9904 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
9905 [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
9906 EVEX, Sched<[WriteMove]>;
9909 // Use the 512-bit version to implement the 128/256-bit forms in case NoVLX.
// The narrow source register is inserted at _.SubRegIdx into an IMPLICIT_DEF
// of ExtendInfo.VT, handed to the Name#"Zrr" instruction, and the result is
// wrapped in COPY_TO_REGCLASS and typed as _.KVT.
9910 multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
9914 def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
9915 (_.KVT (COPY_TO_REGCLASS
9916 (!cast<Instruction>(Name#"Zrr")
9917 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
9918 _.RC:$src, _.SubRegIdx)),
// Instantiates the vector-to-mask conversion for all widths of VTInfo:
//   - Z under [prd];
//   - Z256 / Z128 under [prd, HasVLX];
//   - Z256_Alt / Z128_Alt under [prd, NoVLX], which are pattern-only
//     lowerings that reuse the 512-bit instruction (info512 is passed as the
//     extended type).
9922 multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
9923 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
9924 let Predicates = [prd] in
9925 defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
9928 let Predicates = [prd, HasVLX] in {
9929 defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
9931 defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
9934 let Predicates = [prd, NoVLX] in {
9935 defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
9936 defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
// Vector-to-mask moves. The byte/word forms use opcode 0x29 and require
// HasBWI; the dword/qword forms use opcode 0x39 and require HasDQI. The word
// and qword forms additionally set VEX_W.
9940 defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
9941 avx512vl_i8_info, HasBWI>;
9942 defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
9943 avx512vl_i16_info, HasBWI>, VEX_W;
9944 defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
9945 avx512vl_i32_info, HasDQI>;
9946 defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
9947 avx512vl_i64_info, HasDQI>, VEX_W;
9949 // Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
9950 // is available, but BWI is not. We can't handle this in lowering because
9951 // a target independent DAG combine likes to combine sext and trunc.
// Each pattern goes through v16i32: VPMOVM2DZrr produces the v16i32 value,
// which is then fed to VPMOVDBZrr (for v16i8) or VPMOVDWZrr (for v16i16).
// The anyext patterns select exactly the same instruction sequence.
9952 let Predicates = [HasDQI, NoBWI] in {
9953 def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
9954 (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9955 def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
9956 (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9958 def : Pat<(v16i8 (anyext (v16i1 VK16:$src))),
9959 (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9960 def : Pat<(v16i16 (anyext (v16i1 VK16:$src))),
9961 (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
// Same idea for v8i1 -> v8i16 when VLX is also available: go through v8i32
// using VPMOVM2DZ256rr and feed the result to VPMOVDWZ256rr. sext and anyext
// select the same sequence.
9964 let Predicates = [HasDQI, NoBWI, HasVLX] in {
9965 def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
9966 (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
9968 def : Pat<(v8i16 (anyext (v8i1 VK8:$src))),
9969 (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
9972 //===----------------------------------------------------------------------===//
9973 // AVX-512 - COMPRESS and EXPAND
// Instruction definitions for the compress family at one vector width:
//   rr  - register destination form (MRMDestReg) built via AVX512_maskable;
//         it carries no selection pattern of its own (null_frag).
//   mr  - memory destination form (MRMDestMem) with an empty pattern list,
//         flagged mayStore = 1 and hasSideEffects = 0.
//   mrk - memory destination form that additionally takes a write-mask
//         operand (_.KRCWM:$mask) and is encoded with EVEX_K.
// Both store forms are scheduled with sched.Folded.
9976 multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
9977 string OpcodeStr, X86FoldableSchedWrite sched> {
9978 defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
9979 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
9980 (null_frag)>, AVX5128IBase,
9983 let mayStore = 1, hasSideEffects = 0 in
9984 def mr : AVX5128I<opc, MRMDestMem, (outs),
9985 (ins _.MemOp:$dst, _.RC:$src),
9986 OpcodeStr # "\t{$src, $dst|$dst, $src}",
9987 []>, EVEX_CD8<_.EltSize, CD8VT1>,
9988 Sched<[sched.Folded]>;
9990 def mrk : AVX5128I<opc, MRMDestMem, (outs),
9991 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
9992 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9994 EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
9995 Sched<[sched.Folded]>;
// Selection patterns for the compress instructions, which are looked up by
// name as Name # _.ZSuffix # <form>:
//   - X86mCompressingStore                          -> mrk
//   - X86compress with a register second operand    -> rrk ($src0 is passed
//                                                      first to the instruction)
//   - X86compress with an all-zeros second operand  -> rrkz
9998 multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
9999 def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
10000 (!cast<Instruction>(Name#_.ZSuffix##mrk)
10001 addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
10003 def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10004 (!cast<Instruction>(Name#_.ZSuffix##rrk)
10005 _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10006 def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10007 (!cast<Instruction>(Name#_.ZSuffix##rrkz)
10008 _.KRCWM:$mask, _.RC:$src)>;
// Instantiates the compress instructions and their lowering patterns for
// every width of VTInfo. Z is guarded by [Pred] (HasAVX512 by default);
// Z256 and Z128 additionally require HasVLX. NAME is forwarded to the
// lowering multiclass so the patterns can look up the instructions by name.
10011 multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
10012 X86FoldableSchedWrite sched,
10013 AVX512VLVectorVTInfo VTInfo,
10014 Predicate Pred = HasAVX512> {
10015 let Predicates = [Pred] in
10016 defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
10017 compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10019 let Predicates = [Pred, HasVLX] in {
10020 defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
10021 compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10022 defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
10023 compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
// Compress instantiations: the integer forms use opcode 0x8B and the
// floating-point forms 0x8A; the 64-bit element forms set VEX_W. All four
// are scheduled as WriteVarShuffle256 and marked NotMemoryFoldable.
10027 // FIXME: Is there a better scheduler class for VPCOMPRESS?
10028 defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
10029 avx512vl_i32_info>, EVEX, NotMemoryFoldable;
10030 defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
10031 avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
10032 defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
10033 avx512vl_f32_info>, EVEX, NotMemoryFoldable;
10034 defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
10035 avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
// Instruction definitions for the expand family at one vector width:
//   rr - register source form (MRMSrcReg); no selection pattern of its own
//        (null_frag).
//   rm - memory source form (MRMSrcMem, _.MemOp:$src1), scheduled with
//        sched.Folded / sched.ReadAfterFold.
10038 multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
10039 string OpcodeStr, X86FoldableSchedWrite sched> {
10040 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10041 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10042 (null_frag)>, AVX5128IBase,
10045 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10046 (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
10048 AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
10049 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Selection patterns for the expand instructions, which are looked up by
// name as Name # _.ZSuffix # <form>:
//   - X86mExpandingLoad whose third operand is undef      -> rmkz
//   - X86mExpandingLoad whose third operand is all-zeros  -> rmkz
//   - X86mExpandingLoad whose third operand is a register -> rmk
//   - X86expand whose second operand is a register        -> rrk
//   - X86expand whose second operand is all-zeros         -> rrkz
10052 multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10054 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
10055 (!cast<Instruction>(Name#_.ZSuffix##rmkz)
10056 _.KRCWM:$mask, addr:$src)>;
10058 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
10059 (!cast<Instruction>(Name#_.ZSuffix##rmkz)
10060 _.KRCWM:$mask, addr:$src)>;
10062 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
10063 (_.VT _.RC:$src0))),
10064 (!cast<Instruction>(Name#_.ZSuffix##rmk)
10065 _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
10067 def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10068 (!cast<Instruction>(Name#_.ZSuffix##rrk)
10069 _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10070 def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10071 (!cast<Instruction>(Name#_.ZSuffix##rrkz)
10072 _.KRCWM:$mask, _.RC:$src)>;
// Instantiates the expand instructions and their lowering patterns for every
// width of VTInfo. Z is guarded by [Pred] (HasAVX512 by default); Z256 and
// Z128 additionally require HasVLX. NAME is forwarded to the lowering
// multiclass so the patterns can look up the instructions by name.
10075 multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
10076 X86FoldableSchedWrite sched,
10077 AVX512VLVectorVTInfo VTInfo,
10078 Predicate Pred = HasAVX512> {
10079 let Predicates = [Pred] in
10080 defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
10081 expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10083 let Predicates = [Pred, HasVLX] in {
10084 defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
10085 expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10086 defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
10087 expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
// Expand instantiations: the integer forms use opcode 0x89 and the
// floating-point forms 0x88; the 64-bit element forms set VEX_W. All four
// are scheduled as WriteVarShuffle256.
10091 // FIXME: Is there a better scheduler class for VPEXPAND?
10092 defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
10093 avx512vl_i32_info>, EVEX;
10094 defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
10095 avx512vl_i64_info>, EVEX, VEX_W;
10096 defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
10097 avx512vl_f32_info>, EVEX;
10098 defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
10099 avx512vl_f64_info>, EVEX, VEX_W;
10101 // Handle instruction reg_vec1 = op(reg_vec,imm)
10103 // op(broadcast(eltVt),imm)
10104 // All instructions are created with FROUND_CURRENT.
// Forms defined here (each via AVX512_maskable, mnemonic OpcodeStr # _.Suffix):
//   rri  - register source plus immediate
//   rmi  - full-vector memory source (_.MemOp) plus immediate
//   rmbi - scalar memory source (_.ScalarMemOp) broadcast through
//          X86VBroadcast, plus immediate; encoded with EVEX_B
10105 multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10106 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10107 let ExeDomain = _.ExeDomain in {
10108 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10109 (ins _.RC:$src1, i32u8imm:$src2),
10110 OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
10111 (OpNode (_.VT _.RC:$src1),
10112 (i32 imm:$src2))>, Sched<[sched]>;
10113 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10114 (ins _.MemOp:$src1, i32u8imm:$src2),
10115 OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
10116 (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10118 Sched<[sched.Folded, sched.ReadAfterFold]>;
10119 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10120 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
10121 OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
10122 "${src1}"##_.BroadcastStr##", $src2",
10123 (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
10124 (i32 imm:$src2))>, EVEX_B,
10125 Sched<[sched.Folded, sched.ReadAfterFold]>;
10129 // Handle instruction reg_vec1 = op(reg_vec2,imm),{sae}
// This is the unary form: the only inputs are one vector register ($src1)
// and the immediate ($src2). The "rrib" instruction prints "{sae}" in its
// assembly string and is encoded with EVEX_B.
10130 multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10131 SDNode OpNode, X86FoldableSchedWrite sched,
10132 X86VectorVTInfo _> {
10133 let ExeDomain = _.ExeDomain in
10134 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10135 (ins _.RC:$src1, i32u8imm:$src2),
10136 OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
10137 "$src1, {sae}, $src2",
10138 (OpNode (_.VT _.RC:$src1),
10140 EVEX_B, Sched<[sched]>;
// Instantiates the unary packed FP-with-immediate instructions for all
// widths of the type family "_". The 512-bit definition (Z, under [prd])
// combines the regular forms (OpNode) with the {sae} form (OpNodeSAE); the
// Z128 and Z256 definitions (under [prd, HasVLX]) instantiate only
// avx512_unary_fp_packed_imm.
10143 multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
10144 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10145 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10146 let Predicates = [prd] in {
10147 defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
10149 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
10150 sched.ZMM, _.info512>, EVEX_V512;
10152 let Predicates = [prd, HasVLX] in {
10153 defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM,
10154 _.info128>, EVEX_V128;
10155 defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM,
10156 _.info256>, EVEX_V256;
10160 // Handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10161 // op(reg_vec2,mem_vec,imm)
10162 // op(reg_vec2,broadcast(eltVt),imm)
10163 // All instructions are created with FROUND_CURRENT.
// Forms defined here (each via AVX512_maskable; the immediate operand is
// declared as i32u8imm):
//   rri  - both vector sources in registers
//   rmi  - second source loaded from memory (_.MemOp)
//   rmbi - second source is a scalar load broadcast through X86VBroadcast;
//          encoded with EVEX_B
10164 multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10165 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10166 let ExeDomain = _.ExeDomain in {
10167 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10168 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10169 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10170 (OpNode (_.VT _.RC:$src1),
10174 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10175 (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
10176 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10177 (OpNode (_.VT _.RC:$src1),
10178 (_.VT (bitconvert (_.LdFrag addr:$src2))),
10180 Sched<[sched.Folded, sched.ReadAfterFold]>;
10181 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10182 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10183 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10184 "$src1, ${src2}"##_.BroadcastStr##", $src3",
10185 (OpNode (_.VT _.RC:$src1),
10186 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
10187 (i32 imm:$src3))>, EVEX_B,
10188 Sched<[sched.Folded, sched.ReadAfterFold]>;
10192 // Handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10193 // op(reg_vec2,mem_vec,imm)
// The destination type (DestInfo) and the source type (SrcInfo) are separate
// parameters, so the result may have a different vector type from the
// inputs. The immediate is declared as u8imm. Only the register (rri) and
// full-vector memory (rmi) forms are defined here; no broadcast form.
10194 multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10195 X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
10196 X86VectorVTInfo SrcInfo>{
10197 let ExeDomain = DestInfo.ExeDomain in {
10198 defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10199 (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
10200 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10201 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10202 (SrcInfo.VT SrcInfo.RC:$src2),
10205 defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10206 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
10207 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10208 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10209 (SrcInfo.VT (bitconvert
10210 (SrcInfo.LdFrag addr:$src2))),
10212 Sched<[sched.Folded, sched.ReadAfterFold]>;
10216 // Handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10217 // op(reg_vec2,mem_vec,imm)
10218 // op(reg_vec2,broadcast(eltVt),imm)
// Inherits the rri/rmi forms from avx512_3Op_rm_imm8 (with the same type used
// for destination and sources) and adds the broadcast form rmbi, which is
// encoded with EVEX_B and takes an i8 immediate in its pattern.
10219 multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10220 X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10221 avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
10223 let ExeDomain = _.ExeDomain in
10224 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10225 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10226 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10227 "$src1, ${src2}"##_.BroadcastStr##", $src3",
10228 (OpNode (_.VT _.RC:$src1),
10229 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
10230 (i8 imm:$src3))>, EVEX_B,
10231 Sched<[sched.Folded, sched.ReadAfterFold]>;
10234 // Handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10235 // op(reg_vec2,mem_scalar,imm)
// Both forms are built with AVX512_maskable_scalar. In the memory form (rmi)
// the second source is a scalar load (_.ScalarLdFrag) wrapped in
// scalar_to_vector.
10236 multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10237 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10238 let ExeDomain = _.ExeDomain in {
10239 defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10240 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10241 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10242 (OpNode (_.VT _.RC:$src1),
10246 defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10247 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10248 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10249 (OpNode (_.VT _.RC:$src1),
10250 (_.VT (scalar_to_vector
10251 (_.ScalarLdFrag addr:$src2))),
10253 Sched<[sched.Folded, sched.ReadAfterFold]>;
10257 // Handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Register-only packed form ("rrib") whose assembly string contains "{sae}";
// it is encoded with EVEX_B.
10258 multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10259 SDNode OpNode, X86FoldableSchedWrite sched,
10260 X86VectorVTInfo _> {
10261 let ExeDomain = _.ExeDomain in
10262 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10263 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10264 OpcodeStr, "$src3, {sae}, $src2, $src1",
10265 "$src1, $src2, {sae}, $src3",
10266 (OpNode (_.VT _.RC:$src1),
10269 EVEX_B, Sched<[sched]>;
10272 // Handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Register-only scalar form whose assembly string contains "{sae}"; it is
// built with AVX512_maskable_scalar and encoded with EVEX_B. Note that the
// definition name is spelled NAME#rrib here rather than plain rrib.
10273 multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10274 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10275 let ExeDomain = _.ExeDomain in
10276 defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10277 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10278 OpcodeStr, "$src3, {sae}, $src2, $src1",
10279 "$src1, $src2, {sae}, $src3",
10280 (OpNode (_.VT _.RC:$src1),
10283 EVEX_B, Sched<[sched]>;
// Instantiates the two-source packed FP-with-immediate instructions for the
// type family "_". The 512-bit definition (Z, under [prd]) combines
// avx512_fp_packed_imm (OpNode) with the {sae} form avx512_fp_sae_packed_imm
// (OpNodeSAE). The Z128/Z256 definitions are guarded by [prd, HasVLX] and
// instantiate avx512_fp_packed_imm.
10286 multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
10287 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10288 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10289 let Predicates = [prd] in {
10290 defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10291 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
10295 let Predicates = [prd, HasVLX] in {
10296 defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10298 defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
// Instantiates avx512_3Op_rm_imm8 (register and memory forms, no broadcast)
// for all widths, with separate destination and source type families.
// Z is guarded by [Pred] (HasBWI by default); Z128/Z256 additionally require
// HasVLX. Every definition is tagged AVX512AIi8Base and EVEX_4V.
10303 multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
10304 X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
10305 AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
10306 let Predicates = [Pred] in {
10307 defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
10308 SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
10310 let Predicates = [Pred, HasVLX] in {
10311 defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
10312 SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
10313 defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
10314 SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
// Instantiates avx512_3Op_imm8 (register, memory and broadcast forms) for
// all widths of the type family "_". Z is guarded by [Pred] (HasAVX512 by
// default); Z128/Z256 additionally require HasVLX.
10318 multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
10319 bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
10320 Predicate Pred = HasAVX512> {
10321 let Predicates = [Pred] in {
10322 defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10325 let Predicates = [Pred, HasVLX] in {
10326 defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10328 defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
// Scalar counterpart of the packed "common" wrappers: under [prd], the single
// definition Z combines the regular scalar forms (avx512_fp_scalar_imm with
// OpNode) and the {sae} scalar form (avx512_fp_sae_scalar_imm with
// OpNodeSAE). Both use sched.XMM. Here "_" is a single X86VectorVTInfo, not
// a width family.
10333 multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
10334 X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
10335 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
10336 let Predicates = [prd] in {
10337 defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
10338 avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
// Instantiates the unary packed FP-with-immediate family for both element
// types: PS uses avx512vl_f32_info with opcode opcPs and EVEX_CD8<32, CD8VF>;
// PD uses avx512vl_f64_info with opcode opcPd, EVEX_CD8<64, CD8VF> and VEX_W.
10342 multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
10343 bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
10344 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10345 defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
10346 opcPs, OpNode, OpNodeSAE, sched, prd>,
10347 EVEX_CD8<32, CD8VF>;
10348 defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
10349 opcPd, OpNode, OpNodeSAE, sched, prd>,
10350 EVEX_CD8<64, CD8VF>, VEX_W;
// Unary packed FP-with-immediate instantiations (PS and PD variants each).
// VREDUCE and VGETMANT use the same opcode for PS and PD (0x56 and 0x26
// respectively); VRNDSCALE uses 0x08 for PS and 0x09 for PD. VREDUCE requires
// HasDQI, the other two HasAVX512. All are scheduled with SchedWriteFRnd.
10353 defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
10354 X86VReduce, X86VReduceSAE, SchedWriteFRnd, HasDQI>,
10355 AVX512AIi8Base, EVEX;
10356 defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
10357 X86VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>,
10358 AVX512AIi8Base, EVEX;
10359 defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
10360 X86VGetMant, X86VGetMantSAE, SchedWriteFRnd, HasAVX512>,
10361 AVX512AIi8Base, EVEX;
// Packed VRANGE: both element types use opcode 0x50, X86VRange/X86VRangeSAE,
// SchedWriteFAdd and require HasDQI. The PD form sets VEX_W and uses
// EVEX_CD8<64, CD8VF>; the PS form uses EVEX_CD8<32, CD8VF>.
10363 defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
10364 0x50, X86VRange, X86VRangeSAE,
10365 SchedWriteFAdd, HasDQI>,
10366 AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10367 defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
10368 0x50, X86VRange, X86VRangeSAE,
10369 SchedWriteFAdd, HasDQI>,
10370 AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
// Scalar FP-with-immediate instantiations. Each SD/SS pair shares an opcode
// (VRANGE 0x51, VREDUCE 0x57, VGETMANT 0x27); the SD forms use f64x_info,
// EVEX_CD8<64, CD8VT1> and VEX_W, the SS forms use f32x_info and
// EVEX_CD8<32, CD8VT1>. All are VEX_LIG and EVEX_4V. VRANGE and VREDUCE
// require HasDQI; VGETMANT requires HasAVX512.
10372 defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
10373 f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10374 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10375 defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
10376 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10377 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10379 defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
10380 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10381 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10382 defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
10383 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10384 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10386 defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
10387 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10388 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10389 defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
10390 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10391 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
// 128-bit-lane shuffle with immediate for one vector width. The X86Shuf128
// node is expressed in CastInfo.VT, which may differ from the instruction's
// own type "_". Forms:
//   rri  - register sources; EVEX2VEXOverride is EVEX2VEXOvrd # "rr"
//   rmi  - second source loaded with CastInfo.LdFrag; EVEX2VEXOverride is
//          EVEX2VEXOvrd # "rm"
//   rmbi - second source is a scalar load broadcast through X86VBroadcast;
//          encoded with EVEX_B
10393 multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
10394 X86FoldableSchedWrite sched,
10396 X86VectorVTInfo CastInfo,
10397 string EVEX2VEXOvrd> {
10398 let ExeDomain = _.ExeDomain in {
10399 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10400 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10401 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10403 (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
10404 (i8 imm:$src3)))))>,
10405 Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
10406 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10407 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10408 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10411 (CastInfo.VT (X86Shuf128 _.RC:$src1,
10412 (CastInfo.LdFrag addr:$src2),
10413 (i8 imm:$src3)))))>,
10414 Sched<[sched.Folded, sched.ReadAfterFold]>,
10415 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
10416 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10417 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10418 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10419 "$src1, ${src2}"##_.BroadcastStr##", $src3",
10423 (X86Shuf128 _.RC:$src1,
10424 (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
10425 (i8 imm:$src3)))))>, EVEX_B,
10426 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates the 128-bit-lane shuffle for the 512-bit (Z, HasAVX512) and
// 256-bit (Z256, HasAVX512 + HasVLX) widths. The 512-bit form passes an empty
// string as the EVEX2VEX override name; only the 256-bit form forwards
// EVEX2VEXOvrd. The same sched value is used for both widths.
10430 multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
10431 AVX512VLVectorVTInfo _,
10432 AVX512VLVectorVTInfo CastInfo, bits<8> opc,
10433 string EVEX2VEXOvrd>{
10434 let Predicates = [HasAVX512] in
10435 defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10436 _.info512, CastInfo.info512, "">, EVEX_V512;
10438 let Predicates = [HasAVX512, HasVLX] in
10439 defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10440 _.info256, CastInfo.info256,
10441 EVEX2VEXOvrd>, EVEX_V256;
// 128-bit-lane shuffle instantiations. The FP forms use opcode 0x23 and
// "VPERM2F128" as the EVEX2VEX override stem; the integer forms use opcode
// 0x43 and "VPERM2I128". The 32x4 forms pass the 64-bit-element type family
// as CastInfo (so the shuffle node is typed with 64-bit elements), while the
// 64x2 forms use the same family for both and additionally set VEX_W.
10444 defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
10445 avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10446 defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
10447 avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10448 defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
10449 avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10450 defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
10451 avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
// Register-source X86SubVBroadcast of a 128-bit value to 512 bits, selected
// to a 128-bit-lane shuffle. In every pattern both shuffle sources are the
// same value: the 128-bit register inserted at sub_xmm into an IMPLICIT_DEF
// of the 512-bit type. The f64/i64 element types use the 64X2 shuffles, the
// f32/i32 types use the 32X4 shuffles, and the i16/i8 types reuse
// VSHUFI32X4Zrri.
10453 let Predicates = [HasAVX512] in {
10454 // Provide fallback in case the load node that is used in the broadcast
10455 // patterns above is used by additional users, which prevents the pattern
10457 def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
10458 (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10459 (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10461 def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
10462 (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10463 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10466 def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
10467 (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10468 (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10470 def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
10471 (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10472 (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10475 def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
10476 (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10477 (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10480 def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
10481 (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10482 (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
// VALIGN for one vector width, selected from X86VAlign with an i8 immediate.
// Forms:
//   rri  - register sources; EVEX2VEXOverride "VPALIGNRrri"
//   rmi  - second source loaded from memory; EVEX2VEXOverride "VPALIGNRrmi"
//   rmbi - second source is a scalar load broadcast through X86VBroadcast;
//          encoded with EVEX_B
10486 multiclass avx512_valign<bits<8> opc, string OpcodeStr,
10487 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10488 // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
10489 // instantiation of this class.
10490 let ExeDomain = _.ExeDomain in {
10491 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10492 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10493 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10494 (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$src3)))>,
10495 Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
10496 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10497 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10498 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10499 (_.VT (X86VAlign _.RC:$src1,
10500 (bitconvert (_.LdFrag addr:$src2)),
10502 Sched<[sched.Folded, sched.ReadAfterFold]>,
10503 EVEX2VEXOverride<"VPALIGNRrmi">;
10505 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10506 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10507 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10508 "$src1, ${src2}"##_.BroadcastStr##", $src3",
10509 (X86VAlign _.RC:$src1,
10510 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
10511 (i8 imm:$src3))>, EVEX_B,
10512 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates avx512_valign with the fixed opcode 0x03 for all widths of
// the type family "_": Z under [HasAVX512], Z128 and Z256 under
// [HasAVX512, HasVLX]. For Z256 the EVEX2VEXOverride set inside avx512_valign
// is reset to unset (?).
10516 multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
10517 AVX512VLVectorVTInfo _> {
10518 let Predicates = [HasAVX512] in {
10519 defm Z : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
10520 AVX512AIi8Base, EVEX_4V, EVEX_V512;
10522 let Predicates = [HasAVX512, HasVLX] in {
10523 defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
10524 AVX512AIi8Base, EVEX_4V, EVEX_V128;
10525 // We can't really override the 256-bit version so change it back to unset.
10526 let EVEX2VEXOverride = ? in
10527 defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
10528 AVX512AIi8Base, EVEX_4V, EVEX_V256;
// VALIGND / VALIGNQ are built from avx512_valign_common over the i32 / i64
// type families. VPALIGNR is built from avx512_common_3Op_rm_imm8 (register
// and memory forms only) over the i8 type family with opcode 0x0F and
// selects X86PAlignr; it relies on that multiclass's default predicate.
10532 defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
10533 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
10534 defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
10535 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
10538 defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
10539 SchedWriteShuffle, avx512vl_i8_info,
10540 avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
// Immediate transforms that rescale an align amount when the same operation
// is re-expressed with a narrower element type. Each returns the original
// immediate multiplied by a constant, as an i8 immediate:
//   ValignqImm32XForm - multiply by 2 (used below for i64 -> i32 elements)
//   ValignqImm8XForm  - multiply by 8 (used below for i64 -> i8 elements)
//   ValigndImm8XForm  - multiply by 4 (used below for i32 -> i8 elements)
10542 // Fragments to help convert valignq into masked valignd. Or valignq/valignd
10544 def ValignqImm32XForm : SDNodeXForm<imm, [{
10545 return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
10547 def ValignqImm8XForm : SDNodeXForm<imm, [{
10548 return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
10550 def ValigndImm8XForm : SDNodeXForm<imm, [{
10551 return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
// Patterns for a masked select (vselect typed To.VT, mask To.KRCWM) whose
// selected value is an OpNode computed in the type From.VT. They select the
// To-typed instruction named by OpcodeStr, with the immediate rewritten
// through ImmXForm:
//   register sources -> OpcodeStr#"rrik"  (takes $src0) / "rrikz"
//   memory source    -> OpcodeStr#"rmik"  (takes $src0) / "rmikz"
// NOTE: despite its name, the OpcodeStr parameter is used as an instruction
// record-name prefix (it is passed to !cast<Instruction>), not a mnemonic.
10554 multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
10555 X86VectorVTInfo From, X86VectorVTInfo To,
10556 SDNodeXForm ImmXForm> {
10557 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10559 (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10562 (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
10563 To.RC:$src1, To.RC:$src2,
10564 (ImmXForm imm:$src3))>;
10566 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10568 (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10571 (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
10572 To.RC:$src1, To.RC:$src2,
10573 (ImmXForm imm:$src3))>;
10575 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10577 (From.VT (OpNode From.RC:$src1,
10578 (From.LdFrag addr:$src2),
10581 (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
10582 To.RC:$src1, addr:$src2,
10583 (ImmXForm imm:$src3))>;
10585 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10587 (From.VT (OpNode From.RC:$src1,
10588 (From.LdFrag addr:$src2),
10591 (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
10592 To.RC:$src1, addr:$src2,
10593 (ImmXForm imm:$src3))>;
// Extends avx512_vpalign_mask_lowering with patterns whose second source is a
// To-typed broadcast of a scalar load (X86VBroadcast of To.ScalarLdFrag):
//   unmasked              -> OpcodeStr#"rmbi"
//   masked, with $src0    -> OpcodeStr#"rmbik"
//   masked, without $src0 -> OpcodeStr#"rmbikz"
// The immediate is rewritten through ImmXForm in all three.
10596 multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
10597 X86VectorVTInfo From,
10598 X86VectorVTInfo To,
10599 SDNodeXForm ImmXForm> :
10600 avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
10601 def : Pat<(From.VT (OpNode From.RC:$src1,
10602 (bitconvert (To.VT (X86VBroadcast
10603 (To.ScalarLdFrag addr:$src2)))),
10605 (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
10606 (ImmXForm imm:$src3))>;
10608 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10610 (From.VT (OpNode From.RC:$src1,
10612 (To.VT (X86VBroadcast
10613 (To.ScalarLdFrag addr:$src2)))),
10616 (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
10617 To.RC:$src1, addr:$src2,
10618 (ImmXForm imm:$src3))>;
10620 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10622 (From.VT (OpNode From.RC:$src1,
10624 (To.VT (X86VBroadcast
10625 (To.ScalarLdFrag addr:$src2)))),
10628 (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
10629 To.RC:$src1, addr:$src2,
10630 (ImmXForm imm:$src3))>;
// 512-bit: X86VAlign typed v8i64 is re-expressed with the VALIGNDZ
// instructions (v16i32), doubling the immediate via ValignqImm32XForm.
10633 let Predicates = [HasAVX512] in {
10634 // For 512-bit we lower to the widest element type we can. So we only need
10635 // to handle converting valignq to valignd.
10636 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
10637 v16i32_info, ValignqImm32XForm>;
// 128-bit and 256-bit: same valignq -> valignd conversion as the 512-bit
// case, using the VALIGNDZ128 / VALIGNDZ256 instructions.
10640 let Predicates = [HasVLX] in {
10641 // For 128-bit we lower to the widest element type we can. So we only need
10642 // to handle converting valignq to valignd.
10643 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
10644 v4i32x_info, ValignqImm32XForm>;
10645 // For 256-bit we lower to the widest element type we can. So we only need
10646 // to handle converting valignq to valignd.
10647 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
10648 v8i32x_info, ValignqImm32XForm>;
// With VLX and BWI, X86VAlign typed v2i64 or v4i32 is re-expressed with
// VPALIGNRZ128 (v16i8); the immediate is scaled by 8 or 4 respectively.
// Only the 128-bit instantiations are visible in this block.
10651 let Predicates = [HasVLX, HasBWI] in {
10652 // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
10653 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
10654 v16i8x_info, ValignqImm8XForm>;
10655 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
10656 v16i8x_info, ValigndImm8XForm>;
// VDBPSADBW: opcode 0x42, selected from X86dbpsadbw. The destination uses the
// i16 type family and the sources the i8 type family. Marked
// NotEVEX2VEXConvertible.
10659 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
10660 SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
10661 EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
// Unary vector operation for one vector width, selected from OpNode:
//   rr - register source
//   rm - full-vector memory source (_.LdFrag), scheduled with sched.Folded
10663 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10664 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10665 let ExeDomain = _.ExeDomain in {
10666 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10667 (ins _.RC:$src1), OpcodeStr,
10669 (_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase,
10672 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10673 (ins _.MemOp:$src1), OpcodeStr,
10675 (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
10676 EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
10677 Sched<[sched.Folded]>;
// Extends avx512_unary_rm with a broadcast form (rmb): the source is a scalar
// load (_.ScalarLdFrag) broadcast through X86VBroadcast, the assembly operand
// is printed with _.BroadcastStr, and the instruction is encoded with EVEX_B.
10681 multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
10682 X86FoldableSchedWrite sched, X86VectorVTInfo _> :
10683 avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
10684 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10685 (ins _.ScalarMemOp:$src1), OpcodeStr,
10686 "${src1}"##_.BroadcastStr,
10687 "${src1}"##_.BroadcastStr,
10688 (_.VT (OpNode (X86VBroadcast
10689 (_.ScalarLdFrag addr:$src1))))>,
10690 EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
10691 Sched<[sched.Folded]>;
// Instantiates avx512_unary_rm for three vector widths:
//   Z    - VTInfo.info512 with sched.ZMM, guarded by [prd].
//   Z256 - VTInfo.info256 with sched.YMM, guarded by [prd, HasVLX].
//   Z128 - VTInfo.info128 with sched.XMM, guarded by [prd, HasVLX].
10694 multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
10695 X86SchedWriteWidths sched,
10696 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10697 let Predicates = [prd] in
10698 defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
10701 let Predicates = [prd, HasVLX] in {
10702 defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
10704 defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
// Same width fan-out as avx512_unary_rm_vl, but built on avx512_unary_rmb so
// each width also gets the broadcast-memory form:
//   Z    - VTInfo.info512 with sched.ZMM, guarded by [prd].
//   Z256 - VTInfo.info256 with sched.YMM, guarded by [prd, HasVLX].
//   Z128 - VTInfo.info128 with sched.XMM, guarded by [prd, HasVLX].
10709 multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
10710 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
10712 let Predicates = [prd] in
10713 defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
10716 let Predicates = [prd, HasVLX] in {
10717 defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
10719 defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
// Dword/qword pair of a unary operation, both with broadcast-memory forms:
//   Q - opcode opc_q, mnemonic OpcodeStr + "q", i64 element types, VEX_W set.
//   D - opcode opc_d, mnemonic OpcodeStr + "d", i32 element types.
// Both are guarded by the predicate prd.
10724 multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
10725 SDNode OpNode, X86SchedWriteWidths sched,
10727 defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
10728 avx512vl_i64_info, prd>, VEX_W;
10729 defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
10730 avx512vl_i32_info, prd>;
// Word/byte pair of a unary operation. Unlike the dword/qword variant these
// are built on avx512_unary_rm_vl, i.e. without a broadcast-memory form:
//   W - opcode opc_w, mnemonic OpcodeStr + "w", i16 element types.
//   B - opcode opc_b, mnemonic OpcodeStr + "b", i8 element types.
// Both are tagged VEX_WIG and guarded by the predicate prd.
10733 multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
10734 SDNode OpNode, X86SchedWriteWidths sched,
10736 defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
10737 avx512vl_i16_info, prd>, VEX_WIG;
10738 defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
10739 avx512vl_i8_info, prd>, VEX_WIG;
// Combines avx512_unary_rm_vl_dq and avx512_unary_rm_vl_bw under the same NAME
// so that one defm yields byte, word, dword and qword variants.
// Opcode parameters are given in the order byte, word, dword, qword.
10742 multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
10743 bits<8> opc_d, bits<8> opc_q,
10744 string OpcodeStr, SDNode OpNode,
10745 X86SchedWriteWidths sched> {
10746 defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
10748 avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
// VPABS for all four element sizes, selected from the generic abs node.
// Opcodes: 0x1C (byte), 0x1D (word), 0x1E (dword), 0x1F (qword).
10752 defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
10755 // VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
// Only the 64-bit element types (v4i64 and v2i64) are matched here. The narrow
// source register is inserted into an undefined v8i64 value at sub_ymm /
// sub_xmm so that a 512-bit operation can be applied to it.
10756 let Predicates = [HasAVX512, NoVLX] in {
10757 def : Pat<(v4i64 (abs VR256X:$src)),
10760 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
10762 def : Pat<(v2i64 (abs VR128X:$src)),
10765 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
10769 // Use 512bit version to implement 128/256 bit.
// Emits selection patterns, active under [prd, NoVLX], that implement the
// 256-bit and 128-bit forms of a unary OpNode with the 512-bit instruction
// named InstrStr # "Zrr".
//   InstrStr - instruction name prefix (the "Zrr" suffix is appended here).
//   OpNode   - SelectionDAG node being matched.
//   _        - vector type family providing info128/info256/info512.
//   prd      - feature predicate required in addition to NoVLX.
// The narrow source is inserted into an undefined 512-bit value at the
// matching SubRegIdx; the same SubRegIdx appears again as the final operand of
// each output pattern.
10770 multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
10771 AVX512VLVectorVTInfo _, Predicate prd> {
10772 let Predicates = [prd, NoVLX] in {
// 256-bit source.
10773 def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
10775 (!cast<Instruction>(InstrStr # "Zrr")
10776 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
10777 _.info256.RC:$src1,
10778 _.info256.SubRegIdx)),
10779 _.info256.SubRegIdx)>;
// 128-bit source.
10781 def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
10783 (!cast<Instruction>(InstrStr # "Zrr")
10784 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
10785 _.info128.RC:$src1,
10786 _.info128.SubRegIdx)),
10787 _.info128.SubRegIdx)>;
// VPLZCNT{D,Q}: selected from ctlz, opcode 0x44 for both element sizes,
// guarded by HasCDI.
10791 defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
10792 SchedWriteVecIMul, HasCDI>;
10794 // FIXME: Is there a better scheduler class for VPCONFLICT?
// VPCONFLICT{D,Q}: selected from X86Conflict, opcode 0xC4 for both element
// sizes, guarded by HasCDI.
10795 defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
10796 SchedWriteVecALU, HasCDI>;
10798 // VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
10799 defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
10800 defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
10802 //===---------------------------------------------------------------------===//
10803 // Counts number of ones - VPOPCNTD and VPOPCNTQ
10804 //===---------------------------------------------------------------------===//
10806 // FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
// VPOPCNT{D,Q}: selected from ctpop, opcode 0x55 for both element sizes,
// guarded by HasVPOPCNTDQ.
10807 defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
10808 SchedWriteVecALU, HasVPOPCNTDQ>;
// Without VLX, implement the 128/256-bit forms with the 512-bit instruction.
10810 defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
10811 defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
10813 //===---------------------------------------------------------------------===//
10814 // Replicate Single FP - MOVSHDUP and MOVSLDUP
10815 //===---------------------------------------------------------------------===//
// Wraps avx512_unary_rm_vl for the f32 vector-type family under HasAVX512 and
// adds the XS prefix class.
10817 multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
10818 X86SchedWriteWidths sched> {
10819 defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
10820 avx512vl_f32_info, HasAVX512>, XS;
// VMOVSHDUP: opcode 0x16, node X86Movshdup.
10823 defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
10824 SchedWriteFShuffle>;
// VMOVSLDUP: opcode 0x12, node X86Movsldup.
10825 defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
10826 SchedWriteFShuffle>;
10828 //===----------------------------------------------------------------------===//
10829 // AVX-512 - MOVDDUP
10830 //===----------------------------------------------------------------------===//
// 128-bit MOVDDUP forms. The memory form differs from avx512_unary_rm: it
// takes a scalar memory operand, wraps the scalar load in scalar_to_vector,
// and uses the CD8VH tuple for EVEX_CD8.
10832 multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
10833 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10834 let ExeDomain = _.ExeDomain in {
// Register source form.
10835 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10836 (ins _.RC:$src), OpcodeStr, "$src", "$src",
10837 (_.VT (OpNode (_.VT _.RC:$src)))>, EVEX,
// Scalar-load source form.
10839 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10840 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
10841 (_.VT (OpNode (_.VT (scalar_to_vector
10842 (_.ScalarLdFrag addr:$src)))))>,
10843 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
10844 Sched<[sched.Folded]>;
// Width fan-out for MOVDDUP:
//   Z    - avx512_unary_rm with X86Movddup on info512 (EVEX_V512).
//   Z256 - avx512_unary_rm with X86Movddup on info256 (EVEX_V256).
//   Z128 - avx512_movddup_128 with X86VBroadcast on info128 (EVEX_V128).
// Z256 and Z128 are guarded by [HasAVX512, HasVLX].
// NOTE(review): the OpNode parameter is not referenced in the visible body;
// the nodes above are hard-coded.
10848 multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
10849 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
10850 defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
10851 VTInfo.info512>, EVEX_V512;
10853 let Predicates = [HasAVX512, HasVLX] in {
10854 defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
10855 VTInfo.info256>, EVEX_V256;
10856 defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, sched.XMM,
10857 VTInfo.info128>, EVEX_V128;
// Instantiates avx512_movddup_common for the f64 vector-type family and adds
// the XD prefix class and VEX_W.
10861 multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
10862 X86SchedWriteWidths sched> {
10863 defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
10864 avx512vl_f64_info>, XD, VEX_W;
// VMOVDDUP: opcode 0x12, FP shuffle scheduling classes.
10867 defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;
// Extra selection patterns (HasVLX) that map v2f64 X86VBroadcast of an f64
// scalar onto the 128-bit VMOVDDUP instructions.
10869 let Predicates = [HasVLX] in {
// Unmasked: scalar load, scalar register (copied into VR128X first), full
// v2f64 non-volatile load, and X86vzload sources.
10870 def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
10871 (VMOVDDUPZ128rm addr:$src)>;
10872 def : Pat<(v2f64 (X86VBroadcast f64:$src)),
10873 (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10874 def : Pat<(v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
10875 (VMOVDDUPZ128rm addr:$src)>;
10876 def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload addr:$src)))),
10877 (VMOVDDUPZ128rm addr:$src)>;
// Masked, scalar register source: rrk (pass-through $src0) and rrkz.
10879 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
10880 (v2f64 VR128X:$src0)),
10881 (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
10882 (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10883 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
10885 (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
// Masked, scalar load source: rmk (pass-through $src0) and rmkz.
10887 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
10888 (v2f64 VR128X:$src0)),
10889 (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
10890 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
10892 (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
// Masked, full v2f64 non-volatile load source: rmk and rmkz.
10894 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
10895 (v2f64 VR128X:$src0)),
10896 (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
10897 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
10899 (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
10902 //===----------------------------------------------------------------------===//
10903 // AVX-512 - Unpack Instructions
10904 //===----------------------------------------------------------------------===//
// Floating-point unpack high (0x15) / low (0x14), guarded by HasAVX512.
// VUNPCKH passes two extra trailing arguments (0, 1) that VUNPCKL leaves at
// their defaults.
10906 defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
10907 SchedWriteFShuffleSizes, 0, 1>;
10908 defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
10909 SchedWriteFShuffleSizes>;
// Integer unpacks on byte and word elements, guarded by HasBWI.
10911 defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
10912 SchedWriteShuffle, HasBWI>;
10913 defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
10914 SchedWriteShuffle, HasBWI>;
10915 defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
10916 SchedWriteShuffle, HasBWI>;
10917 defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
10918 SchedWriteShuffle, HasBWI>;
// Integer unpacks on dword and qword elements, guarded by HasAVX512.
10920 defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
10921 SchedWriteShuffle, HasAVX512>;
10922 defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
10923 SchedWriteShuffle, HasAVX512>;
10924 defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
10925 SchedWriteShuffle, HasAVX512>;
10926 defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
10927 SchedWriteShuffle, HasAVX512>;
10929 //===----------------------------------------------------------------------===//
10930 // AVX-512 - Extract & Insert Integer Instructions
10931 //===----------------------------------------------------------------------===//
// Store form (mr) shared by the byte and word element extracts: the pattern
// stores the result of OpNode(vector, imm), truncated to the element type.
10933 multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
10934 X86VectorVTInfo _> {
10935 def mr : AVX512Ii8<opc, MRMDestMem, (outs),
10936 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
10937 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10938 [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
10940 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
// Byte element extract (opcode 0x14, TAPD): a register form that writes a
// GR32orGR64 destination via X86pextrb, plus the shared store form from
// avx512_extract_elt_bw_m. Guarded by HasBWI.
10943 multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
10944 let Predicates = [HasBWI] in {
10945 def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
10946 (ins _.RC:$src1, u8imm:$src2),
10947 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10948 [(set GR32orGR64:$dst,
10949 (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
10950 EVEX, TAPD, Sched<[WriteVecExtract]>;
10952 defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
// Word element extract. Guarded by HasBWI. Three records:
//   rr     - opcode 0xC5, MRMSrcReg, PD; carries the X86pextrw pattern.
//   rr_REV - opcode 0x15, MRMDestReg, TAPD; no selection pattern, marked
//            isCodeGenOnly and ForceDisassemble, linked to rr via FoldGenData.
//   store  - opcode 0x15 store form from avx512_extract_elt_bw_m, TAPD.
10956 multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
10957 let Predicates = [HasBWI] in {
10958 def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
10959 (ins _.RC:$src1, u8imm:$src2),
10960 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10961 [(set GR32orGR64:$dst,
10962 (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
10963 EVEX, PD, Sched<[WriteVecExtract]>;
10965 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
10966 def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
10967 (ins _.RC:$src1, u8imm:$src2),
10968 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
10969 EVEX, TAPD, FoldGenData<NAME#rr>,
10970 Sched<[WriteVecExtract]>;
10972 defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
// Dword/qword element extract (opcode 0x16, TAPD), guarded by HasDQI.
//   GRC - general-purpose register class of the rr form's destination.
// Both forms match the generic extractelt node; the mr form stores the
// extracted element to memory.
10976 multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
10977 RegisterClass GRC> {
10978 let Predicates = [HasDQI] in {
10979 def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
10980 (ins _.RC:$src1, u8imm:$src2),
10981 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10983 (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
10984 EVEX, TAPD, Sched<[WriteVecExtract]>;
10986 def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
10987 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
10988 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10989 [(store (extractelt (_.VT _.RC:$src1),
10990 imm:$src2),addr:$dst)]>,
10991 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
10992 Sched<[WriteVecExtractSt]>;
// EVEX element-extract instructions on 128-bit vectors. The byte and word
// forms are VEX_WIG; the qword form sets VEX_W and uses GR64, the dword form
// uses GR32.
10996 defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
10997 defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
10998 defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
10999 defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
// Memory-source form (rm) of an element insert: the value to insert is loaded
// with LdFrag from $src2 and placed into $src1 at the position given by the
// immediate $src3.
//   LdFrag - load fragment used for the scalar memory operand.
11001 multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11002 X86VectorVTInfo _, PatFrag LdFrag> {
11003 def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
11004 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
11005 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11007 (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
11008 EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
// Byte/word element insert, guarded by HasBWI: a register form taking the
// value from a GR32orGR64 register, plus the memory form from
// avx512_insert_elt_m using the supplied LdFrag.
11011 multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
11012 X86VectorVTInfo _, PatFrag LdFrag> {
11013 let Predicates = [HasBWI] in {
11014 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11015 (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
11016 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11018 (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
11019 Sched<[WriteVecInsert]>;
11021 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
// Dword/qword element insert (TAPD), guarded by HasDQI. Both forms match the
// generic insertelt node; the memory form loads the value with _.ScalarLdFrag.
//   GRC - general-purpose register class of the rr form's inserted value.
11025 multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
11026 X86VectorVTInfo _, RegisterClass GRC> {
11027 let Predicates = [HasDQI] in {
11028 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11029 (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
11030 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11032 (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
11033 EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
11035 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
11036 _.ScalarLdFrag>, TAPD;
// EVEX element-insert instructions on 128-bit vectors.
//   VPINSRB - opcode 0x20, TAPD, memory form loads with extloadi8.
//   VPINSRW - opcode 0xC4, PD, memory form loads with extloadi16.
//   VPINSRD / VPINSRQ - opcode 0x22; the qword form sets VEX_W.
11040 defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
11041 extloadi8>, TAPD, VEX_WIG;
11042 defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
11043 extloadi16>, PD, VEX_WIG;
11044 defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
11045 defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
11047 //===----------------------------------------------------------------------===//
11048 // VSHUFPS - VSHUFPD Operations
11049 //===----------------------------------------------------------------------===//
// Instantiates avx512_common_3Op_imm8 for opcode 0xC6 with the X86Shufp node
// on the floating-point type family VTInfo_FP.
// NOTE(review): VTInfo_I is accepted but not referenced in the visible body.
11051 multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
11052 AVX512VLVectorVTInfo VTInfo_FP>{
11053 defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
11054 SchedWriteFShuffle>,
11055 EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
11056 AVX512AIi8Base, EVEX_4V;
// Single-precision variant uses the PS prefix class; double-precision uses PD
// and sets VEX_W.
11059 defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
11060 defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
11062 //===----------------------------------------------------------------------===//
11063 // AVX-512 - Byte shift Left/Right
11064 //===----------------------------------------------------------------------===//
11066 // FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well?
// Register (rr) and memory (rm) forms of a shift-by-immediate on a whole
// vector. These are plain AVX512 records, not AVX512_maskable ones.
//   MRMr / MRMm - instruction formats for the register and memory forms.
//   OpNode      - node applied to (source vector, i8 immediate).
11067 multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
11068 Format MRMm, string OpcodeStr,
11069 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11070 def rr : AVX512<opc, MRMr,
11071 (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
11072 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11073 [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>,
// Memory form: the vector is loaded with _.LdFrag and bitconverted to _.VT.
11075 def rm : AVX512<opc, MRMm,
11076 (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
11077 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11078 [(set _.RC:$dst,(_.VT (OpNode
11079 (_.VT (bitconvert (_.LdFrag addr:$src1))),
11080 (i8 imm:$src2))))]>,
11081 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Width fan-out of avx512_shift_packed on byte-element vector types:
//   Z    - v64i8_info, sched.ZMM, EVEX_V512, guarded by [prd].
//   Z256 - v32i8x_info, sched.YMM, EVEX_V256, guarded by [prd, HasVLX].
//   Z128 - v16i8x_info, sched.XMM, EVEX_V128, guarded by [prd, HasVLX].
11084 multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
11085 Format MRMm, string OpcodeStr,
11086 X86SchedWriteWidths sched, Predicate prd>{
11087 let Predicates = [prd] in
11088 defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11089 sched.ZMM, v64i8_info>, EVEX_V512;
11090 let Predicates = [prd, HasVLX] in {
11091 defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11092 sched.YMM, v32i8x_info>, EVEX_V256;
11093 defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11094 sched.XMM, v16i8x_info>, EVEX_V128;
// Both instructions share opcode 0x73 and are distinguished by the ModRM
// format: MRM7r/MRM7m for VPSLLDQ (X86vshldq) and MRM3r/MRM3m for VPSRLDQ
// (X86vshrdq). Guarded by HasBWI.
11097 defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
11098 SchedWriteShuffle, HasBWI>,
11099 AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11100 defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
11101 SchedWriteShuffle, HasBWI>,
11102 AVX512PDIi8Base, EVEX_4V, VEX_WIG;
// Register (rr) and memory (rm) forms of a two-source operation whose result
// vector type (_dst) differs from its source vector type (_src).
//   _dst - type info for the destination register class and value type.
//   _src - type info for both sources, including the memory operand and load.
11104 multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
11105 string OpcodeStr, X86FoldableSchedWrite sched,
11106 X86VectorVTInfo _dst, X86VectorVTInfo _src> {
11107 def rr : AVX512BI<opc, MRMSrcReg,
11108 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
11109 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11110 [(set _dst.RC:$dst,(_dst.VT
11111 (OpNode (_src.VT _src.RC:$src1),
11112 (_src.VT _src.RC:$src2))))]>,
// Memory form: the second source is loaded with _src.LdFrag and bitconverted.
11114 def rm : AVX512BI<opc, MRMSrcMem,
11115 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
11116 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11117 [(set _dst.RC:$dst,(_dst.VT
11118 (OpNode (_src.VT _src.RC:$src1),
11119 (_src.VT (bitconvert
11120 (_src.LdFrag addr:$src2))))))]>,
11121 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Width fan-out of avx512_psadbw_packed with i64-element destinations and
// i8-element sources:
//   Z    - v8i64_info / v64i8_info, EVEX_V512, guarded by [prd].
//   Z256 - v4i64x_info / v32i8x_info, EVEX_V256, guarded by [prd, HasVLX].
//   Z128 - v2i64x_info / v16i8x_info, EVEX_V128, guarded by [prd, HasVLX].
11124 multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
11125 string OpcodeStr, X86SchedWriteWidths sched,
11127 let Predicates = [prd] in
11128 defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
11129 v8i64_info, v64i8_info>, EVEX_V512;
11130 let Predicates = [prd, HasVLX] in {
11131 defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
11132 v4i64x_info, v32i8x_info>, EVEX_V256;
11133 defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
11134 v2i64x_info, v16i8x_info>, EVEX_V128;
// VPSADBW: opcode 0xf6, node X86psadbw, guarded by HasBWI.
11138 defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
11139 SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
11141 // Transforms to swizzle an immediate to enable better matching when
11142 // memory operand isn't in the right place.
11143 def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
11144 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
11145 uint8_t Imm = N->getZExtValue();
11146 // Swap bits 1/4 and 3/6.
// The 0xa5 mask keeps bits 0, 2, 5 and 7, which are unaffected by the swap.
11147 uint8_t NewImm = Imm & 0xa5;
11148 if (Imm & 0x02) NewImm |= 0x10;
11149 if (Imm & 0x10) NewImm |= 0x02;
11150 if (Imm & 0x08) NewImm |= 0x40;
11151 if (Imm & 0x40) NewImm |= 0x08;
11152 return getI8Imm(NewImm, SDLoc(N));
11154 def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
11155 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
11156 uint8_t Imm = N->getZExtValue();
11157 // Swap bits 2/4 and 3/5.
// The 0xc3 mask keeps bits 0, 1, 6 and 7, which are unaffected by the swap.
11158 uint8_t NewImm = Imm & 0xc3;
11159 if (Imm & 0x04) NewImm |= 0x10;
11160 if (Imm & 0x10) NewImm |= 0x04;
11161 if (Imm & 0x08) NewImm |= 0x20;
11162 if (Imm & 0x20) NewImm |= 0x08;
11163 return getI8Imm(NewImm, SDLoc(N));
11165 def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
11166 // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
11167 uint8_t Imm = N->getZExtValue();
11168 // Swap bits 1/2 and 5/6.
// The 0x99 mask keeps bits 0, 3, 4 and 7, which are unaffected by the swap.
11169 uint8_t NewImm = Imm & 0x99;
11170 if (Imm & 0x02) NewImm |= 0x04;
11171 if (Imm & 0x04) NewImm |= 0x02;
11172 if (Imm & 0x20) NewImm |= 0x40;
11173 if (Imm & 0x40) NewImm |= 0x20;
11174 return getI8Imm(NewImm, SDLoc(N));
11176 def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
11177 // Convert a VPTERNLOG immediate by moving operand 1 to the end.
11178 uint8_t Imm = N->getZExtValue();
11179 // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
// The 0x81 mask keeps bits 0 and 7; all six other bits are relocated below.
11180 uint8_t NewImm = Imm & 0x81;
11181 if (Imm & 0x02) NewImm |= 0x04;
11182 if (Imm & 0x04) NewImm |= 0x10;
11183 if (Imm & 0x08) NewImm |= 0x40;
11184 if (Imm & 0x10) NewImm |= 0x02;
11185 if (Imm & 0x20) NewImm |= 0x08;
11186 if (Imm & 0x40) NewImm |= 0x20;
11187 return getI8Imm(NewImm, SDLoc(N));
11189 def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
11190 // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
11191 uint8_t Imm = N->getZExtValue();
11192 // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
// The 0x81 mask keeps bits 0 and 7; all six other bits are relocated below.
11193 uint8_t NewImm = Imm & 0x81;
11194 if (Imm & 0x02) NewImm |= 0x10;
11195 if (Imm & 0x04) NewImm |= 0x02;
11196 if (Imm & 0x08) NewImm |= 0x20;
11197 if (Imm & 0x10) NewImm |= 0x04;
11198 if (Imm & 0x20) NewImm |= 0x40;
11199 if (Imm & 0x40) NewImm |= 0x08;
11200 return getI8Imm(NewImm, SDLoc(N));
// Defines the three-source VPTERNLOG instruction forms for one vector type
// and a set of additional selection patterns:
//   rri  - all sources in registers.
//   rmi  - third source is a full-vector load.
//   rmbi - third source is a broadcast scalar load (EVEX_B).
// $src1 is tied to $dst. The extra patterns match the same node with its
// operands (or the masked pass-through operand) in a different order and
// compensate by rewriting the immediate with the matching VPTERNLOG*_imm8
// transform. The generated instructions are looked up by name through
// !cast<Instruction>(Name # _.ZSuffix # <form>).
11203 multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
11204 X86FoldableSchedWrite sched, X86VectorVTInfo _,
11206 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11207 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11208 (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
11209 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11210 (OpNode (_.VT _.RC:$src1),
11213 (i8 imm:$src4)), 1, 1>,
11214 AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
11215 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11216 (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
11217 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11218 (OpNode (_.VT _.RC:$src1),
11220 (_.VT (bitconvert (_.LdFrag addr:$src3))),
11221 (i8 imm:$src4)), 1, 0>,
11222 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11223 Sched<[sched.Folded, sched.ReadAfterFold]>;
11224 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11225 (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
11226 OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
11227 "$src2, ${src3}"##_.BroadcastStr##", $src4",
11228 (OpNode (_.VT _.RC:$src1),
11230 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
11231 (i8 imm:$src4)), 1, 0>, EVEX_B,
11232 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11233 Sched<[sched.Folded, sched.ReadAfterFold]>;
11234 }// Constraints = "$src1 = $dst"
11236 // Additional patterns for matching passthru operand in other positions.
// Register-only masked form (rrik); the node's operands are in 3-2-1 and
// 2-1-3 order respectively.
11237 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11238 (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
11240 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11241 _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11242 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11243 (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
11245 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11246 _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
11248 // Additional patterns for matching loads in other positions.
// Unmasked: the load appears as the first or second node operand and is moved
// to the instruction's memory operand slot.
11249 def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
11250 _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
11251 (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
11252 addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11253 def : Pat<(_.VT (OpNode _.RC:$src1,
11254 (bitconvert (_.LdFrag addr:$src3)),
11255 _.RC:$src2, (i8 imm:$src4))),
11256 (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
11257 addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
11259 // Additional patterns for matching zero masking with loads in other
11261 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11262 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11263 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
11265 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11266 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11267 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11268 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11269 _.RC:$src2, (i8 imm:$src4)),
11271 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11272 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
11274 // Additional patterns for matching masked loads with different
// Five operand orders are covered for the merge-masked load form (rmik):
// 1-3-2, 3-2-1, 2-1-3, 2-3-1 and 3-1-2, each with the transform of that name.
11276 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11277 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11278 _.RC:$src2, (i8 imm:$src4)),
11280 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11281 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
11282 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11283 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11284 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
11286 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11287 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11288 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11289 (OpNode _.RC:$src2, _.RC:$src1,
11290 (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
11292 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11293 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
11294 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11295 (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
11296 _.RC:$src1, (i8 imm:$src4)),
11298 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11299 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
11300 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11301 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11302 _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
11304 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11305 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
11307 // Additional patterns for matching broadcasts in other positions.
// The broadcast patterns below mirror the load patterns above, using the
// rmbi / rmbikz / rmbik instruction forms.
11308 def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11309 _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
11310 (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
11311 addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11312 def : Pat<(_.VT (OpNode _.RC:$src1,
11313 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11314 _.RC:$src2, (i8 imm:$src4))),
11315 (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
11316 addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
11318 // Additional patterns for matching zero masking with broadcasts in other
11320 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11321 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11322 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
11324 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11325 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11326 (VPTERNLOG321_imm8 imm:$src4))>;
11327 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11328 (OpNode _.RC:$src1,
11329 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11330 _.RC:$src2, (i8 imm:$src4)),
11332 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11333 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11334 (VPTERNLOG132_imm8 imm:$src4))>;
11336 // Additional patterns for matching masked broadcasts with different
11338 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11339 (OpNode _.RC:$src1,
11340 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11341 _.RC:$src2, (i8 imm:$src4)),
11343 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11344 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
11345 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11346 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11347 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
11349 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11350 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11351 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11352 (OpNode _.RC:$src2, _.RC:$src1,
11353 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11354 (i8 imm:$src4)), _.RC:$src1)),
11355 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11356 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
11357 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11358 (OpNode _.RC:$src2,
11359 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11360 _.RC:$src1, (i8 imm:$src4)),
11362 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11363 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
11364 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11365 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11366 _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
11368 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11369 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
// Width fan-out of avx512_ternlog (opcode 0x25, node X86vpternlog):
//   Z    - _.info512, sched.ZMM, EVEX_V512, guarded by [HasAVX512].
//   Z128 - _.info128, sched.XMM, EVEX_V128, guarded by [HasAVX512, HasVLX].
//   Z256 - _.info256, sched.YMM, EVEX_V256, guarded by [HasAVX512, HasVLX].
// NAME is forwarded as the last argument so avx512_ternlog can refer to the
// generated instructions by name.
11372 multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
11373 AVX512VLVectorVTInfo _> {
11374 let Predicates = [HasAVX512] in
11375 defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
11376 _.info512, NAME>, EVEX_V512;
11377 let Predicates = [HasAVX512, HasVLX] in {
11378 defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
11379 _.info128, NAME>, EVEX_V128;
11380 defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
11381 _.info256, NAME>, EVEX_V256;
// Dword (i32) and qword (i64) element variants; the qword variant sets VEX_W.
11385 defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
11386 avx512vl_i32_info>;
11387 defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
11388 avx512vl_i64_info>, VEX_W;
11390 // Patterns to implement vnot using vpternlog instead of creating all ones
11391 // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
11392 // so that the result is only dependent on src0. But we use the same source
11393 // for all operands to prevent a false dependency.
11394 // TODO: We should maybe have a more generalized algorithm for folding to
// 512-bit vectors: the same VPTERNLOGQZrri output is used for every integer
// element type (v64i8, v32i16, v16i32, v8i64).
11396 let Predicates = [HasAVX512] in {
11397 def : Pat<(xor VR512:$src, (v64i8 immAllOnesV)),
11398 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11399 def : Pat<(xor VR512:$src, (v32i16 immAllOnesV)),
11400 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11401 def : Pat<(xor VR512:$src, (v16i32 immAllOnesV)),
11402 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11403 def : Pat<(xor VR512:$src, (v8i64 immAllOnesV)),
11404 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
// vnot for 128-bit and 256-bit vectors when VLX is not available. For each
// integer element type the narrow source is inserted into an undefined v8i64
// value (at sub_xmm or sub_ymm) once per operand, immediate 15 is used, and
// the trailing sub_xmm / sub_ymm index selects the narrow result.
11407 let Predicates = [HasAVX512, NoVLX] in {
// 128-bit sources.
11408 def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
11411 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11412 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11413 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11414 (i8 15)), sub_xmm)>;
11415 def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
11418 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11419 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11420 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11421 (i8 15)), sub_xmm)>;
11422 def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
11425 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11426 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11427 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11428 (i8 15)), sub_xmm)>;
11429 def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
11432 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11433 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11434 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11435 (i8 15)), sub_xmm)>;
// 256-bit sources.
11437 def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
11440 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11441 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11442 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11443 (i8 15)), sub_ymm)>;
11444 def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
11447 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11448 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11449 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11450 (i8 15)), sub_ymm)>;
11451 def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
11454 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11455 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11456 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11457 (i8 15)), sub_ymm)>;
11458 def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
11461 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11462 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11463 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11464 (i8 15)), sub_ymm)>;
// With VLX, xor-with-all-ones on 128-bit and 256-bit integer vectors of every
// element width is selected directly as VPTERNLOGQZ128rri / VPTERNLOGQZ256rri
// with the source in all three operand slots and immediate 15.
11467 let Predicates = [HasVLX] in {
11468 def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
11469 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11470 def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
11471 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11472 def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
11473 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11474 def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
11475 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11477 def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
11478 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11479 def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
11480 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11481 def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
11482 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11483 def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
11484 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11487 //===----------------------------------------------------------------------===//
11488 // AVX-512 - FixupImm
11489 //===----------------------------------------------------------------------===//
// Packed fixupimm instruction forms built on AVX512_maskable_3src.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic prefix; _.Suffix is appended.
//   sched     - scheduling class (sched.Folded / sched.ReadAfterFold are used
//               for the memory forms).
//   _         - vector type info for $dst/$src1/$src2.
//   TblVT     - vector type info supplying the value type of $src3.
// $src1 is tied to $dst for every form.
11491 multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
11492 X86FoldableSchedWrite sched, X86VectorVTInfo _,
11493 X86VectorVTInfo TblVT>{
11494 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
// rri: all-register form.
11495 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11496 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11497 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11498 (X86VFixupimm (_.VT _.RC:$src1),
11500 (TblVT.VT _.RC:$src3),
11501 (i32 imm:$src4))>, Sched<[sched]>;
// rmi: $src3 is loaded from memory with TblVT.LdFrag.
11502 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11503 (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
11504 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11505 (X86VFixupimm (_.VT _.RC:$src1),
11507 (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
11509 Sched<[sched.Folded, sched.ReadAfterFold]>;
// rmbi: $src3 is a scalar load broadcast via X86VBroadcast; marked EVEX_B.
11510 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11511 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11512 OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
11513 "$src2, ${src3}"##_.BroadcastStr##", $src4",
11514 (X86VFixupimm (_.VT _.RC:$src1),
11516 (TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))),
11518 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
11519 } // Constraints = "$src1 = $dst"
// Extends avx512_fixupimm_packed (same parameters) with rrib: an all-register
// form that uses X86VFixupimmSAE, prints "{sae}" in its assembly string and is
// marked EVEX_B. $src1 is tied to $dst.
11522 multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
11523 X86FoldableSchedWrite sched,
11524 X86VectorVTInfo _, X86VectorVTInfo TblVT>
11525 : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
11526 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11527 defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11528 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11529 OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
11530 "$src2, $src3, {sae}, $src4",
11531 (X86VFixupimmSAE (_.VT _.RC:$src1),
11533 (TblVT.VT _.RC:$src3),
11535 EVEX_B, Sched<[sched]>;
// Scalar fixupimm instruction forms built on AVX512_maskable_3src_scalar,
// guarded by HasAVX512, with $src1 tied to $dst.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic prefix; _.Suffix is appended.
//   sched     - scheduling class.
//   _         - type info for $dst/$src1/$src2.
//   _src3VT   - type info supplying the value type and register class of $src3.
11539 multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
11540 X86FoldableSchedWrite sched, X86VectorVTInfo _,
11541 X86VectorVTInfo _src3VT> {
11542 let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
11543 ExeDomain = _.ExeDomain in {
// rri: all-register form.
11544 defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11545 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11546 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11547 (X86VFixupimms (_.VT _.RC:$src1),
11549 (_src3VT.VT _src3VT.RC:$src3),
11550 (i32 imm:$src4))>, Sched<[sched]>;
// rrib: all-register {sae} form using X86VFixupimmSAEs; marked EVEX_B.
// NOTE(review): this is an MRMSrcReg form but it is scheduled with
// sched.Folded / sched.ReadAfterFold, unlike rri above and the packed rrib
// form which use plain sched -- confirm whether Sched<[sched]> was intended.
11551 defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11552 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11553 OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
11554 "$src2, $src3, {sae}, $src4",
11555 (X86VFixupimmSAEs (_.VT _.RC:$src1),
11557 (_src3VT.VT _src3VT.RC:$src3),
11559 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// rmi: $src3 is a scalar load wrapped in scalar_to_vector.
11560 defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
11561 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11562 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11563 (X86VFixupimms (_.VT _.RC:$src1),
11565 (_src3VT.VT (scalar_to_vector
11566 (_src3VT.ScalarLdFrag addr:$src3))),
11568 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates the packed "vfixupimm" forms (opcode 0x54) at all three vector
// widths:
//   Z    - 512-bit, HasAVX512, via avx512_fixupimm_packed_sae (adds rrib).
//   Z128 - 128-bit, HasAVX512 + HasVLX, via avx512_fixupimm_packed.
//   Z256 - 256-bit, HasAVX512 + HasVLX, via avx512_fixupimm_packed.
// _Vec supplies the data vector type infos and _Tbl the $src3 type infos.
11572 multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
11573 AVX512VLVectorVTInfo _Vec,
11574 AVX512VLVectorVTInfo _Tbl> {
11575 let Predicates = [HasAVX512] in
11576 defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
11577 _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
11578 EVEX_4V, EVEX_V512;
11579 let Predicates = [HasAVX512, HasVLX] in {
11580 defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
11581 _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
11582 EVEX_4V, EVEX_V128;
11583 defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
11584 _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
11585 EVEX_4V, EVEX_V256;
// Concrete fixupimm definitions. The scalar SS/SD forms use opcode 0x55 with
// f32x_info/v4i32x_info and f64x_info/v2i64x_info; the packed PS/PD forms pair
// the f32/f64 vector infos with the i32/i64 infos for $src3. All use the
// SchedWriteFAdd scheduling classes; the 64-bit variants add VEX_W.
11589 defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11590 SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
11591 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11592 defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11593 SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
11594 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
11595 defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
11596 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11597 defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
11598 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
11600 // Patterns used to select SSE scalar fp arithmetic instructions from
11603 // (1) a scalar fp operation followed by a blend
11605 // The effect is that the backend no longer emits unnecessary vector
11606 // insert instructions immediately after SSE scalar fp instructions
11607 // like addss or mulss.
11609 // For example, given the following code:
11610 // __m128 foo(__m128 A, __m128 B) {
11615 // Previously we generated:
11616 // addss %xmm0, %xmm1
11617 // movss %xmm1, %xmm0
11619 // We now generate:
11620 // addss %xmm1, %xmm0
11622 // (2) a vector packed single/double fp operation followed by a vector insert
11624 // The effect is that the backend converts the packed fp instruction
11625 // followed by a vector insert into a single SSE scalar fp instruction.
11627 // For example, given the following code:
11628 // __m128 foo(__m128 A, __m128 B) {
11629 // __m128 C = A + B;
11630 // return (__m128) {C[0], A[1], A[2], A[3]};
11633 // Previously we generated:
11634 // addps %xmm0, %xmm1
11635 // movss %xmm1, %xmm0
11637 // We now generate:
11638 // addss %xmm1, %xmm0
11640 // TODO: Some canonicalization in lowering would simplify the number of
11641 // patterns we have to try to match.
// Patterns (HasAVX512) that fold a scalar Op on element 0, re-inserted with
// MoveNode, into the "V" # OpcPrefix # "Z..._Int" instruction forms:
//   Zrr_Int / Zrm_Int     - unmasked, register / scalar-load second operand.
//   Zrr_Intk / Zrm_Intk   - X86selects on VK1WM:$mask with $src0 passthrough.
//   Zrr_Intkz / Zrm_Intkz - X86selects on VK1WM:$mask with ZeroFP passthrough.
// Parameters:
//   Op        - scalar operation node being matched.
//   OpcPrefix - instruction name stem (e.g. "ADDSS").
//   MoveNode  - node that inserts the scalar result into the destination.
//   _         - vector type info (VT, EltVT, FRC, ScalarLdFrag are used).
//   ZeroFP    - PatLeaf matching the zero constant for the zero-masked forms.
11642 multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode,
11643 X86VectorVTInfo _, PatLeaf ZeroFP> {
11644 let Predicates = [HasAVX512] in {
11645 // extracted scalar math op with insert via movss
11646 def : Pat<(MoveNode
11647 (_.VT VR128X:$dst),
11648 (_.VT (scalar_to_vector
11649 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
11651 (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
11652 (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
11653 def : Pat<(MoveNode
11654 (_.VT VR128X:$dst),
11655 (_.VT (scalar_to_vector
11656 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
11657 (_.ScalarLdFrag addr:$src))))),
11658 (!cast<Instruction>("V"#OpcPrefix#Zrm_Int) _.VT:$dst, addr:$src)>;
11660 // extracted masked scalar math op with insert via movss
11661 def : Pat<(MoveNode (_.VT VR128X:$src1),
11663 (X86selects VK1WM:$mask,
11665 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11668 (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk)
11669 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
11670 VK1WM:$mask, _.VT:$src1,
11671 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
11672 def : Pat<(MoveNode (_.VT VR128X:$src1),
11674 (X86selects VK1WM:$mask,
11676 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11677 (_.ScalarLdFrag addr:$src2)),
11679 (!cast<Instruction>("V"#OpcPrefix#Zrm_Intk)
11680 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
11681 VK1WM:$mask, _.VT:$src1, addr:$src2)>;
11683 // extracted masked scalar math op with ZeroFP passthrough (Intkz forms) and insert via movss
11684 def : Pat<(MoveNode (_.VT VR128X:$src1),
11686 (X86selects VK1WM:$mask,
11688 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11689 _.FRC:$src2), (_.EltVT ZeroFP)))),
11690 (!cast<I>("V"#OpcPrefix#Zrr_Intkz)
11691 VK1WM:$mask, _.VT:$src1,
11692 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
11693 def : Pat<(MoveNode (_.VT VR128X:$src1),
11695 (X86selects VK1WM:$mask,
11697 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11698 (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
11699 (!cast<I>("V"#OpcPrefix#Zrm_Intkz) VK1WM:$mask, _.VT:$src1, addr:$src2)>;
// Instantiate the scalar math patterns for fadd/fsub/fmul/fdiv: the SS forms
// use X86Movss with v4f32x_info and fp32imm0, the SD forms use X86Movsd with
// v2f64x_info and fp64imm0.
11703 defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
11704 defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
11705 defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
11706 defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
11708 defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
11709 defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
11710 defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
11711 defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
// Pattern (HasAVX512) that folds a unary OpNode applied to element 0 of $src,
// re-inserted into $dst with Move, into "V" # OpcPrefix # "Zr_Int".
//   OpNode    - unary scalar operation node being matched.
//   OpcPrefix - instruction name stem (e.g. "SQRTSS").
//   Move      - node that inserts the scalar result into $dst.
//   _         - vector type info supplying VT.
11713 multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
11714 SDNode Move, X86VectorVTInfo _> {
11715 let Predicates = [HasAVX512] in {
11716 def : Pat<(_.VT (Move _.VT:$dst,
11717 (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
11718 (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src)>;
// Instantiate the unary pattern for fsqrt on f32 (X86Movss) and f64 (X86Movsd).
11722 defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
11723 defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
11725 //===----------------------------------------------------------------------===//
11726 // AES instructions
11727 //===----------------------------------------------------------------------===//
// EVEX-encoded AES forms built from AESI_binop_rm_int at three widths.
//   Op        - opcode byte.
//   OpStr     - mnemonic.
//   IntPrefix - intrinsic name; "_256" / "_512" are appended for the wider
//               forms and the result is looked up with !cast<Intrinsic>.
// Z128 and Z256 require HasVLX + HasVAES; Z requires HasAVX512 + HasVAES.
11729 multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
11730 let Predicates = [HasVLX, HasVAES] in {
11731 defm Z128 : AESI_binop_rm_int<Op, OpStr,
11732 !cast<Intrinsic>(IntPrefix),
11733 loadv2i64, 0, VR128X, i128mem>,
11734 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
11735 defm Z256 : AESI_binop_rm_int<Op, OpStr,
11736 !cast<Intrinsic>(IntPrefix##"_256"),
11737 loadv4i64, 0, VR256X, i256mem>,
11738 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
11740 let Predicates = [HasAVX512, HasVAES] in
11741 defm Z : AESI_binop_rm_int<Op, OpStr,
11742 !cast<Intrinsic>(IntPrefix##"_512"),
11743 loadv8i64, 0, VR512, i512mem>,
11744 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
// The four AES operations use consecutive opcodes 0xDC-0xDF and the
// int_x86_aesni_* intrinsic name prefixes.
11747 defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
11748 defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
11749 defm VAESDEC : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
11750 defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
11752 //===----------------------------------------------------------------------===//
11753 // PCLMUL instructions - Carry less multiplication
11754 //===----------------------------------------------------------------------===//
// EVEX-encoded VPCLMULQDQ at 512 bits (HasAVX512 + HasVPCLMULQDQ) and at
// 128/256 bits (HasVLX + HasVPCLMULQDQ), each bound to the matching
// int_x86_pclmulqdq* intrinsic, followed by the vpclmulqdq_aliases
// instantiations for the three instruction name stems.
11756 let Predicates = [HasAVX512, HasVPCLMULQDQ] in
11757 defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
11758 EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;
11760 let Predicates = [HasVLX, HasVPCLMULQDQ] in {
11761 defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
11762 EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;
11764 defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
11765 int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
11766 EVEX_CD8<64, CD8VF>, VEX_WIG;
11770 defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
11771 defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
11772 defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
11774 //===----------------------------------------------------------------------===//
11776 //===----------------------------------------------------------------------===//
// Three-source forms with $src1 tied to $dst, built on AVX512_maskable_3src:
//   r - all-register form.
//   m - $src3 loaded from memory with VTI.LdFrag.
//   Op     - opcode byte.
//   OpStr  - mnemonic.
//   OpNode - node applied to ($src1, $src2, $src3).
//   sched  - scheduling class (Folded/ReadAfterFold used for the memory form).
//   VTI    - vector type info.
11778 multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
11779 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
11780 let Constraints = "$src1 = $dst",
11781 ExeDomain = VTI.ExeDomain in {
11782 defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
11783 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
11784 "$src3, $src2", "$src2, $src3",
11785 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
11786 AVX512FMA3Base, Sched<[sched]>;
11787 defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11788 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
11789 "$src3, $src2", "$src2, $src3",
11790 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
11791 (VTI.VT (VTI.LdFrag addr:$src3))))>,
11793 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Extends VBMI2_shift_var_rm (same parameters) with mb: a memory form whose
// $src3 is a scalar load broadcast via X86VBroadcast, marked EVEX_B.
11797 multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
11798 X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
11799 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
11800 let Constraints = "$src1 = $dst",
11801 ExeDomain = VTI.ExeDomain in
11802 defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11803 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
11804 "${src3}"##VTI.BroadcastStr##", $src2",
11805 "$src2, ${src3}"##VTI.BroadcastStr,
11806 (OpNode VTI.RC:$src1, VTI.RC:$src2,
11807 (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>,
11808 AVX512FMA3Base, EVEX_B,
11809 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates VBMI2_shift_var_rm at 512 bits (HasVBMI2) and at 256/128 bits
// (HasVBMI2 + HasVLX), picking sched.ZMM/YMM/XMM and VTI.info512/256/128.
// NOTE(review): the trailing modifier line of each defm is not visible in
// this excerpt.
11812 multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
11813 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
11814 let Predicates = [HasVBMI2] in
11815 defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
11817 let Predicates = [HasVBMI2, HasVLX] in {
11818 defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
11820 defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
// Same width/predicate fan-out as VBMI2_shift_var_rm_common, but using
// VBMI2_shift_var_rmb so each width also gets the broadcast memory form.
// NOTE(review): the trailing modifier line of each defm is not visible in
// this excerpt.
11825 multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
11826 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
11827 let Predicates = [HasVBMI2] in
11828 defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
11830 let Predicates = [HasVBMI2, HasVLX] in {
11831 defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
11833 defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
// Variable-shift family for one mnemonic Prefix: W (i16, opcode wOp, no
// broadcast form), D (i32, opcode dqOp) and Q (i64, opcode dqOp, VEX_W).
// The W and Q variants carry VEX_W; D does not.
11837 multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
11838 SDNode OpNode, X86SchedWriteWidths sched> {
11839 defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, sched,
11840 avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
11841 defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, sched,
11842 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11843 defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, sched,
11844 avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
// Immediate-shift family for one mnemonic Prefix, all gated on HasVBMI2:
// W (i16, opcode wOp) via avx512_common_3Op_rm_imm8, and D/Q (i32/i64, opcode
// dqOp) via avx512_common_3Op_imm8. W and Q carry VEX_W.
11847 multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
11848 SDNode OpNode, X86SchedWriteWidths sched> {
11849 defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", sched,
11850 avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
11851 VEX_W, EVEX_CD8<16, CD8VF>;
11852 defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
11853 OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11854 defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
11855 sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
// Concrete definitions: the variable (…V) and immediate forms of each
// direction share the same opcode pair (0x70/0x71 for left, 0x72/0x73 for
// right) and all use SchedWriteVecIMul.
11859 defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
11860 defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
11861 defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
11862 defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
// Byte/word compress (opcode 0x63) and expand (opcode 0x62) definitions, gated
// on HasVBMI2 and scheduled as WriteVarShuffle256; the word variants add VEX_W.
// NOTE(review): the final modifier line of each VPCOMPRESS defm is not visible
// in this excerpt.
11865 defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
11866 avx512vl_i8_info, HasVBMI2>, EVEX,
11868 defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
11869 avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
11872 defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
11873 avx512vl_i8_info, HasVBMI2>, EVEX;
11874 defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
11875 avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
11877 //===----------------------------------------------------------------------===//
11879 //===----------------------------------------------------------------------===//
// VNNI three-source forms with $src1 tied to $dst (the constraint is applied
// by the enclosing let), built on AVX512_maskable_3src:
//   r  - all-register form.
//   m  - $src3 loaded from memory with VTI.LdFrag.
//   mb - $src3 is a scalar load broadcast via X86VBroadcast; marked EVEX_B.
// Parameters: Op (opcode), OpStr (mnemonic), OpNode (node applied to the three
// sources), sched (scheduling class), VTI (vector type info).
11881 let Constraints = "$src1 = $dst" in
11882 multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
11883 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
11884 defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
11885 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
11886 "$src3, $src2", "$src2, $src3",
11887 (VTI.VT (OpNode VTI.RC:$src1,
11888 VTI.RC:$src2, VTI.RC:$src3))>,
11889 EVEX_4V, T8PD, Sched<[sched]>;
11890 defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11891 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
11892 "$src3, $src2", "$src2, $src3",
11893 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
11894 (VTI.VT (VTI.LdFrag addr:$src3))))>,
11895 EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
11896 Sched<[sched.Folded, sched.ReadAfterFold]>;
11897 defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11898 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
11899 OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
11900 "$src2, ${src3}"##VTI.BroadcastStr,
11901 (OpNode VTI.RC:$src1, VTI.RC:$src2,
11902 (VTI.VT (X86VBroadcast
11903 (VTI.ScalarLdFrag addr:$src3))))>,
11904 EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
11905 T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates VNNI_rmb on 32-bit-element vectors: Z (v16i32, HasVNNI) and
// Z256/Z128 (v8i32x/v4i32x, HasVNNI + HasVLX).
11908 multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
11909 X86SchedWriteWidths sched> {
11910 let Predicates = [HasVNNI] in
11911 defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info>, EVEX_V512;
11912 let Predicates = [HasVNNI, HasVLX] in {
11913 defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info>, EVEX_V256;
11914 defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info>, EVEX_V128;
// The four VNNI dot-product definitions use consecutive opcodes 0x50-0x53.
11918 // FIXME: Is there a better scheduler class for VPDP?
11919 defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul>;
11920 defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul>;
11921 defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul>;
11922 defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul>;
11924 //===----------------------------------------------------------------------===//
11926 //===----------------------------------------------------------------------===//
// Byte/word population count: ctpop on i8/i16 vectors (opcode 0x54), gated on
// HasBITALG, plus the matching avx512_unary_lowering instantiations.
11928 // FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
11929 defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
11930 avx512vl_i8_info, HasBITALG>;
11931 defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
11932 avx512vl_i16_info, HasBITALG>, VEX_W;
11934 defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
11935 defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
// Single-use variant of X86Vpshufbitqmb: matches only when the node has
// exactly one use (N->hasOneUse()).
11937 def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
11938 (X86Vpshufbitqmb node:$src1, node:$src2), [{
11939 return N->hasOneUse();
// Register (rr) and memory (rm) forms of opcode 0x8F built on
// AVX512_maskable_cmp; the result is written to the mask register class
// VTI.KRC. Each form passes both the plain X86Vpshufbitqmb pattern and the
// single-use X86Vpshufbitqmb_su pattern.
// NOTE(review): the mnemonic string argument and the tail of the rr defm are
// not visible in this excerpt.
11942 multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
11943 defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
11944 (ins VTI.RC:$src1, VTI.RC:$src2),
11946 "$src2, $src1", "$src1, $src2",
11947 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
11948 (VTI.VT VTI.RC:$src2)),
11949 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
11950 (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
11952 defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
11953 (ins VTI.RC:$src1, VTI.MemOp:$src2),
11955 "$src2, $src1", "$src1, $src2",
11956 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
11957 (VTI.VT (VTI.LdFrag addr:$src2))),
11958 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
11959 (VTI.VT (VTI.LdFrag addr:$src2)))>,
11960 EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
11961 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates VPSHUFBITQMB_rm at 512 bits (HasBITALG) and at 256/128 bits
// (HasBITALG + HasVLX).
11964 multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
11965 let Predicates = [HasBITALG] in
11966 defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
11967 let Predicates = [HasBITALG, HasVLX] in {
11968 defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
11969 defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
// Defined on i8 vectors with the SchedWriteVecIMul scheduling classes.
11973 // FIXME: Is there a better scheduler class for VPSHUFBITQMB?
11974 defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
11976 //===----------------------------------------------------------------------===//
11978 //===----------------------------------------------------------------------===//
// Instantiates avx512_binop_rm on byte vectors with the commutable flag set
// to 1: Z (v64i8) under HasGFNI + HasAVX512 + HasBWI, and Z256/Z128
// (v32i8x/v16i8x) under HasGFNI + HasVLX + HasBWI.
// NOTE(review): the trailing modifier line of each defm is not visible in
// this excerpt.
11980 multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
11981 X86SchedWriteWidths sched> {
11982 let Predicates = [HasGFNI, HasAVX512, HasBWI] in
11983 defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
11985 let Predicates = [HasGFNI, HasVLX, HasBWI] in {
11986 defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
11988 defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
// vgf2p8mulb uses opcode 0xCF with the X86GF2P8mulb node.
// NOTE(review): the scheduling-class argument line is not visible in this
// excerpt.
11993 defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
11995 EVEX_CD8<8, CD8VF>, T8PD;
// Extends avx512_3Op_rm_imm8 with rmbi, a broadcast memory form (EVEX_B):
// $src2 is an i64 scalar load (loadi64) broadcast to BcstVTI.VT and then
// bitconverted for use as the second operand of OpNode on VTI.VT.
//   VTI     - type info for $dst/$src1 and the result.
//   BcstVTI - type info for the broadcast; also supplies BroadcastStr.
12000 is the inherited base-class line below.
// Instantiates GF2P8AFFINE_avx512_rmb_imm with byte data vectors and i64
// broadcast vectors: Z (v64i8/v8i64) under HasGFNI + HasAVX512 + HasBWI, and
// Z256/Z128 (v32i8x/v4i64x, v16i8x/v2i64x) under HasGFNI + HasVLX + HasBWI.
12012 multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12013 X86SchedWriteWidths sched> {
12014 let Predicates = [HasGFNI, HasAVX512, HasBWI] in
12015 defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
12016 v64i8_info, v8i64_info>, EVEX_V512;
12017 let Predicates = [HasGFNI, HasVLX, HasBWI] in {
12018 defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
12019 v32i8x_info, v4i64x_info>, EVEX_V256;
12020 defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
12021 v16i8x_info, v2i64x_info>, EVEX_V128;
// The affine-inverse form uses opcode 0xCF and the affine form 0xCE; both use
// SchedWriteVecIMul and carry VEX_W and AVX512AIi8Base.
12025 defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
12026 X86GF2P8affineinvqb, SchedWriteVecIMul>,
12027 EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12028 defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
12029 X86GF2P8affineqb, SchedWriteVecIMul>,
12030 EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12033 //===----------------------------------------------------------------------===//
12035 //===----------------------------------------------------------------------===//
// Assembler-only memory forms (empty pattern list) defined with
// AVX512_maskable_3src_in_asm. All are marked mayLoad with no side effects,
// use the SSEPackedSingle domain, tie $src1 to $dst and take an f128mem
// operand as $src3. The PS forms are 512-bit (v16f32_info, EVEX_V512); the SS
// forms use f32x_info with VEX_LIG.
12037 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
12038 Constraints = "$src1 = $dst" in {
12039 defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
12040 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12041 "v4fmaddps", "$src3, $src2", "$src2, $src3",
12042 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12043 Sched<[SchedWriteFMA.ZMM.Folded]>;
12045 defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
12046 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12047 "v4fnmaddps", "$src3, $src2", "$src2, $src3",
12048 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12049 Sched<[SchedWriteFMA.ZMM.Folded]>;
12051 defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
12052 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12053 "v4fmaddss", "$src3, $src2", "$src2, $src3",
12054 []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12055 Sched<[SchedWriteFMA.Scl.Folded]>;
12057 defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
12058 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12059 "v4fnmaddss", "$src3, $src2", "$src2, $src3",
12060 []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12061 Sched<[SchedWriteFMA.Scl.Folded]>;
12064 //===----------------------------------------------------------------------===//
12066 //===----------------------------------------------------------------------===//
// Assembler-only 512-bit memory forms (empty pattern list) in the
// SSEPackedInt domain, marked mayLoad with no side effects, with $src1 tied
// to $dst and an f128mem $src3. Both reuse the SchedWriteFMA.ZMM.Folded
// scheduling class.
12068 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
12069 Constraints = "$src1 = $dst" in {
12070 defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
12071 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12072 "vp4dpwssd", "$src3, $src2", "$src2, $src3",
12073 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12074 Sched<[SchedWriteFMA.ZMM.Folded]>;
12076 defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
12077 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12078 "vp4dpwssds", "$src3, $src2", "$src2, $src3",
12079 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12080 Sched<[SchedWriteFMA.ZMM.Folded]>;
// Pattern-less pseudo instructions that move a VK16PAIR mask-register pair
// to and from memory. MASKPAIR16STORE is marked mayStore.
// NOTE(review): the line preceding MASKPAIR16LOAD is not visible in this
// excerpt; presumably it marks the load as mayLoad -- confirm.
12083 let hasSideEffects = 0 in {
12084 let mayStore = 1 in
12085 def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
12087 def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
12090 //===----------------------------------------------------------------------===//
12092 //===----------------------------------------------------------------------===//
// vp2intersect instruction forms (opcode 0x68) for one vector type. Each
// writes the mask-register-pair operand _.KRPC and matches X86vp2intersect:
//   rr  - both sources in registers.
//   rm  - $src2 loaded from memory with _.LdFrag.
//   rmb - $src2 is a scalar load broadcast via X86VBroadcast; marked EVEX_B.
// NOTE(review): the encoding modifiers that follow the rr def are not visible
// in this excerpt.
12094 multiclass avx512_vp2intersect_modes<X86VectorVTInfo _> {
12095 def rr : I<0x68, MRMSrcReg,
12096 (outs _.KRPC:$dst),
12097 (ins _.RC:$src1, _.RC:$src2),
12098 !strconcat("vp2intersect", _.Suffix,
12099 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12100 [(set _.KRPC:$dst, (X86vp2intersect
12101 _.RC:$src1, (_.VT _.RC:$src2)))]>,
12104 def rm : I<0x68, MRMSrcMem,
12105 (outs _.KRPC:$dst),
12106 (ins _.RC:$src1, _.MemOp:$src2),
12107 !strconcat("vp2intersect", _.Suffix,
12108 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12109 [(set _.KRPC:$dst, (X86vp2intersect
12110 _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12111 EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>;
12113 def rmb : I<0x68, MRMSrcMem,
12114 (outs _.KRPC:$dst),
12115 (ins _.RC:$src1, _.ScalarMemOp:$src2),
12116 !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12117 ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12118 [(set _.KRPC:$dst, (X86vp2intersect
12119 _.RC:$src1, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2)))))]>,
12120 EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
// Instantiates avx512_vp2intersect_modes at 512 bits (HasAVX512 +
// HasVP2INTERSECT) and at 256/128 bits (additionally HasVLX).
12123 multiclass avx512_vp2intersect<AVX512VLVectorVTInfo _> {
12124 let Predicates = [HasAVX512, HasVP2INTERSECT] in
12125 defm Z : avx512_vp2intersect_modes<_.info512>, EVEX_V512;
12127 let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
12128 defm Z256 : avx512_vp2intersect_modes<_.info256>, EVEX_V256;
12129 defm Z128 : avx512_vp2intersect_modes<_.info128>, EVEX_V128;
// Dword and qword variants; the qword variant adds VEX_W.
12133 defm VP2INTERSECTD : avx512_vp2intersect<avx512vl_i32_info>;
12134 defm VP2INTERSECTQ : avx512_vp2intersect<avx512vl_i64_info>, VEX_W;
// Instantiates avx512_binop_rm2 at all three widths for a binary operation
// whose source and destination vector types differ.
//   opc / OpcodeStr - opcode byte and mnemonic.
//   sched           - per-width scheduling classes.
//   _SrcVTInfo      - source type infos (passed as the first and third
//                     type-info arguments of avx512_binop_rm2).
//   _DstVTInfo      - destination type infos.
//   OpNode          - node being matched.
//   prd             - predicate for the 512-bit form; HasVLX is added for the
//                     256/128-bit forms.
//   IsCommutable    - forwarded to avx512_binop_rm2 (default 0).
// Every width is tagged EVEX_CD8<32, CD8VF>.
12136 multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
12137 X86SchedWriteWidths sched,
12138 AVX512VLVectorVTInfo _SrcVTInfo,
12139 AVX512VLVectorVTInfo _DstVTInfo,
12140 SDNode OpNode, Predicate prd,
12141 bit IsCommutable = 0> {
12142 let Predicates = [prd] in
12143 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
12144 _SrcVTInfo.info512, _DstVTInfo.info512,
12145 _SrcVTInfo.info512, IsCommutable>,
12146 EVEX_V512, EVEX_CD8<32, CD8VF>;
12147 let Predicates = [HasVLX, prd] in {
12148 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
12149 _SrcVTInfo.info256, _DstVTInfo.info256,
12150 _SrcVTInfo.info256, IsCommutable>,
12151 EVEX_V256, EVEX_CD8<32, CD8VF>;
12152 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
12153 _SrcVTInfo.info128, _DstVTInfo.info128,
12154 _SrcVTInfo.info128, IsCommutable>,
12155 EVEX_V128, EVEX_CD8<32, CD8VF>;
// vcvtne2ps2bf16: f32 source vectors, i16 destination vectors, opcode 0x72,
// gated on HasBF16 and not commutable.
12159 defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
12160 SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
12161 avx512vl_f32_info, avx512vl_i16_info,
12162 X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
12164 // Truncate Float to BFloat16
// Float-to-BFloat16 truncation at three widths via avx512_vcvt_fp:
//   Z    - v16f32 -> v16i16x with X86cvtneps2bf16 (HasBF16).
//   Z128 - v4f32x -> v8i16x with null_frag, i.e. no instruction pattern
//          (HasBF16 + HasVLX).
//   Z256 - v8f32x -> v8i16x (HasBF16 + HasVLX).
// The InstAlias entries add "x"- and "y"-suffixed mnemonics for the Z128 and
// Z256 rr/rm forms.
// NOTE(review): several interior lines of this multiclass are not visible in
// this excerpt.
12165 multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
12166 X86SchedWriteWidths sched> {
12167 let Predicates = [HasBF16] in {
12168 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
12169 X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
12171 let Predicates = [HasBF16, HasVLX] in {
12172 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
12173 null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
12175 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
12177 sched.YMM, "{1to8}", "{y}">, EVEX_V256;
12179 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
12180 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
12182 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
12183 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
12184 f128mem:$src), 0, "intel">;
12185 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
12186 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
12188 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
12189 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
12190 f256mem:$src), 0, "intel">;
// vcvtneps2bf16 uses opcode 0x72 and the SchedWriteCvtPD2PS scheduling classes.
12194 defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
12195 SchedWriteCvtPD2PS>, T8XS,
12196 EVEX_CD8<32, CD8VF>;
// Explicit selection patterns for the 128-bit vcvtneps2bf16 forms
// (HasBF16 + HasVLX). For each source kind -- register (rr), full load (rm)
// and broadcast f32 load (rmb) -- there is an unmasked pattern on
// X86cvtneps2bf16, a merge-masked pattern on X86mcvtneps2bf16 with $src0 as
// passthrough (k), and a zero-masked pattern with ImmAllZerosV (kz).
// The mask register class is VK4WM.
12198 let Predicates = [HasBF16, HasVLX] in {
12199 // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
12200 // patterns have been disabled with null_frag.
12201 def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
12202 (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12203 def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
12205 (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
12206 def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
12208 (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
12210 def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
12211 (VCVTNEPS2BF16Z128rm addr:$src)>;
12212 def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
12214 (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12215 def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
12217 (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
12219 def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
12220 (X86VBroadcast (loadf32 addr:$src))))),
12221 (VCVTNEPS2BF16Z128rmb addr:$src)>;
12222 def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))),
12223 (v8i16 VR128X:$src0), VK4WM:$mask),
12224 (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12225 def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))),
12226 v8i16x_info.ImmAllZerosV, VK4WM:$mask),
12227 (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
// Three-source maskable instruction in register (r), memory (m) and
// broadcast-memory (mb) forms, built on AVX512_maskable_3src. The first source
// operand is tied to the destination by the enclosing Constraints.
//   _     - vector info supplying the register class, memory operands,
//           broadcast string and result type (_.VT) of the operation.
//   src_v - vector info supplying the load fragments and value type used for
//           the memory operand ($src3) in the m and mb patterns.
12230 let Constraints = "$src1 = $dst" in {
12231 multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
12232 X86VectorVTInfo _, X86VectorVTInfo src_v> {
// Register form: both explicit sources come from _.RC.
12233 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12234 (ins _.RC:$src2, _.RC:$src3),
12235 OpcodeStr, "$src3, $src2", "$src2, $src3",
12236 (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
// Memory form: $src3 is loaded with src_v.LdFrag and bitconverted to src_v.VT
// before being passed to OpNode.
12239 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12240 (ins _.RC:$src2, _.MemOp:$src3),
12241 OpcodeStr, "$src3, $src2", "$src2, $src3",
12242 (_.VT (OpNode _.RC:$src1, _.RC:$src2,
12243 (src_v.VT (bitconvert
12244 (src_v.LdFrag addr:$src3)))))>, EVEX_4V;
// Broadcast form: $src3 is a scalar memory operand loaded with
// src_v.ScalarLdFrag and broadcast via X86VBroadcast; the assembly strings
// append _.BroadcastStr to the memory operand.
12246 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12247 (ins _.RC:$src2, _.ScalarMemOp:$src3),
12249 !strconcat("${src3}", _.BroadcastStr,", $src2"),
12250 !strconcat("$src2, ${src3}", _.BroadcastStr),
12251 (_.VT (OpNode _.RC:$src1, _.RC:$src2,
12252 (src_v.VT (X86VBroadcast(src_v.ScalarLdFrag addr:$src3)))))>,
12256 } // Constraints = "$src1 = $dst"
// Instantiates avx512_dpbf16ps_rm once per vector width, taking the matching
// info512 / info256 / info128 member from both `_` and `src_v`:
//   Z          - 512-bit, EVEX_V512, gated on `prd` alone.
//   Z256, Z128 - 256/128-bit, EVEX_V256 / EVEX_V128, gated on HasVLX and `prd`.
12258 multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
12259 AVX512VLVectorVTInfo _,
12260 AVX512VLVectorVTInfo src_v, Predicate prd> {
12261 let Predicates = [prd] in {
12262 defm Z : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info512,
12263 src_v.info512>, EVEX_V512;
12265 let Predicates = [HasVLX, prd] in {
12266 defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info256,
12267 src_v.info256>, EVEX_V256;
12268 defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info128,
12269 src_v.info128>, EVEX_V128;
// Instantiate avx512_dpbf16ps_sizes with opcode 0x52, mnemonic "vdpbf16ps" and
// node X86dpbf16ps. avx512vl_f32_info is passed as `_` and avx512vl_i32_info
// as `src_v`; all widths are gated on HasBF16.
12273 defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps,
12274 avx512vl_f32_info, avx512vl_i32_info,
12275 HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;