1 //===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file describes the X86 AVX512 instruction set, defining the
10 // instructions, and properties of the instructions which are needed for code
11 // generation, machine code emission, and analysis.
13 //===----------------------------------------------------------------------===//
15 // Group template arguments that can be derived from the vector type (EltNum x
16 // EltVT). These are things like the register class for the writemask, etc.
17 // The idea is to pass one of these as the template argument rather than the
18 // individual arguments.
19 // The template is also used for scalar types, in this case numelts is 1.
20 class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
// NOTE(review): the template parameter list looks truncated here -- the
// 'string suffix' parameter used by the Suffix field below is not visible.
// Confirm against upstream X86InstrAVX512.td.
22 RegisterClass RC = rc;
23 ValueType EltVT = eltvt;
24 int NumElts = numelts;
26 // Corresponding mask register class.
27 RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
29 // Corresponding mask register pair class.
30 RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
31 !cast<RegisterOperand>("VK" # NumElts # "Pair"));
33 // Corresponding write-mask register class.
34 RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
// The mask value type, e.g. v16i1 for a 16-element vector.
37 ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
39 // Suffix used in the instruction mnemonic.
40 string Suffix = suffix;
42 // VTName is a string name for vector VT. For vector types it will be
43 // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
44 // It is a little bit complex for scalar types, where NumElts = 1.
45 // In this case we build v4f32 or v2f64
46 string VTName = "v" # !if (!eq (NumElts, 1),
47 !if (!eq (EltVT.Size, 32), 4,
48 !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;
// The vector value type itself, looked up from the name built above.
51 ValueType VT = !cast<ValueType>(VTName);
53 string EltTypeName = !cast<string>(EltVT);
54 // Size of the element type in bits, e.g. 32 for v16i32.
55 string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
56 int EltSize = EltVT.Size;
58 // "i" for integer types and "f" for floating-point types
59 string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
61 // Size of RC in bits, e.g. 512 for VR512.
// NOTE(review): the definition of the 'Size' field appears to be missing
// from this chunk; MemOp, SubRegIdx, and ZSuffix below all reference it.
64 // The corresponding memory operand, e.g. i512mem for VR512.
65 X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
66 X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
67 // FP scalar memory operand for intrinsics - ssmem/sdmem.
68 Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
69 !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));
// Load pattern fragments: whole-vector, aligned, scalar-element, and
// broadcast-load forms for this VT.
72 PatFrag LdFrag = !cast<PatFrag>("load" # VTName);
74 PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);
76 PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
77 PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);
// Complex pattern used by scalar FP intrinsic memory forms (f32/f64 only).
79 ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
80 !cast<ComplexPattern>("sse_load_f32"),
81 !if (!eq (EltTypeName, "f64"),
82 !cast<ComplexPattern>("sse_load_f64"),
85 // The string to specify embedded broadcast in assembly.
86 string BroadcastStr = "{1to" # NumElts # "}";
88 // 8-bit compressed displacement tuple/subvector format. This is only
89 // defined for NumElts <= 8.
90 CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
91 !cast<CD8VForm>("CD8VT" # NumElts), ?);
// Sub-register index for extracting the low 128/256 bits of a wider register.
93 SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
94 !if (!eq (Size, 256), sub_ymm, ?));
// Execution domain: packed-single for f32, packed-double for f64.
96 Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
97 !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
// Scalar FP register class matching the element type.
100 RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
// All-zeros immediate dag of this vector type, used for zero-masking.
102 dag ImmAllZerosV = (VT immAllZerosV);
// Instruction-name suffix selecting the EVEX vector length variant.
104 string ZSuffix = !if (!eq (Size, 128), "Z128",
105 !if (!eq (Size, 256), "Z256", "Z"));
// 512-bit vector type infos (ZMM register class VR512).
108 def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
109 def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
110 def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
111 def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
112 def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
113 def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;
115 // "x" in v32i8x_info means RC = VR256X
116 def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
117 def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
118 def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
119 def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
120 def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
121 def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;
// 128-bit vector type infos (XMM register class VR128X).
123 def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
124 def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
125 def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
126 def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
127 def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
128 def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
130 // We map scalar types to the smallest (128-bit) vector type
131 // with the appropriate element type. This allows to use the same masking logic.
// Note the integer scalars use GPR classes, while FP scalars live in VR128X.
132 def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
133 def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
134 def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
135 def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
// Bundles the 512/256/128-bit X86VectorVTInfo records for one element type,
// so VL (vector-length) multiclasses can instantiate all three widths at once.
137 class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
138 X86VectorVTInfo i128> {
139 X86VectorVTInfo info512 = i512;
140 X86VectorVTInfo info256 = i256;
141 X86VectorVTInfo info128 = i128;
// Per-element-type VL info bundles (512/256/128-bit triples).
// NOTE(review): each def below is missing its continuation line carrying the
// third (128-bit) template argument and the closing '>;' -- this chunk
// appears to have dropped lines. Confirm against upstream X86InstrAVX512.td.
144 def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
146 def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
148 def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
150 def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
152 def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
154 def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
// Groups the mask register class, write-mask register class, and (presumably,
// via a third template parameter not visible in this chunk) the mask value
// type for a kN-bit mask -- TODO confirm the truncated parameter list.
157 class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
159 RegisterClass KRC = _krc;
160 RegisterClass KRCWM = _krcwm;
// Mask info records for every supported mask width (1..64 bits).
164 def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
165 def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
166 def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
167 def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
168 def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
169 def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
170 def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
172 // This multiclass generates the masking variants from the non-masking
173 // variant. It only provides the assembly pieces for the masking variants.
174 // It assumes custom ISel patterns for masking which can be provided as
175 // template arguments.
// Emits three instructions: NAME (unmasked), NAME#k (merge-masking, EVEX.K),
// and NAME#kz (zero-masking, EVEX.KZ).
176 multiclass AVX512_maskable_custom<bits<8> O, Format F,
// NOTE(review): several parameter lines (e.g. Outs, OpcodeStr, Pattern) are
// not visible in this chunk although they are referenced below.
178 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
180 string AttSrcAsm, string IntelSrcAsm,
182 list<dag> MaskingPattern,
183 list<dag> ZeroMaskingPattern,
184 string MaskingConstraint = "",
185 bit IsCommutable = 0,
186 bit IsKCommutable = 0,
187 bit IsKZCommutable = IsCommutable> {
// Unmasked variant.
188 let isCommutable = IsCommutable in
189 def NAME: AVX512<O, F, Outs, Ins,
190 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
191 "$dst, "#IntelSrcAsm#"}",
194 // Prefer over VMOV*rrk Pat<>
// Merge-masking variant: preserved elements come from $src0 (tied via
// MaskingConstraint below).
195 let isCommutable = IsKCommutable in
196 def NAME#k: AVX512<O, F, Outs, MaskingIns,
197 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
198 "$dst {${mask}}, "#IntelSrcAsm#"}",
201 // In case of the 3src subclass this is overridden with a let.
202 string Constraints = MaskingConstraint;
205 // Zero mask does not add any restrictions to commute operands transformation.
206 // So, it is Ok to use IsCommutable instead of IsKCommutable.
207 let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
208 def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
209 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
210 "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
216 // Common base class of AVX512_maskable and AVX512_maskable_3src.
// Builds the three ISel patterns (unmasked RHS, merge-masked MaskingRHS,
// and zero-masked select against ImmAllZerosV) and forwards them to
// AVX512_maskable_custom.
217 multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
219 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
221 string AttSrcAsm, string IntelSrcAsm,
222 dag RHS, dag MaskingRHS,
223 SDNode Select = vselect,
224 string MaskingConstraint = "",
225 bit IsCommutable = 0,
226 bit IsKCommutable = 0,
227 bit IsKZCommutable = IsCommutable> :
228 AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
229 AttSrcAsm, IntelSrcAsm,
230 [(set _.RC:$dst, RHS)],
231 [(set _.RC:$dst, MaskingRHS)],
// Zero-masking: masked-off elements become zero.
233 (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
234 MaskingConstraint, IsCommutable,
235 IsKCommutable, IsKZCommutable>;
237 // This multiclass generates the unconditional/non-masking, the masking and
238 // the zero-masking variant of the vector instruction. In the masking case, the
239 // preserved vector elements come from a new dummy input operand tied to $dst.
240 // This version uses a separate dag for non-masking and masking.
241 multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
242 dag Outs, dag Ins, string OpcodeStr,
243 string AttSrcAsm, string IntelSrcAsm,
244 dag RHS, dag MaskRHS,
245 bit IsCommutable = 0, bit IsKCommutable = 0,
246 SDNode Select = vselect> :
247 AVX512_maskable_custom<O, F, Outs, Ins,
// Merge-masking ins: prepend the tied pass-through operand and the mask.
248 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
// Zero-masking ins: only the mask is prepended.
249 !con((ins _.KRCWM:$mask), Ins),
250 OpcodeStr, AttSrcAsm, IntelSrcAsm,
251 [(set _.RC:$dst, RHS)],
253 (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
255 (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
256 "$src0 = $dst", IsCommutable, IsKCommutable>;
258 // This multiclass generates the unconditional/non-masking, the masking and
259 // the zero-masking variant of the vector instruction. In the masking case, the
260 // preserved vector elements come from a new dummy input operand tied to $dst.
261 multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
262 dag Outs, dag Ins, string OpcodeStr,
263 string AttSrcAsm, string IntelSrcAsm,
265 bit IsCommutable = 0, bit IsKCommutable = 0,
266 bit IsKZCommutable = IsCommutable,
267 SDNode Select = vselect> :
268 AVX512_maskable_common<O, F, _, Outs, Ins,
// Merge-masking ins: tied pass-through $src0 plus the write-mask.
269 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
270 !con((ins _.KRCWM:$mask), Ins),
271 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
// Same RHS is used for masked and unmasked forms (contrast
// AVX512_maskable_split, which takes two separate dags).
272 (Select _.KRCWM:$mask, RHS, _.RC:$src0),
273 Select, "$src0 = $dst", IsCommutable, IsKCommutable,
276 // This multiclass generates the unconditional/non-masking, the masking and
277 // the zero-masking variant of the scalar instruction.
// Same as AVX512_maskable but selects with X86selects (scalar lane select)
// and disables all commuting.
278 multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
279 dag Outs, dag Ins, string OpcodeStr,
280 string AttSrcAsm, string IntelSrcAsm,
282 AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
283 RHS, 0, 0, 0, X86selects>;
285 // Similar to AVX512_maskable but in this case one of the source operands
286 // ($src1) is already tied to $dst so we just use that for the preserved
287 // vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
289 multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
290 dag Outs, dag NonTiedIns, string OpcodeStr,
291 string AttSrcAsm, string IntelSrcAsm,
293 bit IsCommutable = 0,
294 bit IsKCommutable = 0,
295 SDNode Select = vselect,
297 AVX512_maskable_common<O, F, _, Outs,
// $src1 is prepended to every ins list; it is the tied operand.
298 !con((ins _.RC:$src1), NonTiedIns),
299 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
300 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
301 OpcodeStr, AttSrcAsm, IntelSrcAsm,
// When MaskOnly is set, the unmasked pattern is suppressed via null_frag.
302 !if(MaskOnly, (null_frag), RHS),
// Merge-masking preserves elements from the tied $src1, not a dummy $src0,
// hence the empty MaskingConstraint ("") below.
303 (Select _.KRCWM:$mask, RHS, _.RC:$src1),
304 Select, "", IsCommutable, IsKCommutable>;
306 // Similar to AVX512_maskable_3src but in this case the input VT for the tied
307 // operand differs from the output VT. This requires a bitconvert on
308 // the preserved vector going into the vselect.
309 // NOTE: The unmasked pattern is disabled.
310 multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
311 X86VectorVTInfo InVT,
312 dag Outs, dag NonTiedIns, string OpcodeStr,
313 string AttSrcAsm, string IntelSrcAsm,
314 dag RHS, bit IsCommutable = 0> :
315 AVX512_maskable_common<O, F, OutVT, Outs,
316 !con((ins InVT.RC:$src1), NonTiedIns),
317 !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
318 !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
// (null_frag) disables the unmasked ISel pattern entirely.
319 OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
// bitconvert reinterprets the tied input in the output VT for the select.
320 (vselect InVT.KRCWM:$mask, RHS,
321 (bitconvert InVT.RC:$src1)),
322 vselect, "", IsCommutable>;
// Scalar flavor of AVX512_maskable_3src: identical wiring but selects with
// X86selects so only the low element is merged/zeroed.
324 multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
325 dag Outs, dag NonTiedIns, string OpcodeStr,
326 string AttSrcAsm, string IntelSrcAsm,
328 bit IsCommutable = 0,
329 bit IsKCommutable = 0,
331 AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
332 IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
333 X86selects, MaskOnly>;
// Assembler-only maskable variant: emits the asm strings for all three
// forms but supplies an ISel pattern only for the unmasked instruction
// (masking/zero-masking pattern lists are empty).
335 multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
338 string AttSrcAsm, string IntelSrcAsm,
340 AVX512_maskable_custom<O, F, Outs, Ins,
341 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
342 !con((ins _.KRCWM:$mask), Ins),
343 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
// Assembler-only variant of the 3src form: $src1 is the tied operand, and
// no masking/zero-masking ISel patterns are provided.
346 multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
347 dag Outs, dag NonTiedIns,
349 string AttSrcAsm, string IntelSrcAsm,
351 AVX512_maskable_custom<O, F, Outs,
352 !con((ins _.RC:$src1), NonTiedIns),
353 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
354 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
355 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
358 // Instruction with mask that puts result in mask register,
359 // like "compare" and "vptest"
// Only two variants exist here (no zero-masking form, since the destination
// is itself a mask register): NAME (unmasked) and NAME#k (EVEX.K).
360 multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
362 dag Ins, dag MaskingIns,
364 string AttSrcAsm, string IntelSrcAsm,
366 list<dag> MaskingPattern,
367 bit IsCommutable = 0> {
368 let isCommutable = IsCommutable in {
369 def NAME: AVX512<O, F, Outs, Ins,
370 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
371 "$dst, "#IntelSrcAsm#"}",
// Masked compare: the write-mask gates which result bits are produced.
374 def NAME#k: AVX512<O, F, Outs, MaskingIns,
375 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
376 "$dst {${mask}}, "#IntelSrcAsm#"}",
377 MaskingPattern>, EVEX_K;
// Builds the set-to-KRC ISel patterns for the cmp-style instructions and
// forwards to AVX512_maskable_custom_cmp.
381 multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
383 dag Ins, dag MaskingIns,
385 string AttSrcAsm, string IntelSrcAsm,
386 dag RHS, dag MaskingRHS,
387 bit IsCommutable = 0> :
388 AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
389 AttSrcAsm, IntelSrcAsm,
390 [(set _.KRC:$dst, RHS)],
391 [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
// Top-level cmp multiclass: masked result is the AND of the write-mask with
// RHS_su (a single-use variant of RHS, to keep the fold profitable).
393 multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
394 dag Outs, dag Ins, string OpcodeStr,
395 string AttSrcAsm, string IntelSrcAsm,
396 dag RHS, dag RHS_su, bit IsCommutable = 0> :
397 AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
398 !con((ins _.KRCWM:$mask), Ins),
399 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
400 (and _.KRCWM:$mask, RHS_su), IsCommutable>;
403 // Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
404 // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
405 // swizzled by ExecutionDomainFix to pxor.
406 // We set canFoldAsLoad because this can be converted to a constant-pool
407 // load of an all-zeros value if folding it would be beneficial.
408 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
409 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
410 def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
411 [(set VR512:$dst, (v16i32 immAllZerosV))]>;
412 def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
413 [(set VR512:$dst, (v16i32 immAllOnesV))]>;
// Map zero vectors of the remaining 512-bit types onto the same pseudo.
416 let Predicates = [HasAVX512] in {
417 def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
418 def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
419 def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
420 def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
421 def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
424 // Alias instructions that allow VPTERNLOG to be used with a mask to create
425 // a mix of all ones and all zeros elements. This is done this way to force
426 // the same register to be used as input for all three sources.
427 let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
// 16 x i32 variant: each selected lane becomes all-ones, others zero.
428 def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
429 (ins VK16WM:$mask), "",
430 [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
431 (v16i32 immAllOnesV),
432 (v16i32 immAllZerosV)))]>;
// 8 x i64 variant.
433 def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
434 (ins VK8WM:$mask), "",
435 [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
437 (v8i64 immAllZerosV)))]>;
// 128-bit and 256-bit zero pseudos, analogous to AVX512_512_SET0 above.
440 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
441 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
442 def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
443 [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
444 def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
445 [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
// Map the remaining 128/256-bit zero-vector types onto the same pseudos.
448 let Predicates = [HasAVX512] in {
449 def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
450 def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
451 def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
452 def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
453 def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
454 def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
455 def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
456 def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
457 def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
458 def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
461 // Alias instructions that map fld0 to xorps for sse or vxorps for avx.
462 // This is expanded by ExpandPostRAPseudos.
463 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
464 isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
// Scalar f32 / f64 / f128 +0.0 materialization pseudos.
465 def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
466 [(set FR32X:$dst, fp32imm0)]>;
467 def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
468 [(set FR64X:$dst, fp64imm0)]>;
469 def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
470 [(set VR128X:$dst, fp128imm0)]>;
473 //===----------------------------------------------------------------------===//
474 // AVX-512 - VECTOR INSERT
477 // Supports two different pattern operators for mask and unmasked ops. Allows
478 // null_frag to be passed for one.
// Defines the register (rr) and memory (rm) forms of a VINSERT instruction,
// each with the full set of masking variants via AVX512_maskable_split.
479 multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
481 SDPatternOperator vinsert_insert,
482 SDPatternOperator vinsert_for_mask,
483 X86FoldableSchedWrite sched> {
484 let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
485 defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
486 (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
// Mnemonic, e.g. "vinsertf32x4" for inserting 4 x f32.
487 "vinsert" # From.EltTypeName # "x" # From.NumElts,
488 "$src3, $src2, $src1", "$src1, $src2, $src3",
489 (vinsert_insert:$src3 (To.VT To.RC:$src1),
490 (From.VT From.RC:$src2),
492 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
493 (From.VT From.RC:$src2),
495 AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
// Memory form: the inserted subvector is loaded from $src2.
497 defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
498 (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
499 "vinsert" # From.EltTypeName # "x" # From.NumElts,
500 "$src3, $src2, $src1", "$src1, $src2, $src3",
501 (vinsert_insert:$src3 (To.VT To.RC:$src1),
502 (From.VT (From.LdFrag addr:$src2)),
504 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
505 (From.VT (From.LdFrag addr:$src2)),
506 (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
507 EVEX_CD8<From.EltSize, From.CD8TupleForm>,
508 Sched<[sched.Folded, sched.ReadAfterFold]>;
512 // Passes the same pattern operator for masked and unmasked ops.
513 multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
515 SDPatternOperator vinsert_insert,
516 X86FoldableSchedWrite sched> :
517 vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;
// Lowering patterns that select an already-defined VINSERT instruction
// (looked up by name) for additional source/dest value types.
519 multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
520 X86VectorVTInfo To, PatFrag vinsert_insert,
521 SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
522 let Predicates = p in {
// Register form.
523 def : Pat<(vinsert_insert:$ins
524 (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
525 (To.VT (!cast<Instruction>(InstrStr#"rr")
526 To.RC:$src1, From.RC:$src2,
527 (INSERT_get_vinsert_imm To.RC:$ins)))>;
// Memory form: fold the subvector load into the instruction.
529 def : Pat<(vinsert_insert:$ins
531 (From.VT (From.LdFrag addr:$src2)),
533 (To.VT (!cast<Instruction>(InstrStr#"rm")
534 To.RC:$src1, addr:$src2,
535 (INSERT_get_vinsert_imm To.RC:$ins)))>;
// Instantiates the full VINSERT family for one 32-bit and one 64-bit element
// type (f32/f64 for VINSERTF, i32/i64 for VINSERTI) across vector lengths.
539 multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
540 ValueType EltVT64, int Opcode256,
541 X86FoldableSchedWrite sched> {
// 32x4 forms: insert 128 bits into 256 (VLX) or 512 bits.
543 let Predicates = [HasVLX] in
544 defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
545 X86VectorVTInfo< 4, EltVT32, VR128X>,
546 X86VectorVTInfo< 8, EltVT32, VR256X>,
547 vinsert128_insert, sched>, EVEX_V256;
549 defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
550 X86VectorVTInfo< 4, EltVT32, VR128X>,
551 X86VectorVTInfo<16, EltVT32, VR512>,
552 vinsert128_insert, sched>, EVEX_V512;
// 64x4 form: insert 256 bits into 512.
554 defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
555 X86VectorVTInfo< 4, EltVT64, VR256X>,
556 X86VectorVTInfo< 8, EltVT64, VR512>,
557 vinsert256_insert, sched>, VEX_W, EVEX_V512;
559 // Even with DQI we'd like to only use these instructions for masking.
560 let Predicates = [HasVLX, HasDQI] in
561 defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
562 X86VectorVTInfo< 2, EltVT64, VR128X>,
563 X86VectorVTInfo< 4, EltVT64, VR256X>,
// null_frag disables the unmasked pattern: unmasked inserts prefer the
// 32x4 forms, which do not require DQI.
564 null_frag, vinsert128_insert, sched>,
567 // Even with DQI we'd like to only use these instructions for masking.
568 let Predicates = [HasDQI] in {
569 defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
570 X86VectorVTInfo< 2, EltVT64, VR128X>,
571 X86VectorVTInfo< 8, EltVT64, VR512>,
572 null_frag, vinsert128_insert, sched>,
575 defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
576 X86VectorVTInfo< 8, EltVT32, VR256X>,
577 X86VectorVTInfo<16, EltVT32, VR512>,
578 null_frag, vinsert256_insert, sched>,
583 // FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
// Instantiate FP (0x18/0x1a) and integer (0x38/0x3a) insert families.
584 defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
585 defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
587 // Codegen pattern with the alternative types,
588 // Even with AVX512DQ we'll still use these for unmasked operations.
589 defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
590 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
591 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
592 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// 128-bit into 512-bit, f64/i64 element types reuse the 32x4 instructions.
594 defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
595 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
596 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
597 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// 256-bit into 512-bit, f32/i32 element types reuse the 64x4 instructions.
599 defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
600 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
601 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
602 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
604 // Codegen pattern with the alternative types insert VEC128 into VEC256
605 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
606 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
607 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
608 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
609 // Codegen pattern with the alternative types insert VEC128 into VEC512
610 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
611 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
612 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
613 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
614 // Codegen pattern with the alternative types insert VEC256 into VEC512
615 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
616 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
617 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
618 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
// Masked-insert patterns where the mask's element type (Cast) differs from
// the instruction's native type: the vselect operates in the Cast VT, so the
// insert result (and, for the merge case, the tied input) are viewed through
// that type. Covers rrk/rmk (merge) and rrkz/rmkz (zeroing) forms.
621 multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
622 X86VectorVTInfo To, X86VectorVTInfo Cast,
623 PatFrag vinsert_insert,
624 SDNodeXForm INSERT_get_vinsert_imm,
626 let Predicates = p in {
// Merge-masking, register source.
628 (vselect Cast.KRCWM:$mask,
630 (vinsert_insert:$ins (To.VT To.RC:$src1),
631 (From.VT From.RC:$src2),
634 (!cast<Instruction>(InstrStr#"rrk")
635 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
636 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Merge-masking, memory source.
638 (vselect Cast.KRCWM:$mask,
640 (vinsert_insert:$ins (To.VT To.RC:$src1),
643 (From.LdFrag addr:$src2))),
646 (!cast<Instruction>(InstrStr#"rmk")
647 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
648 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Zero-masking, register source.
651 (vselect Cast.KRCWM:$mask,
653 (vinsert_insert:$ins (To.VT To.RC:$src1),
654 (From.VT From.RC:$src2),
657 (!cast<Instruction>(InstrStr#"rrkz")
658 Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
659 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Zero-masking, memory source.
661 (vselect Cast.KRCWM:$mask,
663 (vinsert_insert:$ins (To.VT To.RC:$src1),
664 (From.VT (From.LdFrag addr:$src2)),
667 (!cast<Instruction>(InstrStr#"rmkz")
668 Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
669 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Masked-cast insert patterns: 128-bit subvector into 256-bit destination.
673 defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
674 v8f32x_info, vinsert128_insert,
675 INSERT_get_vinsert128_imm, [HasVLX]>;
676 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
677 v4f64x_info, vinsert128_insert,
678 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
680 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
681 v8i32x_info, vinsert128_insert,
682 INSERT_get_vinsert128_imm, [HasVLX]>;
683 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
684 v8i32x_info, vinsert128_insert,
685 INSERT_get_vinsert128_imm, [HasVLX]>;
686 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
687 v8i32x_info, vinsert128_insert,
688 INSERT_get_vinsert128_imm, [HasVLX]>;
689 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
690 v4i64x_info, vinsert128_insert,
691 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
692 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
693 v4i64x_info, vinsert128_insert,
694 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
695 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
696 v4i64x_info, vinsert128_insert,
697 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
// 128-bit subvector into 512-bit destination.
699 defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
700 v16f32_info, vinsert128_insert,
701 INSERT_get_vinsert128_imm, [HasAVX512]>;
702 defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
703 v8f64_info, vinsert128_insert,
704 INSERT_get_vinsert128_imm, [HasDQI]>;
706 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
707 v16i32_info, vinsert128_insert,
708 INSERT_get_vinsert128_imm, [HasAVX512]>;
709 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
710 v16i32_info, vinsert128_insert,
711 INSERT_get_vinsert128_imm, [HasAVX512]>;
712 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
713 v16i32_info, vinsert128_insert,
714 INSERT_get_vinsert128_imm, [HasAVX512]>;
715 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
716 v8i64_info, vinsert128_insert,
717 INSERT_get_vinsert128_imm, [HasDQI]>;
718 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
719 v8i64_info, vinsert128_insert,
720 INSERT_get_vinsert128_imm, [HasDQI]>;
721 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
722 v8i64_info, vinsert128_insert,
723 INSERT_get_vinsert128_imm, [HasDQI]>;
// 256-bit subvector into 512-bit destination.
725 defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
726 v16f32_info, vinsert256_insert,
727 INSERT_get_vinsert256_imm, [HasDQI]>;
728 defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
729 v8f64_info, vinsert256_insert,
730 INSERT_get_vinsert256_imm, [HasAVX512]>;
732 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
733 v16i32_info, vinsert256_insert,
734 INSERT_get_vinsert256_imm, [HasDQI]>;
735 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
736 v16i32_info, vinsert256_insert,
737 INSERT_get_vinsert256_imm, [HasDQI]>;
738 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
739 v16i32_info, vinsert256_insert,
740 INSERT_get_vinsert256_imm, [HasDQI]>;
741 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
742 v8i64_info, vinsert256_insert,
743 INSERT_get_vinsert256_imm, [HasAVX512]>;
744 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
745 v8i64_info, vinsert256_insert,
746 INSERT_get_vinsert256_imm, [HasAVX512]>;
747 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
748 v8i64_info, vinsert256_insert,
749 INSERT_get_vinsert256_imm, [HasAVX512]>;
751 // vinsertps - insert f32 to XMM
752 let ExeDomain = SSEPackedSingle in {
// Register form. Commutable: X86insertps can swap its vector sources with
// a suitably adjusted immediate.
753 let isCommutable = 1 in
754 def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
755 (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
756 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
757 [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
758 EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
// Memory form: the f32 element is loaded and scalar_to_vector'd.
759 def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
760 (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
761 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
762 [(set VR128X:$dst, (X86insertps VR128X:$src1,
763 (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
765 EVEX_4V, EVEX_CD8<32, CD8VT1>,
766 Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
769 //===----------------------------------------------------------------------===//
770 // AVX-512 VECTOR EXTRACT
773 // Supports two different pattern operators for mask and unmasked ops. Allows
774 // null_frag to be passed for one.
// Defines the VEXTRACT forms: rr (reg-to-reg with full masking variants),
// mr (store to memory), and mrk (masked store, assembler/no-pattern).
775 multiclass vextract_for_size_split<int Opcode,
776 X86VectorVTInfo From, X86VectorVTInfo To,
777 SDPatternOperator vextract_extract,
778 SDPatternOperator vextract_for_mask,
779 SchedWrite SchedRR, SchedWrite SchedMR> {
781 let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
782 defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
783 (ins From.RC:$src1, u8imm:$idx),
// Mnemonic, e.g. "vextractf32x4" for extracting 4 x f32.
784 "vextract" # To.EltTypeName # "x" # To.NumElts,
785 "$idx, $src1", "$src1, $idx",
786 (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
787 (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
788 AVX512AIi8Base, EVEX, Sched<[SchedRR]>;
// Store form: extract the subvector straight to memory.
790 def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
791 (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
792 "vextract" # To.EltTypeName # "x" # To.NumElts #
793 "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
794 [(store (To.VT (vextract_extract:$idx
795 (From.VT From.RC:$src1), (iPTR imm))),
// Masked store form: assembler-only, no ISel pattern.
799 let mayStore = 1, hasSideEffects = 0 in
800 def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
801 (ins To.MemOp:$dst, To.KRCWM:$mask,
802 From.RC:$src1, u8imm:$idx),
803 "vextract" # To.EltTypeName # "x" # To.NumElts #
804 "\t{$idx, $src1, $dst {${mask}}|"
805 "$dst {${mask}}, $src1, $idx}", []>,
806 EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
810 // Passes the same pattern operator for masked and unmasked ops.
811 multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
813 SDPatternOperator vextract_extract,
814 SchedWrite SchedRR, SchedWrite SchedMR> :
815 vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;
817 // Codegen pattern for the alternative types
818 multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
819 X86VectorVTInfo To, PatFrag vextract_extract,
820 SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
821 let Predicates = p in {
822 def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
823 (To.VT (!cast<Instruction>(InstrStr#"rr")
825 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
826 def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
827 (iPTR imm))), addr:$dst),
828 (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
829 (EXTRACT_get_vextract_imm To.RC:$ext))>;
833 multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
834 ValueType EltVT64, int Opcode256,
835 SchedWrite SchedRR, SchedWrite SchedMR> {
836 let Predicates = [HasAVX512] in {
837 defm NAME # "32x4Z" : vextract_for_size<Opcode128,
838 X86VectorVTInfo<16, EltVT32, VR512>,
839 X86VectorVTInfo< 4, EltVT32, VR128X>,
840 vextract128_extract, SchedRR, SchedMR>,
841 EVEX_V512, EVEX_CD8<32, CD8VT4>;
842 defm NAME # "64x4Z" : vextract_for_size<Opcode256,
843 X86VectorVTInfo< 8, EltVT64, VR512>,
844 X86VectorVTInfo< 4, EltVT64, VR256X>,
845 vextract256_extract, SchedRR, SchedMR>,
846 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
848 let Predicates = [HasVLX] in
849 defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
850 X86VectorVTInfo< 8, EltVT32, VR256X>,
851 X86VectorVTInfo< 4, EltVT32, VR128X>,
852 vextract128_extract, SchedRR, SchedMR>,
853 EVEX_V256, EVEX_CD8<32, CD8VT4>;
855 // Even with DQI we'd like to only use these instructions for masking.
856 let Predicates = [HasVLX, HasDQI] in
857 defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
858 X86VectorVTInfo< 4, EltVT64, VR256X>,
859 X86VectorVTInfo< 2, EltVT64, VR128X>,
860 null_frag, vextract128_extract, SchedRR, SchedMR>,
861 VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;
863 // Even with DQI we'd like to only use these instructions for masking.
864 let Predicates = [HasDQI] in {
865 defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
866 X86VectorVTInfo< 8, EltVT64, VR512>,
867 X86VectorVTInfo< 2, EltVT64, VR128X>,
868 null_frag, vextract128_extract, SchedRR, SchedMR>,
869 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
870 defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
871 X86VectorVTInfo<16, EltVT32, VR512>,
872 X86VectorVTInfo< 8, EltVT32, VR256X>,
873 null_frag, vextract256_extract, SchedRR, SchedMR>,
874 EVEX_V512, EVEX_CD8<32, CD8VT8>;
878 // TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
// Instantiate the full families of AVX-512 subvector-extract instructions:
// floating-point forms (VEXTRACTF32x4/F64x2/F64x4/F32x8...) from opcodes
// 0x19 (128-bit) / 0x1b (256-bit), and integer forms (VEXTRACTI...) from
// 0x39 / 0x3b, with shuffle scheduling for the register forms and store
// scheduling for the memory forms.
879 defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
880 defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
882 // extract_subvector codegen patterns with the alternative types.
883 // Even with AVX512DQ we'll still use these for unmasked operations.
// Map f64/i64 element extracts onto the dword-granularity 32x4 instructions
// (the element type does not matter for an unmasked whole-subvector move).
884 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
885 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
886 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
887 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Conversely, map f32/i32 element 256-bit extracts onto the qword 64x4 forms.
889 defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
890 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
891 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
892 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
// VLX variants: 128-bit extract from a 256-bit source with mismatched
// element types, again via the 32x4 instructions.
894 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
895 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
896 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
897 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
899 // Codegen pattern with the alternative types extract VEC128 from VEC256
// i16/i8 element vectors have no dedicated extract instruction, so reuse the
// dword-granularity 32x4 forms for them as well.
900 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
901 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
902 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
903 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
905 // Codegen pattern with the alternative types extract VEC128 from VEC512
906 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
907 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
908 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
909 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
910 // Codegen pattern with the alternative types extract VEC256 from VEC512
// 256-bit halves of a 512-bit i16/i8 vector go through the qword 64x4 forms.
911 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
912 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
913 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
914 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
917 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
918 // smaller extract to enable EVEX->VEX.
919 let Predicates = [NoVLX] in {
920 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
921 (v2i64 (VEXTRACTI128rr
922 (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
924 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
925 (v2f64 (VEXTRACTF128rr
926 (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
928 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
929 (v4i32 (VEXTRACTI128rr
930 (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
932 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
933 (v4f32 (VEXTRACTF128rr
934 (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
936 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
937 (v8i16 (VEXTRACTI128rr
938 (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
940 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
941 (v16i8 (VEXTRACTI128rr
942 (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
946 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
947 // smaller extract to enable EVEX->VEX.
948 let Predicates = [HasVLX] in {
949 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
950 (v2i64 (VEXTRACTI32x4Z256rr
951 (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
953 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
954 (v2f64 (VEXTRACTF32x4Z256rr
955 (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
957 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
958 (v4i32 (VEXTRACTI32x4Z256rr
959 (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
961 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
962 (v4f32 (VEXTRACTF32x4Z256rr
963 (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
965 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
966 (v8i16 (VEXTRACTI32x4Z256rr
967 (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
969 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
970 (v16i8 (VEXTRACTI32x4Z256rr
971 (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
976 // Additional patterns for handling a bitcast between the vselect and the
977 // extract_subvector.
978 multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
979 X86VectorVTInfo To, X86VectorVTInfo Cast,
980 PatFrag vextract_extract,
981 SDNodeXForm EXTRACT_get_vextract_imm,
983 let Predicates = p in {
984 def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
986 (To.VT (vextract_extract:$ext
987 (From.VT From.RC:$src), (iPTR imm)))),
989 (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
990 Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
991 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
993 def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
995 (To.VT (vextract_extract:$ext
996 (From.VT From.RC:$src), (iPTR imm)))),
998 (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
999 Cast.KRCWM:$mask, From.RC:$src,
1000 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
// Masked 128-bit extracts from 256-bit sources where a bitcast separates the
// vselect from the extract_subvector: the mask/result type (third VTInfo,
// "Cast") differs from the extracted type ("To"), so select the rrk/rrkz
// forms of the instruction whose element size matches the Cast type.
1004 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
1005 v4f32x_info, vextract128_extract,
1006 EXTRACT_get_vextract128_imm, [HasVLX]>;
1007 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
1008 v2f64x_info, vextract128_extract,
1009 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
// Integer variants; the 64x2 forms additionally require DQI.
1011 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
1012 v4i32x_info, vextract128_extract,
1013 EXTRACT_get_vextract128_imm, [HasVLX]>;
1014 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
1015 v4i32x_info, vextract128_extract,
1016 EXTRACT_get_vextract128_imm, [HasVLX]>;
1017 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
1018 v4i32x_info, vextract128_extract,
1019 EXTRACT_get_vextract128_imm, [HasVLX]>;
1020 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
1021 v2i64x_info, vextract128_extract,
1022 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1023 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
1024 v2i64x_info, vextract128_extract,
1025 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1026 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
1027 v2i64x_info, vextract128_extract,
1028 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
// Masked 128-bit extracts from 512-bit sources through a bitcast. 32x4 forms
// only need AVX512; 64x2 forms need DQI.
1030 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
1031 v4f32x_info, vextract128_extract,
1032 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1033 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
1034 v2f64x_info, vextract128_extract,
1035 EXTRACT_get_vextract128_imm, [HasDQI]>;
1037 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
1038 v4i32x_info, vextract128_extract,
1039 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1040 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
1041 v4i32x_info, vextract128_extract,
1042 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1043 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
1044 v4i32x_info, vextract128_extract,
1045 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1046 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
1047 v2i64x_info, vextract128_extract,
1048 EXTRACT_get_vextract128_imm, [HasDQI]>;
1049 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
1050 v2i64x_info, vextract128_extract,
1051 EXTRACT_get_vextract128_imm, [HasDQI]>;
1052 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
1053 v2i64x_info, vextract128_extract,
1054 EXTRACT_get_vextract128_imm, [HasDQI]>;
// Masked 256-bit extracts from 512-bit sources through a bitcast. 32x8 forms
// need DQI; 64x4 forms only need AVX512.
1056 defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
1057 v8f32x_info, vextract256_extract,
1058 EXTRACT_get_vextract256_imm, [HasDQI]>;
1059 defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
1060 v4f64x_info, vextract256_extract,
1061 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1063 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
1064 v8i32x_info, vextract256_extract,
1065 EXTRACT_get_vextract256_imm, [HasDQI]>;
1066 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
1067 v8i32x_info, vextract256_extract,
1068 EXTRACT_get_vextract256_imm, [HasDQI]>;
1069 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
1070 v8i32x_info, vextract256_extract,
1071 EXTRACT_get_vextract256_imm, [HasDQI]>;
1072 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
1073 v4i64x_info, vextract256_extract,
1074 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1075 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
1076 v4i64x_info, vextract256_extract,
1077 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1078 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
1079 v4i64x_info, vextract256_extract,
1080 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1082 // vextractps - extract 32 bits from XMM
// EVEX-encoded register form: moves the dword selected by imm:$src2 from the
// v4f32 source (viewed as v4i32) into a 32-bit GPR.
1083 def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
1084 (ins VR128X:$src1, u8imm:$src2),
1085 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1086 [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
1087 EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
1089 def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
1090 (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1091 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1092 [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1094 EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1096 //===---------------------------------------------------------------------===//
1097 // AVX-512 BROADCAST
1099 // broadcast with a scalar argument.
1100 multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
1102 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
1103 def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1104 (!cast<Instruction>(Name#DestInfo.ZSuffix#r)
1105 (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1106 def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
1107 (X86VBroadcast SrcInfo.FRC:$src),
1108 DestInfo.RC:$src0)),
1109 (!cast<Instruction>(Name#DestInfo.ZSuffix#rk)
1110 DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1111 (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1112 def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
1113 (X86VBroadcast SrcInfo.FRC:$src),
1114 DestInfo.ImmAllZerosV)),
1115 (!cast<Instruction>(Name#DestInfo.ZSuffix#rkz)
1116 DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1119 // Split version to allow mask and broadcast node to be different types. This
1120 // helps support the 32x2 broadcasts.
1121 multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1123 SchedWrite SchedRR, SchedWrite SchedRM,
1124 X86VectorVTInfo MaskInfo,
1125 X86VectorVTInfo DestInfo,
1126 X86VectorVTInfo SrcInfo,
1127 bit IsConvertibleToThreeAddress,
1128 SDPatternOperator UnmaskedOp = X86VBroadcast,
1129 SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
1130 let hasSideEffects = 0 in
1131 def r : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
1132 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1133 [(set MaskInfo.RC:$dst,
1137 (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
1138 DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
1139 def rkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1140 (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
1141 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1142 "${dst} {${mask}} {z}, $src}"),
1143 [(set MaskInfo.RC:$dst,
1144 (vselect MaskInfo.KRCWM:$mask,
1148 (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1149 MaskInfo.ImmAllZerosV))],
1150 DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
1151 let Constraints = "$src0 = $dst" in
1152 def rk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1153 (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1155 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1156 "${dst} {${mask}}, $src}"),
1157 [(set MaskInfo.RC:$dst,
1158 (vselect MaskInfo.KRCWM:$mask,
1162 (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1163 MaskInfo.RC:$src0))],
1164 DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;
1166 let hasSideEffects = 0, mayLoad = 1 in
1167 def m : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1168 (ins SrcInfo.ScalarMemOp:$src),
1169 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1170 [(set MaskInfo.RC:$dst,
1174 (UnmaskedBcastOp addr:$src)))))],
1175 DestInfo.ExeDomain>, T8PD, EVEX,
1176 EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1178 def mkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1179 (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
1180 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1181 "${dst} {${mask}} {z}, $src}"),
1182 [(set MaskInfo.RC:$dst,
1183 (vselect MaskInfo.KRCWM:$mask,
1187 (SrcInfo.BroadcastLdFrag addr:$src)))),
1188 MaskInfo.ImmAllZerosV))],
1189 DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
1190 EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1192 let Constraints = "$src0 = $dst",
1193 isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
1194 def mk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1195 (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1196 SrcInfo.ScalarMemOp:$src),
1197 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1198 "${dst} {${mask}}, $src}"),
1199 [(set MaskInfo.RC:$dst,
1200 (vselect MaskInfo.KRCWM:$mask,
1204 (SrcInfo.BroadcastLdFrag addr:$src)))),
1205 MaskInfo.RC:$src0))],
1206 DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
1207 EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1210 // Helper class to force mask and broadcast result to same type.
// Thin wrapper over avx512_broadcast_rm_split that passes DestInfo for both
// the MaskInfo and DestInfo template arguments, i.e. the common case where no
// type mismatch between the mask and the broadcast result is needed.
1211 multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
1212 SchedWrite SchedRR, SchedWrite SchedRM,
1213 X86VectorVTInfo DestInfo,
1214 X86VectorVTInfo SrcInfo,
1215 bit IsConvertibleToThreeAddress> :
1216 avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
1217 DestInfo, DestInfo, SrcInfo,
1218 IsConvertibleToThreeAddress>;
1220 multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1221 AVX512VLVectorVTInfo _> {
1222 let Predicates = [HasAVX512] in {
1223 defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1224 WriteFShuffle256Ld, _.info512, _.info128, 1>,
1225 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
1230 let Predicates = [HasVLX] in {
1231 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1232 WriteFShuffle256Ld, _.info256, _.info128, 1>,
1233 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
1239 multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1240 AVX512VLVectorVTInfo _> {
1241 let Predicates = [HasAVX512] in {
1242 defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1243 WriteFShuffle256Ld, _.info512, _.info128, 1>,
1244 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
1249 let Predicates = [HasVLX] in {
1250 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1251 WriteFShuffle256Ld, _.info256, _.info128, 1>,
1252 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
1255 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1256 WriteFShuffle256Ld, _.info128, _.info128, 1>,
1257 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
1262 defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
// VBROADCASTSD (opcode 0x19): broadcast a double-precision scalar across all
// f64 vector widths. VEX_W1X: W=1 in the EVEX form.
1264 defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1265 avx512vl_f64_info>, VEX_W1X;
1267 multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1268 X86VectorVTInfo _, SDPatternOperator OpNode,
1269 RegisterClass SrcRC> {
1270 let ExeDomain = _.ExeDomain in
1271 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1273 "vpbroadcast"##_.Suffix, "$src", "$src",
1274 (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX,
1278 multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1279 X86VectorVTInfo _, SDPatternOperator OpNode,
1280 RegisterClass SrcRC, SubRegIndex Subreg> {
1281 let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1282 defm r : AVX512_maskable_custom<opc, MRMSrcReg,
1283 (outs _.RC:$dst), (ins GR32:$src),
1284 !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1285 !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1286 "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
1287 "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
1289 def : Pat <(_.VT (OpNode SrcRC:$src)),
1290 (!cast<Instruction>(Name#r)
1291 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1293 def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1294 (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
1295 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1297 def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1298 (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
1299 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1302 multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1303 AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1304 RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1305 let Predicates = [prd] in
1306 defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1307 OpNode, SrcRC, Subreg>, EVEX_V512;
1308 let Predicates = [prd, HasVLX] in {
1309 defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1310 _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1311 defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1312 _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
1316 multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1317 SDPatternOperator OpNode,
1318 RegisterClass SrcRC, Predicate prd> {
1319 let Predicates = [prd] in
1320 defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1322 let Predicates = [prd, HasVLX] in {
1323 defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1325 defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
// GPR-source byte broadcast (opcode 0x7A, BWI). The GR8 source is inserted
// into a GR32 via sub_8bit by the broadcastbw_reg patterns since the
// instruction itself takes a 32-bit register operand.
1330 defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1331 avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1332 defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1333 avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
// GPR-source dword/qword broadcasts. Both use opcode 0x7C; the qword form is
// distinguished by VEX_W.
1335 defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1336 X86VBroadcast, GR32, HasAVX512>;
1337 defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1338 X86VBroadcast, GR64, HasAVX512>, VEX_W;
1340 multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1341 AVX512VLVectorVTInfo _, Predicate prd,
1342 bit IsConvertibleToThreeAddress> {
1343 let Predicates = [prd] in {
1344 defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
1345 WriteShuffle256Ld, _.info512, _.info128,
1346 IsConvertibleToThreeAddress>,
1349 let Predicates = [prd, HasVLX] in {
1350 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
1351 WriteShuffle256Ld, _.info256, _.info128,
1352 IsConvertibleToThreeAddress>,
1354 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
1355 WriteShuffleXLd, _.info128, _.info128,
1356 IsConvertibleToThreeAddress>,
// Vector/memory-source integer broadcasts. B/W forms (0x78/0x79) require BWI
// and are not marked convertible-to-three-address; D/Q forms (0x58/0x59) are.
// VEX_W1X on the Q form: W=1 in the EVEX encoding.
1361 defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1362 avx512vl_i8_info, HasBWI, 0>;
1363 defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1364 avx512vl_i16_info, HasBWI, 0>;
1365 defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1366 avx512vl_i32_info, HasAVX512, 1>;
1367 defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1368 avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;
1370 multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1371 X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1372 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1373 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1374 (_Dst.VT (X86SubVBroadcast
1375 (_Src.VT (_Src.LdFrag addr:$src))))>,
1376 Sched<[SchedWriteShuffle.YMM.Folded]>,
1380 // This should be used for the AVX512DQ broadcast instructions. It disables
1381 // the unmasked patterns so that we only use the DQ instructions when masking
1383 multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1384 X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1385 let hasSideEffects = 0, mayLoad = 1 in
1386 defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1387 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1389 (_Dst.VT (X86SubVBroadcast
1390 (_Src.VT (_Src.LdFrag addr:$src))))>,
1391 Sched<[SchedWriteShuffle.YMM.Folded]>,
1395 let Predicates = [HasAVX512] in {
1396 // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
1397 def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
1398 (VPBROADCASTQZm addr:$src)>;
1400 // FIXME this is to handle aligned extloads from i8.
1401 def : Pat<(v16i32 (X86VBroadcast (loadi32 addr:$src))),
1402 (VPBROADCASTDZm addr:$src)>;
1405 let Predicates = [HasVLX] in {
1406 // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
1407 def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
1408 (VPBROADCASTQZ128m addr:$src)>;
1409 def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
1410 (VPBROADCASTQZ256m addr:$src)>;
1412 // FIXME this is to handle aligned extloads from i8.
1413 def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
1414 (VPBROADCASTDZ128m addr:$src)>;
1415 def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
1416 (VPBROADCASTDZ256m addr:$src)>;
1418 let Predicates = [HasVLX, HasBWI] in {
1419 // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
1420 // This means we'll encounter truncated i32 loads; match that here.
1421 def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1422 (VPBROADCASTWZ128m addr:$src)>;
1423 def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1424 (VPBROADCASTWZ256m addr:$src)>;
1425 def : Pat<(v8i16 (X86VBroadcast
1426 (i16 (trunc (i32 (extloadi16 addr:$src)))))),
1427 (VPBROADCASTWZ128m addr:$src)>;
1428 def : Pat<(v8i16 (X86VBroadcast
1429 (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1430 (VPBROADCASTWZ128m addr:$src)>;
1431 def : Pat<(v16i16 (X86VBroadcast
1432 (i16 (trunc (i32 (extloadi16 addr:$src)))))),
1433 (VPBROADCASTWZ256m addr:$src)>;
1434 def : Pat<(v16i16 (X86VBroadcast
1435 (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1436 (VPBROADCASTWZ256m addr:$src)>;
1438 // FIXME this is to handle aligned extloads from i8.
1439 def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))),
1440 (VPBROADCASTWZ128m addr:$src)>;
1441 def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))),
1442 (VPBROADCASTWZ256m addr:$src)>;
1444 let Predicates = [HasBWI] in {
1445 // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
1446 // This means we'll encounter truncated i32 loads; match that here.
1447 def : Pat<(v32i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1448 (VPBROADCASTWZm addr:$src)>;
1449 def : Pat<(v32i16 (X86VBroadcast
1450 (i16 (trunc (i32 (extloadi16 addr:$src)))))),
1451 (VPBROADCASTWZm addr:$src)>;
1452 def : Pat<(v32i16 (X86VBroadcast
1453 (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1454 (VPBROADCASTWZm addr:$src)>;
1456 // FIXME this is to handle aligned extloads from i8.
1457 def : Pat<(v32i16 (X86VBroadcast (loadi16 addr:$src))),
1458 (VPBROADCASTWZm addr:$src)>;
1461 //===----------------------------------------------------------------------===//
1462 // AVX-512 BROADCAST SUBVECTORS
// 512-bit subvector broadcasts from memory: x4-of-32-bit forms load a 128-bit
// subvector (CD8VT4 tuple at 32-bit granularity), x4-of-64-bit forms load a
// 256-bit subvector (CD8VT4 at 64-bit granularity, VEX_W set).
1465 defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1466 v16i32_info, v4i32x_info>,
1467 EVEX_V512, EVEX_CD8<32, CD8VT4>;
1468 defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1469 v16f32_info, v4f32x_info>,
1470 EVEX_V512, EVEX_CD8<32, CD8VT4>;
1471 defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1472 v8i64_info, v4i64x_info>, VEX_W,
1473 EVEX_V512, EVEX_CD8<64, CD8VT4>;
1474 defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1475 v8f64_info, v4f64x_info>, VEX_W,
1476 EVEX_V512, EVEX_CD8<64, CD8VT4>;
1478 let Predicates = [HasAVX512] in {
1479 def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
1480 (VBROADCASTF64X4rm addr:$src)>;
1481 def : Pat<(v16i32 (X86SubVBroadcast (loadv8i32 addr:$src))),
1482 (VBROADCASTI64X4rm addr:$src)>;
1483 def : Pat<(v32i16 (X86SubVBroadcast (loadv16i16 addr:$src))),
1484 (VBROADCASTI64X4rm addr:$src)>;
1485 def : Pat<(v64i8 (X86SubVBroadcast (loadv32i8 addr:$src))),
1486 (VBROADCASTI64X4rm addr:$src)>;
1488 // Provide fallback in case the load node that is used in the patterns above
1489 // is used by additional users, which prevents the pattern selection.
1490 def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
1491 (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1492 (v4f64 VR256X:$src), 1)>;
1493 def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
1494 (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1495 (v8f32 VR256X:$src), 1)>;
1496 def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
1497 (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1498 (v4i64 VR256X:$src), 1)>;
1499 def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
1500 (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1501 (v8i32 VR256X:$src), 1)>;
1502 def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
1503 (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1504 (v16i16 VR256X:$src), 1)>;
1505 def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
1506 (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1507 (v32i8 VR256X:$src), 1)>;
1509 def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1510 (VBROADCASTF32X4rm addr:$src)>;
1511 def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1512 (VBROADCASTI32X4rm addr:$src)>;
1513 def : Pat<(v32i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
1514 (VBROADCASTI32X4rm addr:$src)>;
1515 def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
1516 (VBROADCASTI32X4rm addr:$src)>;
1518 // Patterns for selects of bitcasted operations.
1519 def : Pat<(vselect VK16WM:$mask,
1520 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1521 (v16f32 immAllZerosV)),
1522 (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1523 def : Pat<(vselect VK16WM:$mask,
1524 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1526 (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1527 def : Pat<(vselect VK16WM:$mask,
1528 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1529 (v16i32 immAllZerosV)),
1530 (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1531 def : Pat<(vselect VK16WM:$mask,
1532 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1534 (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1536 def : Pat<(vselect VK8WM:$mask,
1537 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
1538 (v8f64 immAllZerosV)),
1539 (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1540 def : Pat<(vselect VK8WM:$mask,
1541 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
1543 (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1544 def : Pat<(vselect VK8WM:$mask,
1545 (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
1546 (v8i64 immAllZerosV)),
1547 (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1548 def : Pat<(vselect VK8WM:$mask,
1549 (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
1551 (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
// 256-bit 32x4 subvector broadcasts, available with AVX512VL.  Defines the
// Z256 instruction forms, maps other element types (f64/i64/i16/i8 loads)
// onto the 32x4 encodings, and provides select-of-bitcast patterns plus a
// VINSERT-based fallback for register sources whose load was not foldable.
// NOTE(review): the closing '}' of this 'let Predicates' block (original
// line ~1610) was lost in extraction, as were the passthru lines of the
// "rmk" patterns below -- confirm against upstream.
1554 let Predicates = [HasVLX] in {
1555 defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1556 v8i32x_info, v4i32x_info>,
1557 EVEX_V256, EVEX_CD8<32, CD8VT4>;
1558 defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1559 v8f32x_info, v4f32x_info>,
1560 EVEX_V256, EVEX_CD8<32, CD8VT4>;
// Re-use the 32x4 broadcast encodings for other element types of the same
// total width; the load is element-type agnostic.
1562 def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1563 (VBROADCASTF32X4Z256rm addr:$src)>;
1564 def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1565 (VBROADCASTI32X4Z256rm addr:$src)>;
1566 def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
1567 (VBROADCASTI32X4Z256rm addr:$src)>;
1568 def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
1569 (VBROADCASTI32X4Z256rm addr:$src)>;
1571 // Patterns for selects of bitcasted operations.
1572 def : Pat<(vselect VK8WM:$mask,
1573 (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1574 (v8f32 immAllZerosV)),
1575 (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1576 def : Pat<(vselect VK8WM:$mask,
1577 (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1579 (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1580 def : Pat<(vselect VK8WM:$mask,
1581 (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1582 (v8i32 immAllZerosV)),
1583 (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1584 def : Pat<(vselect VK8WM:$mask,
1585 (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1587 (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1590 // Provide fallback in case the load node that is used in the patterns above
1591 // is used by additional users, which prevents the pattern selection.
// Implemented as: place the 128-bit source in the low lane of an undef
// 256-bit register, then VINSERT the same value into the high lane.
1592 def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
1593 (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1594 (v2f64 VR128X:$src), 1)>;
1595 def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
1596 (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1597 (v4f32 VR128X:$src), 1)>;
1598 def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
1599 (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1600 (v2i64 VR128X:$src), 1)>;
1601 def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
1602 (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1603 (v4i32 VR128X:$src), 1)>;
1604 def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
1605 (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1606 (v8i16 VR128X:$src), 1)>;
1607 def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
1608 (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1609 (v16i8 VR128X:$src), 1)>;
// 256-bit 64x2 subvector broadcasts; the 64-bit-granular forms require
// AVX512DQ in addition to AVX512VL.  Also maps masked selects of bitcasted
// f32/i32 subvector broadcasts onto the 64x2 instructions.
// NOTE(review): closing brace of this 'let' block and the passthru lines of
// the "rmk" patterns are missing from this extraction -- confirm upstream.
1612 let Predicates = [HasVLX, HasDQI] in {
1613 defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1614 v4i64x_info, v2i64x_info>, VEX_W1X,
1615 EVEX_V256, EVEX_CD8<64, CD8VT2>;
1616 defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1617 v4f64x_info, v2f64x_info>, VEX_W1X,
1618 EVEX_V256, EVEX_CD8<64, CD8VT2>;
1620 // Patterns for selects of bitcasted operations.
1621 def : Pat<(vselect VK4WM:$mask,
1622 (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1623 (v4f64 immAllZerosV)),
1624 (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1625 def : Pat<(vselect VK4WM:$mask,
1626 (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1628 (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1629 def : Pat<(vselect VK4WM:$mask,
1630 (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1631 (v4i64 immAllZerosV)),
1632 (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1633 def : Pat<(vselect VK4WM:$mask,
1634 (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1636 (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
// 512-bit DQ-only subvector broadcasts: 64x2 (two 64-bit elements) and
// 32x8 (eight 32-bit elements), in integer and FP flavors, plus masked
// select-of-bitcast patterns mapping onto their rmkz/rmk forms.
// NOTE(review): closing brace of this 'let' block and the "rmk" passthru
// lines are missing from this extraction -- confirm upstream.
1639 let Predicates = [HasDQI] in {
1640 defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1641 v8i64_info, v2i64x_info>, VEX_W,
1642 EVEX_V512, EVEX_CD8<64, CD8VT2>;
1643 defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1644 v16i32_info, v8i32x_info>,
1645 EVEX_V512, EVEX_CD8<32, CD8VT8>;
1646 defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1647 v8f64_info, v2f64x_info>, VEX_W,
1648 EVEX_V512, EVEX_CD8<64, CD8VT2>;
1649 defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1650 v16f32_info, v8f32x_info>,
1651 EVEX_V512, EVEX_CD8<32, CD8VT8>;
1653 // Patterns for selects of bitcasted operations.
1654 def : Pat<(vselect VK16WM:$mask,
1655 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
1656 (v16f32 immAllZerosV)),
1657 (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
1658 def : Pat<(vselect VK16WM:$mask,
1659 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
1661 (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1662 def : Pat<(vselect VK16WM:$mask,
1663 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
1664 (v16i32 immAllZerosV)),
1665 (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
1666 def : Pat<(vselect VK16WM:$mask,
1667 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
1669 (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1671 def : Pat<(vselect VK8WM:$mask,
1672 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1673 (v8f64 immAllZerosV)),
1674 (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
1675 def : Pat<(vselect VK8WM:$mask,
1676 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1678 (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1679 def : Pat<(vselect VK8WM:$mask,
1680 (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1681 (v8i64 immAllZerosV)),
1682 (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
1683 def : Pat<(vselect VK8WM:$mask,
1684 (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1686 (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
// Multiclass for VBROADCAST*32X2: broadcasts a 32x2 (64-bit) element pair.
// The base class defines the Z (512-bit, HasDQI) and Z256 (HasDQI+HasVLX)
// forms; the _i32x2 variant additionally defines a Z128 form using the
// narrower shuffle scheduling classes.
// NOTE(review): the trailing attribute lines / closing braces of these
// multiclasses (original lines 1695, 1700-1702, 1711-1712) are missing
// from this extraction -- confirm upstream.
1689 multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1690 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
1691 let Predicates = [HasDQI] in
1692 defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
1693 WriteShuffle256Ld, _Dst.info512,
1694 _Src.info512, _Src.info128, 0, null_frag, null_frag>,
1696 let Predicates = [HasDQI, HasVLX] in
1697 defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
1698 WriteShuffle256Ld, _Dst.info256,
1699 _Src.info256, _Src.info128, 0, null_frag, null_frag>,
// Integer variant adds the 128-bit form on top of the base multiclass.
1703 multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1704 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
1705 avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1707 let Predicates = [HasDQI, HasVLX] in
1708 defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
1709 WriteShuffleXLd, _Dst.info128,
1710 _Src.info128, _Src.info128, 0, null_frag, null_frag>,
// Instantiations: i32x2 gets all three widths, f32x2 only 512/256-bit.
1714 defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1715 avx512vl_i32_info, avx512vl_i64_info>;
1716 defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1717 avx512vl_f32_info, avx512vl_f64_info>;
1719 //===----------------------------------------------------------------------===//
1720 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
// Broadcast a mask register to a vector register (VPBROADCASTM*): every
// destination element receives the mask.  Requires AVX512CD; VL forms need
// AVX512VL.  The single rr form per width has no memory or masked variants.
// NOTE(review): closing braces of these multiclasses (original lines
// ~1728-1729, 1737-1739) were lost in extraction.
1722 multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1723 X86VectorVTInfo _, RegisterClass KRC> {
1724 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1725 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1726 [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1727 EVEX, Sched<[WriteShuffle]>;
// Wrapper defining the 512/256/128-bit widths under the CDI predicates.
1730 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1731 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1732 let Predicates = [HasCDI] in
1733 defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1734 let Predicates = [HasCDI, HasVLX] in {
1735 defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1736 defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
// w2d: 16-bit mask to 32-bit elements; b2q: 8-bit mask to 64-bit elements.
1740 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1741 avx512vl_i32_info, VK16>;
1742 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1743 avx512vl_i64_info, VK8>, VEX_W;
1745 //===----------------------------------------------------------------------===//
1746 // -- VPERMI2 - 3 source operands form --
// VPERMI2* -- two-source full permute where the index vector ($src1) is
// tied to (and overwritten by) the destination.  The _cast maskable helper
// handles the index operand having a different type than the data on the
// FP variants.  rr/rm are register/memory forms; rmb adds an embedded
// broadcast of the memory operand.
// NOTE(review): several closing braces and trailing lines of these
// multiclasses (original lines 1757-1758, 1765-1767, 1781-1782, 1800-1802,
// 1807, 1816-1818, 1828) are missing from this extraction.
1747 multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1748 X86FoldableSchedWrite sched,
1749 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1750 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1751 hasSideEffects = 0 in {
1752 defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1753 (ins _.RC:$src2, _.RC:$src3),
1754 OpcodeStr, "$src3, $src2", "$src2, $src3",
1755 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1756 EVEX_4V, AVX5128IBase, Sched<[sched]>;
1759 defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1760 (ins _.RC:$src2, _.MemOp:$src3),
1761 OpcodeStr, "$src3, $src2", "$src2, $src3",
1762 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1763 (_.VT (_.LdFrag addr:$src3)))), 1>,
1764 EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Embedded-broadcast memory form (EVEX.b): $src3 is a scalar splat.
1768 multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1769 X86FoldableSchedWrite sched,
1770 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1771 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1772 hasSideEffects = 0, mayLoad = 1 in
1773 defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1774 (ins _.RC:$src2, _.ScalarMemOp:$src3),
1775 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1776 !strconcat("$src2, ${src3}", _.BroadcastStr ),
1777 (_.VT (X86VPermt2 _.RC:$src2,
1778 IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1779 AVX5128IBase, EVEX_4V, EVEX_B,
1780 Sched<[sched.Folded, sched.ReadAfterFold]>;
// All widths for D/Q/PS/PD element types (512-bit unconditional, VL forms
// behind HasVLX).
1783 multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1784 X86FoldableSchedWrite sched,
1785 AVX512VLVectorVTInfo VTInfo,
1786 AVX512VLVectorVTInfo ShuffleMask> {
1787 defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1788 ShuffleMask.info512>,
1789 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1790 ShuffleMask.info512>, EVEX_V512;
1791 let Predicates = [HasVLX] in {
1792 defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1793 ShuffleMask.info128>,
1794 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1795 ShuffleMask.info128>, EVEX_V128;
1796 defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1797 ShuffleMask.info256>,
1798 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1799 ShuffleMask.info256>, EVEX_V256;
// Byte/word variants (no embedded broadcast -- sub-dword elements) gated on
// an extra predicate (HasBWI / HasVBMI).
1803 multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1804 X86FoldableSchedWrite sched,
1805 AVX512VLVectorVTInfo VTInfo,
1806 AVX512VLVectorVTInfo Idx,
1808 let Predicates = [Prd] in
1809 defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1810 Idx.info512>, EVEX_V512;
1811 let Predicates = [Prd, HasVLX] in {
1812 defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1813 Idx.info128>, EVEX_V128;
1814 defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1815 Idx.info256>, EVEX_V256;
1819 defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1820 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1821 defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1822 avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1823 defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1824 avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1825 VEX_W, EVEX_CD8<16, CD8VF>;
1826 defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1827 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1829 defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1830 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1831 defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1832 avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1834 // Extra patterns to deal with extra bitcasts due to passthru and index being
1835 // different types on the fp versions.
// Each pattern matches a masked VPERMI2 (rr / rm / rmb) where the index
// operand arrives as a bitcast from CastVT (commonly vXi64 due to ABI) and
// the merge value is the same bitcast source, and selects the corresponding
// "k"-suffixed (merge-masked) instruction.
// NOTE(review): closing brace of this multiclass (original ~1859-1860) is
// missing from this extraction.
1836 multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1837 X86VectorVTInfo IdxVT,
1838 X86VectorVTInfo CastVT> {
1839 def : Pat<(_.VT (vselect _.KRCWM:$mask,
1840 (X86VPermt2 (_.VT _.RC:$src2),
1841 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
1842 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1843 (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1844 _.RC:$src2, _.RC:$src3)>;
1845 def : Pat<(_.VT (vselect _.KRCWM:$mask,
1846 (X86VPermt2 _.RC:$src2,
1847 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
1848 (_.LdFrag addr:$src3)),
1849 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1850 (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1851 _.RC:$src2, addr:$src3)>;
1852 def : Pat<(_.VT (vselect _.KRCWM:$mask,
1853 (X86VPermt2 _.RC:$src2,
1854 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
1855 (_.BroadcastLdFrag addr:$src3)),
1856 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1857 (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1858 _.RC:$src2, addr:$src3)>;
1861 // TODO: Should we add more casts? The vXi64 case is common due to ABI.
1862 defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
1863 defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
1864 defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
// VPERMT2* -- the "table" form of the two-source permute: here the first
// DATA operand ($src1) is tied to the destination and the index vector is a
// free operand ($src2), the mirror of VPERMI2 above.  Same rr/rm/rmb and
// size-wrapper structure as the perm_i multiclasses.
// NOTE(review): several closing braces / trailing lines (original 1876,
// 1883-1884, 1897-1898, 1916-1918, 1931-1933, 1943) are missing from this
// extraction.
1867 multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1868 X86FoldableSchedWrite sched,
1869 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1870 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1871 defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1872 (ins IdxVT.RC:$src2, _.RC:$src3),
1873 OpcodeStr, "$src3, $src2", "$src2, $src3",
1874 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1875 EVEX_4V, AVX5128IBase, Sched<[sched]>;
1877 defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1878 (ins IdxVT.RC:$src2, _.MemOp:$src3),
1879 OpcodeStr, "$src3, $src2", "$src2, $src3",
1880 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1881 (_.LdFrag addr:$src3))), 1>,
1882 EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Embedded-broadcast memory form (EVEX.b).
1885 multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1886 X86FoldableSchedWrite sched,
1887 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1888 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1889 defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1890 (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1891 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1892 !strconcat("$src2, ${src3}", _.BroadcastStr ),
1893 (_.VT (X86VPermt2 _.RC:$src1,
1894 IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1895 AVX5128IBase, EVEX_4V, EVEX_B,
1896 Sched<[sched.Folded, sched.ReadAfterFold]>;
// All widths for D/Q/PS/PD; 512-bit unconditional, VL forms behind HasVLX.
1899 multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1900 X86FoldableSchedWrite sched,
1901 AVX512VLVectorVTInfo VTInfo,
1902 AVX512VLVectorVTInfo ShuffleMask> {
1903 defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1904 ShuffleMask.info512>,
1905 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1906 ShuffleMask.info512>, EVEX_V512;
1907 let Predicates = [HasVLX] in {
1908 defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1909 ShuffleMask.info128>,
1910 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1911 ShuffleMask.info128>, EVEX_V128;
1912 defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1913 ShuffleMask.info256>,
1914 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1915 ShuffleMask.info256>, EVEX_V256;
// Byte/word variants, gated on HasBWI / HasVBMI; no broadcast forms.
1919 multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1920 X86FoldableSchedWrite sched,
1921 AVX512VLVectorVTInfo VTInfo,
1922 AVX512VLVectorVTInfo Idx, Predicate Prd> {
1923 let Predicates = [Prd] in
1924 defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1925 Idx.info512>, EVEX_V512;
1926 let Predicates = [Prd, HasVLX] in {
1927 defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1928 Idx.info128>, EVEX_V128;
1929 defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1930 Idx.info256>, EVEX_V256;
1934 defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1935 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1936 defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1937 avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1938 defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1939 avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1940 VEX_W, EVEX_CD8<16, CD8VF>;
1941 defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1942 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1944 defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1945 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1946 defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1947 avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1949 //===----------------------------------------------------------------------===//
1950 // AVX-512 - BLEND using mask
// AVX-512 mask-driven blend instructions (VBLENDM*/VPBLENDM*).  All forms
// have empty ISel pattern lists ([]): blends are selected elsewhere, these
// defs exist for encoding/assembly.  hasSideEffects = 0 so the scheduler may
// treat them as pure.
// NOTE(review): several closing braces of these multiclasses/let blocks and
// some defm continuation lines (original 1990-1992, 2019-2021, 2026-2027,
// 2031, 2034-2037, 2042-2043, 2046, 2048-2051, 2053, 2057, 2061) are
// missing from this extraction -- confirm upstream.
1953 multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1954 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1955 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1956 def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1957 (ins _.RC:$src1, _.RC:$src2),
1958 !strconcat(OpcodeStr,
1959 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1960 EVEX_4V, Sched<[sched]>;
1961 def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1962 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1963 !strconcat(OpcodeStr,
1964 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1965 []>, EVEX_4V, EVEX_K, Sched<[sched]>;
1966 def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1967 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1968 !strconcat(OpcodeStr,
1969 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1970 []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
1971 let mayLoad = 1 in {
1972 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1973 (ins _.RC:$src1, _.MemOp:$src2),
1974 !strconcat(OpcodeStr,
1975 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1976 []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
1977 Sched<[sched.Folded, sched.ReadAfterFold]>;
1978 def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1979 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1980 !strconcat(OpcodeStr,
1981 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1982 []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1983 Sched<[sched.Folded, sched.ReadAfterFold]>;
1984 def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1985 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1986 !strconcat(OpcodeStr,
1987 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1988 []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1989 Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
// Embedded-broadcast (EVEX.b) memory forms, only for 32/64-bit elements.
1993 multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
1994 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1995 let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
1996 def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1997 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1998 !strconcat(OpcodeStr,
1999 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2000 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2001 EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2002 Sched<[sched.Folded, sched.ReadAfterFold]>;
2004 def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2005 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
2006 !strconcat(OpcodeStr,
2007 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
2008 "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2009 EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2010 Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
2012 def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2013 (ins _.RC:$src1, _.ScalarMemOp:$src2),
2014 !strconcat(OpcodeStr,
2015 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
2016 "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2017 EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2018 Sched<[sched.Folded, sched.ReadAfterFold]>;
// dword/qword element blends: all widths incl. broadcast forms.
2022 multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2023 AVX512VLVectorVTInfo VTInfo> {
2024 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2025 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2028 let Predicates = [HasVLX] in {
2029 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2030 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2032 defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2033 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
// byte/word element blends: require BWI, no broadcast forms.
2038 multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2039 AVX512VLVectorVTInfo VTInfo> {
2040 let Predicates = [HasBWI] in
2041 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2044 let Predicates = [HasBWI, HasVLX] in {
2045 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2047 defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2052 defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
2054 defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
2055 avx512vl_f64_info>, VEX_W;
2056 defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
2058 defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
2059 avx512vl_i64_info>, VEX_W;
2060 defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
2062 defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
2063 avx512vl_i16_info>, VEX_W;
2065 //===----------------------------------------------------------------------===//
2066 // Compare Instructions
2067 //===----------------------------------------------------------------------===//
2069 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
// Scalar FP compare into a mask register, with an immediate condition code.
// Intrinsic forms (rr_Int/rm_Int/rrb_Int with {sae}) use the _su PatFrags
// so masked selection only fires on single-use compares; isCodeGenOnly
// rr/rm forms cover plain FRC-register scalar compares.
// NOTE(review): interior lines (original 2075, 2077, 2082, 2084, 2086,
// 2089, 2093, 2095, 2097, 2100, 2102, 2104, 2112-2113, 2116, 2122,
// 2125-2127, 2131, 2135-2136, 2146-2147) are missing in this extraction,
// including outs lists and several timm:$cc operands.
2071 multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
2072 PatFrag OpNode_su, PatFrag OpNodeSAE_su,
2073 X86FoldableSchedWrite sched> {
2074 defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2076 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2078 "$cc, $src2, $src1", "$src1, $src2, $cc",
2079 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2080 (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2081 timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>;
2083 defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2085 (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
2087 "$cc, $src2, $src1", "$src1, $src2, $cc",
2088 (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
2090 (OpNode_su (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
2091 timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2092 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Register form with suppress-all-exceptions ({sae}) semantics.
2094 defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2096 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2098 "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
2099 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2101 (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2103 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
2105 let isCodeGenOnly = 1 in {
2106 let isCommutable = 1 in
2107 def rr : AVX512Ii8<0xC2, MRMSrcReg,
2108 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
2109 !strconcat("vcmp", _.Suffix,
2110 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2111 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2114 EVEX_4V, VEX_LIG, Sched<[sched]>;
2115 def rm : AVX512Ii8<0xC2, MRMSrcMem,
2117 (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2118 !strconcat("vcmp", _.Suffix,
2119 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2120 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2121 (_.ScalarLdFrag addr:$src2),
2123 EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2124 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Single-use guards: restrict masked pattern selection to compares whose
// result has exactly one user.
2128 def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2129 (X86cmpms node:$src1, node:$src2, node:$cc), [{
2130 return N->hasOneUse();
2132 def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2133 (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
2134 return N->hasOneUse();
2137 let Predicates = [HasAVX512] in {
2138 let ExeDomain = SSEPackedSingle in
2139 defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
2140 X86cmpms_su, X86cmpmsSAE_su,
2141 SchedWriteFCmp.Scl>, AVX512XSIi8Base;
2142 let ExeDomain = SSEPackedDouble in
2143 defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
2144 X86cmpms_su, X86cmpmsSAE_su,
2145 SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
// Packed integer equality/greater-than compares writing a mask register
// (VPCMPEQ*/VPCMPGT*).  Pattern lists are empty ([]) -- selection happens
// via the setcc patterns below / elsewhere; these defs provide the
// encodings and masked/memory/broadcast operand forms.
// NOTE(review): closing braces of the multiclasses/let blocks (original
// 2173-2174, 2193-2195, 2209-2211, 2225-2227, 2269-2270) and some interior
// lines (2177, 2203, 2219, 2233, 2241, 2245, 2249, 2253, 2257, 2261, 2265)
// are missing in this extraction.
2148 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
2149 X86FoldableSchedWrite sched,
2150 X86VectorVTInfo _, bit IsCommutable> {
2151 let isCommutable = IsCommutable, hasSideEffects = 0 in
2152 def rr : AVX512BI<opc, MRMSrcReg,
2153 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2154 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2155 []>, EVEX_4V, Sched<[sched]>;
2156 let mayLoad = 1, hasSideEffects = 0 in
2157 def rm : AVX512BI<opc, MRMSrcMem,
2158 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2159 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2160 []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2161 let isCommutable = IsCommutable, hasSideEffects = 0 in
2162 def rrk : AVX512BI<opc, MRMSrcReg,
2163 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2164 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2165 "$dst {${mask}}, $src1, $src2}"),
2166 []>, EVEX_4V, EVEX_K, Sched<[sched]>;
2167 let mayLoad = 1, hasSideEffects = 0 in
2168 def rmk : AVX512BI<opc, MRMSrcMem,
2169 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2170 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2171 "$dst {${mask}}, $src1, $src2}"),
2172 []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Adds embedded-broadcast (EVEX.b) forms for 32/64-bit element compares.
2175 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
2176 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2178 avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
2179 let mayLoad = 1, hasSideEffects = 0 in {
2180 def rmb : AVX512BI<opc, MRMSrcMem,
2181 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2182 !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2183 "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2184 []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2185 def rmbk : AVX512BI<opc, MRMSrcMem,
2186 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2187 _.ScalarMemOp:$src2),
2188 !strconcat(OpcodeStr,
2189 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2190 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2191 []>, EVEX_4V, EVEX_K, EVEX_B,
2192 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Width wrappers: Z unconditional under prd, Z256/Z128 behind HasVLX.
2196 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
2197 X86SchedWriteWidths sched,
2198 AVX512VLVectorVTInfo VTInfo, Predicate prd,
2199 bit IsCommutable = 0> {
2200 let Predicates = [prd] in
2201 defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
2202 VTInfo.info512, IsCommutable>, EVEX_V512;
2204 let Predicates = [prd, HasVLX] in {
2205 defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
2206 VTInfo.info256, IsCommutable>, EVEX_V256;
2207 defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
2208 VTInfo.info128, IsCommutable>, EVEX_V128;
2212 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2213 X86SchedWriteWidths sched,
2214 AVX512VLVectorVTInfo VTInfo,
2215 Predicate prd, bit IsCommutable = 0> {
2216 let Predicates = [prd] in
2217 defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
2218 VTInfo.info512, IsCommutable>, EVEX_V512;
2220 let Predicates = [prd, HasVLX] in {
2221 defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
2222 VTInfo.info256, IsCommutable>, EVEX_V256;
2223 defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
2224 VTInfo.info128, IsCommutable>, EVEX_V128;
2228 // This fragment treats X86cmpm as commutable to help match loads in both
2229 // operands for PCMPEQ.
2230 def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
2231 def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
2232 (setcc node:$src1, node:$src2, SETGT)>;
2234 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2235 // increase the pattern complexity the way an immediate would.
2236 let AddedComplexity = 2 in {
2237 // FIXME: Is there a better scheduler class for VPCMP?
2238 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
2239 SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2240 EVEX_CD8<8, CD8VF>, VEX_WIG;
2242 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
2243 SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2244 EVEX_CD8<16, CD8VF>, VEX_WIG;
2246 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
2247 SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2248 EVEX_CD8<32, CD8VF>;
2250 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
2251 SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2252 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2254 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
2255 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2256 EVEX_CD8<8, CD8VF>, VEX_WIG;
2258 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
2259 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2260 EVEX_CD8<16, CD8VF>, VEX_WIG;
2262 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
2263 SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2264 EVEX_CD8<32, CD8VF>;
2266 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
2267 SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2268 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
// Integer compare with condition-code immediate (VPCMP/VPCMPU family).
// Produces a k-mask from comparing two integer vectors under the predicate
// byte $cc.  Variants: rri (reg,reg), rmi (reg,mem), and the zero-masking
// forms rrik/rmik whose result is ANDed with the incoming writemask.
// The trailing anonymous Pats select the memory forms when the load appears
// as the *first* compare operand, using the commuted fragment plus its
// OperandTransform to swap the immediate.
2271 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2272 PatFrag Frag_su, PatFrag CommFrag, PatFrag CommFrag_su,
2273 X86FoldableSchedWrite sched,
2274 X86VectorVTInfo _, string Name> {
2275 let isCommutable = 1 in
2276 def rri : AVX512AIi8<opc, MRMSrcReg,
2277 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2278 !strconcat("vpcmp", Suffix,
2279 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2280 [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2283 EVEX_4V, Sched<[sched]>;
2284 def rmi : AVX512AIi8<opc, MRMSrcMem,
2285 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2286 !strconcat("vpcmp", Suffix,
2287 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2288 [(set _.KRC:$dst, (_.KVT
2291 (_.VT (_.LdFrag addr:$src2)),
2293 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Masked forms use the one-use fragments (Frag_su) so the compare is not
// duplicated when the unmasked result also has users.
2294 let isCommutable = 1 in
2295 def rrik : AVX512AIi8<opc, MRMSrcReg,
2296 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2298 !strconcat("vpcmp", Suffix,
2299 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2300 "$dst {${mask}}, $src1, $src2, $cc}"),
2301 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2302 (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
2305 EVEX_4V, EVEX_K, Sched<[sched]>;
2306 def rmik : AVX512AIi8<opc, MRMSrcMem,
2307 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2309 !strconcat("vpcmp", Suffix,
2310 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2311 "$dst {${mask}}, $src1, $src2, $cc}"),
2312 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2316 (_.VT (_.LdFrag addr:$src2)),
2318 EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Fold a load in operand 1 into the "rmi" form, swapping $cc.
2320 def : Pat<(_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
2321 (_.VT _.RC:$src1), cond)),
2322 (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2323 _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
// Same, for the masked "rmik" form.
2325 def : Pat<(and _.KRCWM:$mask,
2326 (_.KVT (CommFrag_su:$cc (_.LdFrag addr:$src2),
2327 (_.VT _.RC:$src1), cond))),
2328 (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2329 _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2330 (CommFrag.OperandTransform $cc))>;
// Extends avx512_icmp_cc with embedded-broadcast forms (EVEX.b): rmib and
// the masked rmibk compare against a scalar memory element splatted to the
// full vector ({1toN} syntax via _.BroadcastStr).  Only element widths that
// support broadcast (d/q) instantiate this multiclass.
2333 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2334 PatFrag Frag_su, PatFrag CommFrag,
2335 PatFrag CommFrag_su, X86FoldableSchedWrite sched,
2336 X86VectorVTInfo _, string Name> :
2337 avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2339 def rmib : AVX512AIi8<opc, MRMSrcMem,
2340 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2342 !strconcat("vpcmp", Suffix,
2343 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2344 "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2345 [(set _.KRC:$dst, (_.KVT (Frag:$cc
2347 (_.BroadcastLdFrag addr:$src2),
2349 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2350 def rmibk : AVX512AIi8<opc, MRMSrcMem,
2351 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2352 _.ScalarMemOp:$src2, u8imm:$cc),
2353 !strconcat("vpcmp", Suffix,
2354 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2355 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2356 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2359 (_.BroadcastLdFrag addr:$src2),
2361 EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Fold a broadcast load in operand 1, swapping the immediate.
2363 def : Pat<(_.KVT (CommFrag:$cc (_.BroadcastLdFrag addr:$src2),
2364 (_.VT _.RC:$src1), cond)),
2365 (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2366 _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
2368 def : Pat<(and _.KRCWM:$mask,
2369 (_.KVT (CommFrag_su:$cc (_.BroadcastLdFrag addr:$src2),
2370 (_.VT _.RC:$src1), cond))),
2371 (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2372 _.KRCWM:$mask, _.RC:$src1, addr:$src2,
// NOTE(review): this Pat uses CommFrag_su.OperandTransform while the
// analogous rmik Pat in the base multiclass uses CommFrag.OperandTransform.
// All current instantiations bind both fragments to the same SDNodeXForm,
// so the inconsistency appears cosmetic — confirm before relying on it.
2373 (CommFrag_su.OperandTransform $cc))>;
// Instantiates avx512_icmp_cc at all three vector lengths: 512-bit under
// `prd`, and the 128/256-bit forms additionally gated on HasVLX.
2376 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2377 PatFrag Frag_su, PatFrag CommFrag,
2378 PatFrag CommFrag_su, X86SchedWriteWidths sched,
2379 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2380 let Predicates = [prd] in
2381 defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2382 sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2384 let Predicates = [prd, HasVLX] in {
2385 defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2386 sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2387 defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2388 sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
// Same VL expansion as avx512_icmp_cc_vl, but for the broadcast-capable
// variant (avx512_icmp_cc_rmb) used by the d/q element widths.
2392 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2393 PatFrag Frag_su, PatFrag CommFrag,
2394 PatFrag CommFrag_su, X86SchedWriteWidths sched,
2395 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2396 let Predicates = [prd] in
2397 defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2398 sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2400 let Predicates = [prd, HasVLX] in {
2401 defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2402 sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2403 defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2404 sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
// Maps a setcc condition code to the VPCMP predicate immediate.
2408 def X86pcmpm_imm : SDNodeXForm<setcc, [{
2409 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2410 uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2411 return getI8Imm(SSECC, SDLoc(N));
2414 // Swapped operand version of the above.
2415 def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
2416 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2417 uint8_t SSECC = X86::getVPCMPImmForCond(CC);
// Commute the predicate so the operands may be swapped (load folding).
2418 SSECC = X86::getSwappedVPCMPImm(SSECC);
2419 return getI8Imm(SSECC, SDLoc(N));
// setcc PatFrags feeding the VPCMP/VPCMPU selection above:
//   X86pcmpm*   match signed predicates  (select VPCMP,  opcode 0x1F/0x3F),
//   X86pcmpum*  match unsigned predicates (select VPCMPU, opcode 0x1E/0x3E).
// The "_su" variants additionally require a single use, so masked selection
// does not duplicate a compare that has other users; the "_commute" variants
// attach X86pcmpm_imm_commute to swap the immediate for load folding.
2422 def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2423 (setcc node:$src1, node:$src2, node:$cc), [{
2424 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2425 return !ISD::isUnsignedIntSetCC(CC);
2428 def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2429 (setcc node:$src1, node:$src2, node:$cc), [{
2430 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2431 return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2434 // Same as above, but commutes immediate. Use for load folding.
2435 def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2436 (setcc node:$src1, node:$src2, node:$cc), [{
2437 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2438 return !ISD::isUnsignedIntSetCC(CC);
2439 }], X86pcmpm_imm_commute>;
2441 def X86pcmpm_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2442 (setcc node:$src1, node:$src2, node:$cc), [{
2443 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2444 return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2445 }], X86pcmpm_imm_commute>;
// Unsigned counterparts.
2447 def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2448 (setcc node:$src1, node:$src2, node:$cc), [{
2449 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2450 return ISD::isUnsignedIntSetCC(CC);
2453 def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2454 (setcc node:$src1, node:$src2, node:$cc), [{
2455 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2456 return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2459 // Same as above, but commutes immediate. Use for load folding.
2460 def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2461 (setcc node:$src1, node:$src2, node:$cc), [{
2462 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2463 return ISD::isUnsignedIntSetCC(CC);
2464 }], X86pcmpm_imm_commute>;
2466 def X86pcmpum_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2467 (setcc node:$src1, node:$src2, node:$cc), [{
2468 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2469 return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2470 }], X86pcmpm_imm_commute>;
2472 // FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
// Instantiate the VPCMP[U]{B,W,D,Q} instruction families.  Byte/word forms
// require HasBWI and have no broadcast variant; dword/qword forms require
// only HasAVX512 and include embedded-broadcast (rmb) encodings.
// Opcode 0x3F/0x1F = signed, 0x3E/0x1E = unsigned.
2473 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
2474 X86pcmpm_commute, X86pcmpm_commute_su,
2475 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2477 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
2478 X86pcmpum_commute, X86pcmpum_commute_su,
2479 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2482 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
2483 X86pcmpm_commute, X86pcmpm_commute_su,
2484 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2485 VEX_W, EVEX_CD8<16, CD8VF>;
2486 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
2487 X86pcmpum_commute, X86pcmpum_commute_su,
2488 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2489 VEX_W, EVEX_CD8<16, CD8VF>;
2491 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
2492 X86pcmpm_commute, X86pcmpm_commute_su,
2493 SchedWriteVecALU, avx512vl_i32_info,
2494 HasAVX512>, EVEX_CD8<32, CD8VF>;
2495 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
2496 X86pcmpum_commute, X86pcmpum_commute_su,
2497 SchedWriteVecALU, avx512vl_i32_info,
2498 HasAVX512>, EVEX_CD8<32, CD8VF>;
2500 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
2501 X86pcmpm_commute, X86pcmpm_commute_su,
2502 SchedWriteVecALU, avx512vl_i64_info,
2503 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2504 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
2505 X86pcmpum_commute, X86pcmpum_commute_su,
2506 SchedWriteVecALU, avx512vl_i64_info,
2507 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
// One-use-only wrappers around the FP compare nodes, used for the masked
// VCMPPS/VCMPPD selection so a multiply-used compare is not re-selected.
2509 def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2510 (X86cmpm node:$src1, node:$src2, node:$cc), [{
2511 return N->hasOneUse();
2513 def X86cmpmSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2514 (X86cmpmSAE node:$src1, node:$src2, node:$cc), [{
2515 return N->hasOneUse();
// Swaps a VCMP immediate for commuted operands (only the low 5 predicate
// bits participate, hence the 0x1f mask).
2518 def X86cmpm_imm_commute : SDNodeXForm<timm, [{
2519 uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
2520 return getI8Imm(Imm, SDLoc(N));
// FP vector compare (VCMPPS/VCMPPD) with immediate predicate: rri (reg,reg),
// rmi (reg,mem), rmbi (reg, broadcast-mem).  AVX512_maskable_cmp emits both
// the unmasked and writemasked ("k") instruction per variant.  The trailing
// Pats commute the compare so a load/broadcast in operand 1 can still fold,
// rewriting the immediate via X86cmpm_imm_commute.
2523 multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2525 defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2526 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
2528 "$cc, $src2, $src1", "$src1, $src2, $cc",
2529 (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2530 (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2533 defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2534 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2536 "$cc, $src2, $src1", "$src1, $src2, $cc",
2537 (X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2539 (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2541 Sched<[sched.Folded, sched.ReadAfterFold]>;
2543 defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2545 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2547 "$cc, ${src2}"#_.BroadcastStr#", $src1",
2548 "$src1, ${src2}"#_.BroadcastStr#", $cc",
2549 (X86cmpm (_.VT _.RC:$src1),
2550 (_.VT (_.BroadcastLdFrag addr:$src2)),
2552 (X86cmpm_su (_.VT _.RC:$src1),
2553 (_.VT (_.BroadcastLdFrag addr:$src2)),
2555 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2557 // Patterns for selecting with loads in other operand.
2558 def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2560 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2561 (X86cmpm_imm_commute timm:$cc))>;
2563 def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
2566 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2567 _.RC:$src1, addr:$src2,
2568 (X86cmpm_imm_commute timm:$cc))>;
2570 def : Pat<(X86cmpm (_.BroadcastLdFrag addr:$src2),
2571 (_.VT _.RC:$src1), timm:$cc),
2572 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2573 (X86cmpm_imm_commute timm:$cc))>;
2575 def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
2578 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2579 _.RC:$src1, addr:$src2,
2580 (X86cmpm_imm_commute timm:$cc))>;
// Register-only FP compare with suppress-all-exceptions ({sae}); encoded via
// EVEX.b on a register form, hence EVEX_B on MRMSrcReg.
2583 multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2584 // comparison code form (VCMP[EQ/LT/LE/...]
2585 defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2586 (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2588 "$cc, {sae}, $src2, $src1",
2589 "$src1, $src2, {sae}, $cc",
2590 (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2591 (X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2593 EVEX_B, Sched<[sched]>;
// VL expansion for the FP compares: 512-bit gets the common forms plus the
// {sae} form; 128/256-bit (VLX) get only the common forms, since {sae} is
// a 512-bit-only encoding.
2596 multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
2597 let Predicates = [HasAVX512] in {
2598 defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2599 avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2602 let Predicates = [HasAVX512,HasVLX] in {
2603 defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2604 defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
// Instantiate VCMPPD (0xC2, PD prefix, W1) and VCMPPS (PS prefix, W0).
2608 defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2609 AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
2610 defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2611 AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2613 // Patterns to select fp compares with load as first operand.
// Scalar VCMPSS/VCMPSD: commute so the loaded operand lands in the memory
// slot, rewriting the predicate immediate.
2614 let Predicates = [HasAVX512] in {
2615 def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
2617 (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2619 def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
2621 (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2624 // ----------------------------------------------------------------
// One-use wrappers for the VFPCLASS nodes (scalar and vector), used by the
// masked instruction patterns below.
2627 def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
2628 (X86Vfpclasss node:$src1, node:$src2), [{
2629 return N->hasOneUse();
2632 def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
2633 (X86Vfpclass node:$src1, node:$src2), [{
2634 return N->hasOneUse();
2637 //handle fpclass instruction mask = op(reg_scalar,imm)
2638 // op(mem_scalar,imm)
// Scalar VFPCLASSSS/VFPCLASSSD: classify one FP element against the class
// bitmask in $src2, writing a 1-bit k-mask.  rr/rm plus writemasked rrk/rmk.
2639 multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
2640 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2642 let Predicates = [prd], ExeDomain = _.ExeDomain in {
2643 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2644 (ins _.RC:$src1, i32u8imm:$src2),
2645 OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2646 [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
2647 (i32 timm:$src2)))]>,
2649 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2650 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2651 OpcodeStr##_.Suffix#
2652 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2653 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2654 (X86Vfpclasss_su (_.VT _.RC:$src1),
2655 (i32 timm:$src2))))]>,
2656 EVEX_K, Sched<[sched]>;
2657 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2658 (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2659 OpcodeStr##_.Suffix##
2660 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2662 (X86Vfpclasss _.ScalarIntMemCPat:$src1,
2663 (i32 timm:$src2)))]>,
2664 Sched<[sched.Folded, sched.ReadAfterFold]>;
2665 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2666 (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2667 OpcodeStr##_.Suffix##
2668 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2669 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2670 (X86Vfpclasss_su _.ScalarIntMemCPat:$src1,
2671 (i32 timm:$src2))))]>,
2672 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2676 //handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm)
2677 // fpclass(reg_vec, mem_vec, imm)
2678 // fpclass(reg_vec, broadcast(eltVt), imm)
// Vector VFPCLASSPS/VFPCLASSPD: classify every element against the class
// bitmask, producing a per-element k-mask.  rr/rm/rmb plus writemasked
// rrk/rmk/rmbk.  `mem` is the x/y/z length suffix used only in the AT&T
// aliases at the bottom (register and broadcast forms are otherwise
// ambiguous in AT&T syntax because the operand size is not visible).
2679 multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
2680 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2682 let ExeDomain = _.ExeDomain in {
2683 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2684 (ins _.RC:$src1, i32u8imm:$src2),
2685 OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2686 [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
2687 (i32 timm:$src2)))]>,
2689 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2690 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2691 OpcodeStr##_.Suffix#
2692 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2693 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2694 (X86Vfpclass_su (_.VT _.RC:$src1),
2695 (i32 timm:$src2))))]>,
2696 EVEX_K, Sched<[sched]>;
2697 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2698 (ins _.MemOp:$src1, i32u8imm:$src2),
2699 OpcodeStr##_.Suffix#"{"#mem#"}"#
2700 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2701 [(set _.KRC:$dst,(X86Vfpclass
2702 (_.VT (_.LdFrag addr:$src1)),
2703 (i32 timm:$src2)))]>,
2704 Sched<[sched.Folded, sched.ReadAfterFold]>;
2705 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2706 (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2707 OpcodeStr##_.Suffix#"{"#mem#"}"#
2708 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2709 [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
2710 (_.VT (_.LdFrag addr:$src1)),
2711 (i32 timm:$src2))))]>,
2712 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2713 def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2714 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2715 OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
2716 _.BroadcastStr##", $dst|$dst, ${src1}"
2717 ##_.BroadcastStr##", $src2}",
2718 [(set _.KRC:$dst,(X86Vfpclass
2719 (_.VT (_.BroadcastLdFrag addr:$src1)),
2720 (i32 timm:$src2)))]>,
2721 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2722 def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2723 (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2724 OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
2725 _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
2726 _.BroadcastStr##", $src2}",
2727 [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
2728 (_.VT (_.BroadcastLdFrag addr:$src1)),
2729 (i32 timm:$src2))))]>,
2730 EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2733 // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
// AT&T-only aliases mapping the length-suffixed mnemonic onto the register
// and broadcast forms.
2735 def : InstAlias<OpcodeStr#_.Suffix#mem#
2736 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2737 (!cast<Instruction>(NAME#"rr")
2738 _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2739 def : InstAlias<OpcodeStr#_.Suffix#mem#
2740 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2741 (!cast<Instruction>(NAME#"rrk")
2742 _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2743 def : InstAlias<OpcodeStr#_.Suffix#mem#
2744 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
2745 _.BroadcastStr#", $src2}",
2746 (!cast<Instruction>(NAME#"rmb")
2747 _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2748 def : InstAlias<OpcodeStr#_.Suffix#mem#
2749 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
2750 "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
2751 (!cast<Instruction>(NAME#"rmbk")
2752 _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
// VL expansion for vector fpclass; passes "z"/"x"/"y" as the AT&T length
// suffix used by the aliases in avx512_vector_fpclass.
2755 multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2756 bits<8> opc, X86SchedWriteWidths sched,
2758 let Predicates = [prd] in {
2759 defm Z : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
2760 _.info512, "z">, EVEX_V512;
2762 let Predicates = [prd, HasVLX] in {
2763 defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
2764 _.info128, "x">, EVEX_V128;
2765 defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
2766 _.info256, "y">, EVEX_V256;
// Combines packed (PS/PD) and scalar (SS/SD) fpclass families; opcVec is
// the packed opcode, opcScalar the scalar one.
2770 multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2771 bits<8> opcScalar, X86SchedWriteWidths sched,
2773 defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
2775 EVEX_CD8<32, CD8VF>;
2776 defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
2778 EVEX_CD8<64, CD8VF> , VEX_W;
2779 defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2780 sched.Scl, f32x_info, prd>, VEX_LIG,
2781 EVEX_CD8<32, CD8VT1>;
2782 defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2783 sched.Scl, f64x_info, prd>, VEX_LIG,
2784 EVEX_CD8<64, CD8VT1>, VEX_W;
// VFPCLASS requires AVX512DQ; packed opcode 0x66, scalar 0x67.
2787 defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp,
2788 HasDQI>, AVX512AIi8Base, EVEX;
2790 //-----------------------------------------------------------------
2791 // Mask register copy, including
2792 // - copy between mask registers
2793 // - load/store mask registers
2794 // - copy from GPR to mask register and vice versa
// KMOV k<-k, k<-mem, mem<-k forms.  The reg-reg copy has no pattern (pure
// move, isMoveReg) while the memory forms match vvt-typed load/store.
2796 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2797 string OpcodeStr, RegisterClass KRC,
2798 ValueType vvt, X86MemOperand x86memop> {
2799 let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
2800 def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2801 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2803 def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2804 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2805 [(set KRC:$dst, (vvt (load addr:$src)))]>,
2807 def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2808 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2809 [(store KRC:$src, addr:$dst)]>,
2810 Sched<[WriteStore]>;
// KMOV between a mask register and a GPR, in both directions.  No patterns;
// these copies are selected via COPY_TO_REGCLASS elsewhere.
2813 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2815 RegisterClass KRC, RegisterClass GRC> {
2816 let hasSideEffects = 0 in {
2817 def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2818 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2820 def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2821 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
// KMOVB requires DQI, KMOVW only AVX512F, KMOVD/KMOVQ require BWI.
// The GPR forms of KMOVB/KMOVW use GR32 (no 8/16-bit GPR encodings).
2826 let Predicates = [HasDQI] in
2827 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2828 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2831 let Predicates = [HasAVX512] in
2832 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2833 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2836 let Predicates = [HasBWI] in {
2837 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2839 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2841 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2843 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2847 // GR from/to mask register
// i16/i8 <-> mask bitcasts go through a 32-bit GPR because KMOVW/KMOVB only
// have 32-bit GPR forms; INSERT_SUBREG/EXTRACT_SUBREG bridge the widths.
2848 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2849 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2850 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2851 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2853 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2854 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2855 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2856 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
// zext: KMOVWrk/KMOVBrk already zero the upper GPR bits, so a zero-extended
// read is a single instruction (plus SUBREG_TO_REG for i64).
2858 def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2859 (KMOVWrk VK16:$src)>;
2860 def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2861 (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
2862 def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2863 (COPY_TO_REGCLASS VK16:$src, GR32)>;
2864 def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2865 (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
2867 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2868 (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2869 def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2870 (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
2871 def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2872 (COPY_TO_REGCLASS VK8:$src, GR32)>;
2873 def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2874 (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
// 32/64-bit masks match their GPR width directly, so a plain regclass copy
// suffices for the bitcast.
2876 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2877 (COPY_TO_REGCLASS GR32:$src, VK32)>;
2878 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2879 (COPY_TO_REGCLASS VK32:$src, GR32)>;
2880 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2881 (COPY_TO_REGCLASS GR64:$src, VK64)>;
2882 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2883 (COPY_TO_REGCLASS VK64:$src, GR64)>;
// Sub-byte mask loads/stores: with DQI, widen to VK8 and use KMOVB.
2886 let Predicates = [HasDQI] in {
2887 def : Pat<(store VK1:$src, addr:$dst),
2888 (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
2890 def : Pat<(v1i1 (load addr:$src)),
2891 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2892 def : Pat<(v2i1 (load addr:$src)),
2893 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2894 def : Pat<(v4i1 (load addr:$src)),
2895 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
// Without DQI there is no KMOVB: an i8 load is zero-extended through a GPR,
// and a v16i1-as-i16 load uses KMOVW directly.
2898 let Predicates = [HasAVX512] in {
2899 def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2900 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2901 def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2902 (KMOVWkm addr:$src)>;
// EXTRACT_VECTOR_ELT from an i1 vector, yielding i8.
2905 def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2906 SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2907 SDTCVecEltisVT<1, i1>,
// Lower scalar_to_vector into a mask class, and element-0 extraction back
// out, via plain regclass copies through GR32 — no real KMOV needed.
2910 let Predicates = [HasAVX512] in {
2911 multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2912 def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2913 (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2915 def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2916 (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2918 def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2919 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2921 def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2922 (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
// Instantiate for every mask width.
2925 defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
2926 defm : operation_gpr_mask_copy_lowering<VK2, v2i1>;
2927 defm : operation_gpr_mask_copy_lowering<VK4, v4i1>;
2928 defm : operation_gpr_mask_copy_lowering<VK8, v8i1>;
2929 defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
2930 defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
2931 defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;
2933 def : Pat<(insert_subvector (v16i1 immAllZerosV),
2934 (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2937 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2941 // Mask unary operation
// A single reg-reg mask instruction applying OpNode (e.g. KNOT).
2943 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2944 RegisterClass KRC, SDPatternOperator OpNode,
2945 X86FoldableSchedWrite sched, Predicate prd> {
2946 let Predicates = [prd] in
2947 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2948 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2949 [(set KRC:$dst, (OpNode KRC:$src))]>,
// Expand a mask unary op across b/w/d/q widths with their feature gates
// (B needs DQI, W needs AVX512F, D/Q need BWI) and prefix encodings.
2953 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2954 SDPatternOperator OpNode,
2955 X86FoldableSchedWrite sched> {
2956 defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2957 sched, HasDQI>, VEX, PD;
2958 defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2959 sched, HasAVX512>, VEX, PS;
2960 defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2961 sched, HasBWI>, VEX, PD, VEX_W;
2962 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2963 sched, HasBWI>, VEX, PS, VEX_W;
2966 // TODO - do we need a X86SchedWriteWidths::KMASK type?
2967 defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
2969 // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
// Without DQI (and for sub-byte masks generally) widen to VK16 and reuse
// KNOTW; the extra high bits are don't-care for the narrow result.
2970 let Predicates = [HasAVX512, NoDQI] in
2971 def : Pat<(vnot VK8:$src),
2972 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
2974 def : Pat<(vnot VK4:$src),
2975 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2976 def : Pat<(vnot VK2:$src),
2977 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
2979 // Mask binary operation
2980 // - KAND, KANDN, KOR, KXNOR, KXOR
// One reg-reg-reg mask instruction applying a binary OpNode; commutability
// is a per-op parameter (KANDN is the non-commutable case).
2981 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2982 RegisterClass KRC, SDPatternOperator OpNode,
2983 X86FoldableSchedWrite sched, Predicate prd,
2985 let Predicates = [prd], isCommutable = IsCommutable in
2986 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2987 !strconcat(OpcodeStr,
2988 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2989 [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
// Expand a mask binary op across b/w/d/q widths.  prdW lets the W form's
// predicate be overridden (KADD's W form needs DQI, not plain AVX512F).
2993 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2994 SDPatternOperator OpNode,
2995 X86FoldableSchedWrite sched, bit IsCommutable,
2996 Predicate prdW = HasAVX512> {
2997 defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2998 sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
2999 defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3000 sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
3001 defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3002 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
3003 defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3004 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
// Composite logic fragments used by the K instructions below.
3007 def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
3008 def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
3009 // These nodes use 'vnot' instead of 'not' to support vectors.
3010 def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
3011 def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
3013 // TODO - do we need a X86SchedWriteWidths::KMASK type?
// Instantiate the k-register logic ops; KANDN is not commutable, and KADD
// exists only with DQI (hence the prdW override).
3014 defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>;
3015 defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>;
3016 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>;
3017 defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>;
3018 defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>;
3019 defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
// Lower narrow-mask logic ops through the 16-bit K instruction `Inst` by
// widening both operands to VK16 and narrowing the result.  VOpNode is the
// vector-capable fragment (vandn/vxnor), OpNode the scalar one.
3021 multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
3023 // With AVX512F, 8-bit mask is promoted to 16-bit mask,
3024 // for the DQI set, this type is legal and KxxxB instruction is used
3025 let Predicates = [NoDQI] in
3026 def : Pat<(VOpNode VK8:$src1, VK8:$src2),
3028 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
3029 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
3031 // All types smaller than 8 bits require conversion anyway
3032 def : Pat<(OpNode VK1:$src1, VK1:$src2),
3033 (COPY_TO_REGCLASS (Inst
3034 (COPY_TO_REGCLASS VK1:$src1, VK16),
3035 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
// NOTE(review): the VK2/VK4 patterns below copy the result back to VK1
// rather than VK2/VK4 — looks like a copy-paste of the VK1 pattern; the
// k-registers overlap so this may be benign, but confirm the intended
// destination class.
3036 def : Pat<(VOpNode VK2:$src1, VK2:$src2),
3037 (COPY_TO_REGCLASS (Inst
3038 (COPY_TO_REGCLASS VK2:$src1, VK16),
3039 (COPY_TO_REGCLASS VK2:$src2, VK16)), VK1)>;
3040 def : Pat<(VOpNode VK4:$src1, VK4:$src2),
3041 (COPY_TO_REGCLASS (Inst
3042 (COPY_TO_REGCLASS VK4:$src1, VK16),
3043 (COPY_TO_REGCLASS VK4:$src2, VK16)), VK1)>;
// Route each narrow logic op through its word-width K instruction.
3046 defm : avx512_binop_pat<and, and, KANDWrr>;
3047 defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
3048 defm : avx512_binop_pat<or, or, KORWrr>;
3049 defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
3050 defm : avx512_binop_pat<xor, xor, KXORWrr>;
// KUNPCK: concatenate two narrow masks into one twice-as-wide mask.  Note
// the operand swap in the pattern — the instruction places $src1 in the
// high half, while concat_vectors lists the low half first.
3053 multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
3054 X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
3056 let Predicates = [prd] in {
3057 let hasSideEffects = 0 in
3058 def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
3059 (ins Src.KRC:$src1, Src.KRC:$src2),
3060 "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
3061 VEX_4V, VEX_L, Sched<[sched]>;
3063 def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
3064 (!cast<Instruction>(NAME##rr) Src.KRC:$src2, Src.KRC:$src1)>;
3068 defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info, WriteShuffle, HasAVX512>, PD;
3069 defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
3070 defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;
// KORTEST/KTEST: compare/test two masks and set EFLAGS (no k-register
// result, hence empty outs and Defs = [EFLAGS]).
3073 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3074 SDNode OpNode, X86FoldableSchedWrite sched,
3076 let Predicates = [prd], Defs = [EFLAGS] in
3077 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
3078 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
3079 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
// b/w/d/q expansion; prdW overrides the W form's gate (KTESTW needs DQI).
3083 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
3084 X86FoldableSchedWrite sched,
3085 Predicate prdW = HasAVX512> {
3086 defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
3088 defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
3090 defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
3092 defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
3096 // TODO - do we need a X86SchedWriteWidths::KMASK type?
3097 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
3098 defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
3101 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3102 SDNode OpNode, X86FoldableSchedWrite sched> {
3103 let Predicates = [HasAVX512] in
3104 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
3105 !strconcat(OpcodeStr,
3106 "\t{$imm, $src, $dst|$dst, $src, $imm}"),
3107 [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
3111 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
3112 SDNode OpNode, X86FoldableSchedWrite sched> {
3113 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3114 sched>, VEX, TAPD, VEX_W;
3115 let Predicates = [HasDQI] in
3116 defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3118 let Predicates = [HasBWI] in {
3119 defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3120 sched>, VEX, TAPD, VEX_W;
3121 defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
// KSHIFTL (0x32/0x33) and KSHIFTR (0x30/0x31) mask shift families.
3126 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
3127 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
// NOTE(review): "axv512" (not "avx512") is a pre-existing typo in the
// multiclass name; renaming would require updating every `defm :` user.
3129 // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Widens both narrow operands via INSERT_SUBREG into an undef 512-bit
// register, runs the wide VPCMP (InstStr#"Zrri"/"Zrrik"), and copies the
// resulting wide mask back to the narrow mask class. The second pattern
// folds an `and` with a mask into the instruction's zeroing mask operand
// (the "k" form).
3130 multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3132 X86VectorVTInfo Narrow,
3133 X86VectorVTInfo Wide> {
3134 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3135 (Narrow.VT Narrow.RC:$src2), cond)),
3137 (!cast<Instruction>(InstStr#"Zrri")
3138 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3139 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3140 (Frag.OperandTransform $cc)), Narrow.KRC)>;
// Masked variant: (and mask, cmp) -> masked wide compare.
3142 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3143 (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3144 (Narrow.VT Narrow.RC:$src2),
3146 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3147 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3148 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3149 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3150 (Frag_su.OperandTransform $cc)), Narrow.KRC)>;
// Same widening trick as above, but for the broadcast-load ("rmib") forms:
// compares a register operand against a broadcast memory operand. CommFrag /
// CommFrag_su match the operand-swapped form (broadcast on the LHS) and use
// the fragment's OperandTransform to fix up the condition code.
3153 multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3154 PatFrag CommFrag, PatFrag CommFrag_su,
3156 X86VectorVTInfo Narrow,
3157 X86VectorVTInfo Wide> {
// reg vs broadcast-load.
3159 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3160 (Narrow.BroadcastLdFrag addr:$src2), cond)),
3162 (!cast<Instruction>(InstStr#"Zrmib")
3163 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3164 addr:$src2, (Frag.OperandTransform $cc)), Narrow.KRC)>;
// Masked reg vs broadcast-load.
3166 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3168 (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3169 (Narrow.BroadcastLdFrag addr:$src2),
3171 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3172 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3173 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3174 addr:$src2, (Frag_su.OperandTransform $cc)), Narrow.KRC)>;
3176 // Commuted with broadcast load.
3177 def : Pat<(Narrow.KVT (CommFrag:$cc (Narrow.BroadcastLdFrag addr:$src2),
3178 (Narrow.VT Narrow.RC:$src1),
3181 (!cast<Instruction>(InstStr#"Zrmib")
3182 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3183 addr:$src2, (CommFrag.OperandTransform $cc)), Narrow.KRC)>;
// Masked commuted variant.
3185 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3187 (CommFrag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
3188 (Narrow.VT Narrow.RC:$src1),
3190 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3191 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3192 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3193 addr:$src2, (CommFrag_su.OperandTransform $cc)), Narrow.KRC)>;
3196 // Same as above, but for fp types which don't use PatFrags.
// Uses the X86cmpm / X86cmpm_su nodes directly with a timm condition code;
// commuted broadcast forms remap the immediate via X86cmpm_imm_commute.
3197 multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
3198 X86VectorVTInfo Narrow,
3199 X86VectorVTInfo Wide> {
// reg vs reg.
3200 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3201 (Narrow.VT Narrow.RC:$src2), timm:$cc)),
3203 (!cast<Instruction>(InstStr#"Zrri")
3204 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3205 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3206 timm:$cc), Narrow.KRC)>;
// Masked reg vs reg.
3208 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3209 (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3210 (Narrow.VT Narrow.RC:$src2), timm:$cc))),
3211 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3212 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3213 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3214 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3215 timm:$cc), Narrow.KRC)>;
// reg vs broadcast-load.
3218 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3219 (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
3221 (!cast<Instruction>(InstStr#"Zrmbi")
3222 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3223 addr:$src2, timm:$cc), Narrow.KRC)>;
// Masked reg vs broadcast-load.
3225 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3226 (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3227 (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
3228 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3229 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3230 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3231 addr:$src2, timm:$cc), Narrow.KRC)>;
3233 // Commuted with broadcast load.
3234 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3235 (Narrow.VT Narrow.RC:$src1), timm:$cc)),
3237 (!cast<Instruction>(InstStr#"Zrmbi")
3238 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3239 addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
// Masked commuted variant.
3241 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3242 (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3243 (Narrow.VT Narrow.RC:$src1), timm:$cc))),
3244 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3245 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3246 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3247 addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
// Without VLX, 128/256-bit D/Q integer compares and PS/PD fp compares are
// lowered through their 512-bit counterparts (register and broadcast forms).
3250 let Predicates = [HasAVX512, NoVLX] in {
3251 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3252 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3254 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3255 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3257 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3258 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3260 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3261 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
// Broadcast-memory (rmb) variants of the same compares.
3263 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v8i32x_info, v16i32_info>;
3264 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3266 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v4i32x_info, v16i32_info>;
3267 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3269 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3270 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3272 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3273 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
// Floating-point compares via 512-bit VCMPPS/VCMPPD.
3275 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
3276 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
3277 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
3278 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
// Byte/word compares need BWI; without VLX they also go through 512-bit ops.
3281 let Predicates = [HasBWI, NoVLX] in {
3282 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3283 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3285 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3286 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3288 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3289 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3291 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3292 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
3295 // Mask setting all 0s or 1s
// Pseudo (opcode 0, expanded later) that materializes the constant `Val`
// (all-zeros or all-ones) into a mask register; rematerializable and
// as cheap as a move so the allocator can recreate it freely.
3296 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
3297 let Predicates = [HasAVX512] in
3298 let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3299 SchedRW = [WriteZero] in
3300 def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3301 [(set KRC:$dst, (VT Val))]>;
// Instantiates the set-op pseudo for 16/32/64-bit mask widths.
3304 multiclass avx512_mask_setop_w<PatFrag Val> {
3305 defm W : avx512_mask_setop<VK16, v16i1, Val>;
3306 defm D : avx512_mask_setop<VK32, v32i1, Val>;
3307 defm Q : avx512_mask_setop<VK64, v64i1, Val>;
// KSET0* = all-zeros mask, KSET1* = all-ones mask pseudos.
3310 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3311 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3313 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// Sub-16-bit all-zero/all-one mask constants reuse the 16-bit KSET0W/KSET1W
// pseudo and re-class the result down to the narrow mask register class.
3314 let Predicates = [HasAVX512] in {
3315 def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3316 def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3317 def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3318 def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3319 def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
3320 def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
3321 def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
3322 def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
3325 // Patterns for kmask insert_subvector/extract_subvector to/from index=0
// Both directions are pure register-class copies, since mask registers of
// different widths share the same physical register with different views.
3326 multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3327 RegisterClass RC, ValueType VT> {
3328 def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3329 (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3331 def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3332 (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
// Every (narrower, wider) mask-class pair from v1i1 up to v64i1.
3334 defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>;
3335 defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>;
3336 defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>;
3337 defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
3338 defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
3339 defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;
3341 defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>;
3342 defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>;
3343 defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>;
3344 defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>;
3345 defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>;
3347 defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>;
3348 defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>;
3349 defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>;
3350 defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>;
3352 defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>;
3353 defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>;
3354 defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>;
3356 defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3357 defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3359 defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3361 //===----------------------------------------------------------------------===//
3362 // AVX-512 - Aligned and unaligned load and store
// avx512_load: one vector load family — plain (rr/rm), merge-masked
// (rrk/rmk) and zero-masked (rrkz/rmkz) forms, plus masked_load patterns.
// `SelectOprr` lets callers suppress the reg-reg select patterns (null_frag).
// NOTE(review): several continuation lines are missing from this excerpt.
3365 multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3366 X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3367 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3368 bit NoRMPattern = 0,
3369 SDPatternOperator SelectOprr = vselect> {
3370 let hasSideEffects = 0 in {
3371 let isMoveReg = 1 in
3372 def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3373 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3374 _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
3375 EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
// Zero-masking reg-reg form: lanes with a clear mask bit become zero.
3376 def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3377 (ins _.KRCWM:$mask, _.RC:$src),
3378 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3379 "${dst} {${mask}} {z}, $src}"),
3380 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3382 _.ImmAllZerosV)))], _.ExeDomain>,
3383 EVEX, EVEX_KZ, Sched<[Sched.RR]>;
3385 let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3386 def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3387 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3388 !if(NoRMPattern, [],
3390 (_.VT (ld_frag addr:$src)))]),
3391 _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
3392 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
// Merge-masking forms tie $src0 to $dst (unwritten lanes keep $src0).
3394 let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3395 def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3396 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3397 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3398 "${dst} {${mask}}, $src1}"),
3399 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3401 (_.VT _.RC:$src0))))], _.ExeDomain>,
3402 EVEX, EVEX_K, Sched<[Sched.RR]>;
3403 def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3404 (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3405 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3406 "${dst} {${mask}}, $src1}"),
3407 [(set _.RC:$dst, (_.VT
3408 (vselect _.KRCWM:$mask,
3409 (_.VT (ld_frag addr:$src1)),
3410 (_.VT _.RC:$src0))))], _.ExeDomain>,
3411 EVEX, EVEX_K, Sched<[Sched.RM]>;
3413 def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3414 (ins _.KRCWM:$mask, _.MemOp:$src),
3415 OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3416 "${dst} {${mask}} {z}, $src}",
3417 [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
3418 (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3419 _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
// Map masked-load intrinsics onto the masked instruction forms:
// undef/zero passthru -> zero-masking, register passthru -> merge-masking.
3421 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3422 (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3424 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3425 (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3427 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3428 (!cast<Instruction>(Name#_.ZSuffix##rmk) _.RC:$src0,
3429 _.KRCWM:$mask, addr:$ptr)>;
// Aligned-load family across vector lengths: Z (512-bit) needs `prd`,
// Z256/Z128 additionally need HasVLX. Uses the aligned load fragment and
// masked_load_aligned.
3432 multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3433 AVX512VLVectorVTInfo _, Predicate prd,
3434 X86SchedWriteMoveLSWidths Sched,
3435 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
3436 let Predicates = [prd] in
3437 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3438 _.info512.AlignedLdFrag, masked_load_aligned,
3439 Sched.ZMM, "", NoRMPattern>, EVEX_V512;
3441 let Predicates = [prd, HasVLX] in {
3442 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3443 _.info256.AlignedLdFrag, masked_load_aligned,
3444 Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
3445 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3446 _.info128.AlignedLdFrag, masked_load_aligned,
3447 Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
// Unaligned-load family across vector lengths; mirrors
// avx512_alignedload_vl but uses the plain LdFrag / masked_load, and
// forwards SelectOprr to control the reg-reg select patterns.
3451 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3452 AVX512VLVectorVTInfo _, Predicate prd,
3453 X86SchedWriteMoveLSWidths Sched,
3454 string EVEX2VEXOvrd, bit NoRMPattern = 0,
3455 SDPatternOperator SelectOprr = vselect> {
3456 let Predicates = [prd] in
3457 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3458 masked_load, Sched.ZMM, "",
3459 NoRMPattern, SelectOprr>, EVEX_V512;
3461 let Predicates = [prd, HasVLX] in {
3462 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3463 masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
3464 NoRMPattern, SelectOprr>, EVEX_V256;
3465 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3466 masked_load, Sched.XMM, EVEX2VEXOvrd,
3467 NoRMPattern, SelectOprr>, EVEX_V128;
// avx512_store: store family for one vector type — disassembly-only reversed
// reg-reg forms (rr_REV/rrk_REV/rrkz_REV, FoldGenData links them to the load
// forms), the real memory stores (mr/mrk), a masked_store pattern, and ".s"
// mnemonic aliases for the reversed encodings.
// NOTE(review): some scheduling/closing lines are missing from this excerpt.
3471 multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3472 X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3473 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3474 bit NoMRPattern = 0> {
3475 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3476 let isMoveReg = 1 in
3477 def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3478 OpcodeStr # "\t{$src, $dst|$dst, $src}",
3479 [], _.ExeDomain>, EVEX,
3480 FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
3481 EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
3482 def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3483 (ins _.KRCWM:$mask, _.RC:$src),
3484 OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3485 "${dst} {${mask}}, $src}",
3486 [], _.ExeDomain>, EVEX, EVEX_K,
3487 FoldGenData<BaseName#_.ZSuffix#rrk>,
3489 def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3490 (ins _.KRCWM:$mask, _.RC:$src),
3491 OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3492 "${dst} {${mask}} {z}, $src}",
3493 [], _.ExeDomain>, EVEX, EVEX_KZ,
3494 FoldGenData<BaseName#_.ZSuffix#rrkz>,
// Actual memory stores; mr carries the st_frag pattern unless suppressed.
3498 let hasSideEffects = 0, mayStore = 1 in
3499 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3500 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3501 !if(NoMRPattern, [],
3502 [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3503 _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
3504 EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
3505 def mrk : AVX512PI<opc, MRMDestMem, (outs),
3506 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3507 OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3508 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
// Map the masked-store intrinsic onto the masked store form.
3511 def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3512 (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3513 _.KRCWM:$mask, _.RC:$src)>;
// ".s" assembler aliases select the store-direction (reversed) encodings.
3515 def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3516 (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3517 _.RC:$dst, _.RC:$src), 0>;
3518 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3519 (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3520 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3521 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3522 (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3523 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
// Unaligned-store family across vector lengths (plain `store`/masked_store);
// Z256/Z128 additionally require HasVLX.
3526 multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3527 AVX512VLVectorVTInfo _, Predicate prd,
3528 X86SchedWriteMoveLSWidths Sched,
3529 string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3530 let Predicates = [prd] in
3531 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3532 masked_store, Sched.ZMM, "",
3533 NoMRPattern>, EVEX_V512;
3534 let Predicates = [prd, HasVLX] in {
3535 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3536 masked_store, Sched.YMM,
3537 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3538 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3539 masked_store, Sched.XMM, EVEX2VEXOvrd,
3540 NoMRPattern>, EVEX_V128;
// Aligned-store family across vector lengths (alignedstore /
// masked_store_aligned); mirrors avx512_store_vl.
3544 multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3545 AVX512VLVectorVTInfo _, Predicate prd,
3546 X86SchedWriteMoveLSWidths Sched,
3547 string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3548 let Predicates = [prd] in
3549 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3550 masked_store_aligned, Sched.ZMM, "",
3551 NoMRPattern>, EVEX_V512;
3553 let Predicates = [prd, HasVLX] in {
3554 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3555 masked_store_aligned, Sched.YMM,
3556 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3557 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3558 masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
3559 NoMRPattern>, EVEX_V128;
// Concrete move families: aligned fp (VMOVAPS/VMOVAPD), unaligned fp
// (VMOVUPS/VMOVUPD), aligned int (VMOVDQA32/64), unaligned int
// (VMOVDQU8/16/32/64). Byte/word forms require HasBWI; others HasAVX512.
3563 defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3564 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3565 avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3566 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3567 PS, EVEX_CD8<32, CD8VF>;
3569 defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3570 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3571 avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3572 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3573 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3575 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3576 SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
3577 avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3578 SchedWriteFMoveLS, "VMOVUPS">,
3579 PS, EVEX_CD8<32, CD8VF>;
3581 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3582 SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
3583 avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3584 SchedWriteFMoveLS, "VMOVUPD">,
3585 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3587 defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3588 HasAVX512, SchedWriteVecMoveLS,
3590 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3591 HasAVX512, SchedWriteVecMoveLS,
3593 PD, EVEX_CD8<32, CD8VF>;
3595 defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3596 HasAVX512, SchedWriteVecMoveLS,
3598 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3599 HasAVX512, SchedWriteVecMoveLS,
3601 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3603 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3604 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3605 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3606 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3607 XD, EVEX_CD8<8, CD8VF>;
3609 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3610 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3611 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3612 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3613 XD, VEX_W, EVEX_CD8<16, CD8VF>;
3615 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3616 SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
3617 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3618 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3619 XS, EVEX_CD8<32, CD8VF>;
3621 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3622 SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
3623 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3624 SchedWriteVecMoveLS, "VMOVDQU">,
3625 XS, VEX_W, EVEX_CD8<64, CD8VF>;
3627 // Special instructions to help with spilling when we don't have VLX. We need
3628 // to load or store from a ZMM register instead. These are converted in
3629 // expandPostRAPseudos.
// Load pseudos (opcode 0): XMM/YMM spill reloads routed through ZMM ops.
3630 let isReMaterializable = 1, canFoldAsLoad = 1,
3631 isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3632 def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3633 "", []>, Sched<[WriteFLoadX]>;
3634 def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3635 "", []>, Sched<[WriteFLoadY]>;
3636 def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3637 "", []>, Sched<[WriteFLoadX]>;
3638 def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3639 "", []>, Sched<[WriteFLoadY]>;
// Store pseudos: corresponding XMM/YMM spill stores.
3642 let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3643 def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3644 "", []>, Sched<[WriteFStoreX]>;
3645 def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3646 "", []>, Sched<[WriteFStoreY]>;
3647 def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3648 "", []>, Sched<[WriteFStoreX]>;
3649 def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3650 "", []>, Sched<[WriteFStoreY]>;
// vselect with zeros in the true position is lowered as a zero-masking move
// with the mask inverted via KNOTW (v8i1 masks are first widened to VK16).
3653 def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3654 (v8i64 VR512:$src))),
3655 (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3658 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3659 (v16i32 VR512:$src))),
3660 (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3662 // These patterns exist to prevent the above patterns from introducing a second
3663 // mask inversion when one already exists.
3664 def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
3665 (v8i64 immAllZerosV),
3666 (v8i64 VR512:$src))),
3667 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3668 def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
3669 (v16i32 immAllZerosV),
3670 (v16i32 VR512:$src))),
3671 (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
// Lowers a narrow masked select (vselect) through the wide masked-move
// instruction: widen operands and mask, run InstrStr#"rrk"/"rrkz", then the
// (not fully visible here) extract back to the narrow register.
3673 multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3674 X86VectorVTInfo Wide> {
3675 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3676 Narrow.RC:$src1, Narrow.RC:$src0)),
3679 (!cast<Instruction>(InstrStr#"rrk")
3680 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3681 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3682 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
// Zero-passthru variant uses the zero-masking "rrkz" form.
3685 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3686 Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3689 (!cast<Instruction>(InstrStr#"rrkz")
3690 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3691 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3695 // Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
3696 // available. Use a 512-bit operation and extract.
3697 let Predicates = [HasAVX512, NoVLX] in {
3698 defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3699 defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3700 defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3701 defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3703 defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3704 defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3705 defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3706 defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
// Byte/word element selects require BWI.
3709 let Predicates = [HasBWI, NoVLX] in {
3710 defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3711 defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3713 defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3714 defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
// 512-bit integer loads/stores of any element width are selected as the
// 64-bit-element forms (VMOVDQA64Z / VMOVDQU64Z) — the element size in the
// mnemonic only matters for masking, which these unmasked patterns don't use.
3717 let Predicates = [HasAVX512] in {
3719 def : Pat<(alignedloadv16i32 addr:$src),
3720 (VMOVDQA64Zrm addr:$src)>;
3721 def : Pat<(alignedloadv32i16 addr:$src),
3722 (VMOVDQA64Zrm addr:$src)>;
3723 def : Pat<(alignedloadv64i8 addr:$src),
3724 (VMOVDQA64Zrm addr:$src)>;
3725 def : Pat<(loadv16i32 addr:$src),
3726 (VMOVDQU64Zrm addr:$src)>;
3727 def : Pat<(loadv32i16 addr:$src),
3728 (VMOVDQU64Zrm addr:$src)>;
3729 def : Pat<(loadv64i8 addr:$src),
3730 (VMOVDQU64Zrm addr:$src)>;
3733 def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3734 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3735 def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3736 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3737 def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3738 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3739 def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3740 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3741 def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3742 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3743 def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3744 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
// Same idea at 128/256 bits under VLX: all integer element widths route
// through the 64-bit-element Z128/Z256 move instructions.
3747 let Predicates = [HasVLX] in {
// 128-bit loads.
3749 def : Pat<(alignedloadv4i32 addr:$src),
3750 (VMOVDQA64Z128rm addr:$src)>;
3751 def : Pat<(alignedloadv8i16 addr:$src),
3752 (VMOVDQA64Z128rm addr:$src)>;
3753 def : Pat<(alignedloadv16i8 addr:$src),
3754 (VMOVDQA64Z128rm addr:$src)>;
3755 def : Pat<(loadv4i32 addr:$src),
3756 (VMOVDQU64Z128rm addr:$src)>;
3757 def : Pat<(loadv8i16 addr:$src),
3758 (VMOVDQU64Z128rm addr:$src)>;
3759 def : Pat<(loadv16i8 addr:$src),
3760 (VMOVDQU64Z128rm addr:$src)>;
// 128-bit stores.
3763 def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3764 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3765 def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3766 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3767 def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3768 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3769 def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3770 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3771 def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3772 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3773 def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3774 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
// 256-bit loads.
3777 def : Pat<(alignedloadv8i32 addr:$src),
3778 (VMOVDQA64Z256rm addr:$src)>;
3779 def : Pat<(alignedloadv16i16 addr:$src),
3780 (VMOVDQA64Z256rm addr:$src)>;
3781 def : Pat<(alignedloadv32i8 addr:$src),
3782 (VMOVDQA64Z256rm addr:$src)>;
3783 def : Pat<(loadv8i32 addr:$src),
3784 (VMOVDQU64Z256rm addr:$src)>;
3785 def : Pat<(loadv16i16 addr:$src),
3786 (VMOVDQU64Z256rm addr:$src)>;
3787 def : Pat<(loadv32i8 addr:$src),
3788 (VMOVDQU64Z256rm addr:$src)>;
// 256-bit stores.
3791 def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3792 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3793 def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3794 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3795 def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3796 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3797 def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3798 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3799 def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3800 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3801 def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3802 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3805 // Move Int Doubleword to Packed Double Int
// GPR -> XMM moves (vmovd/vmovq) plus bitcast GPR <-> scalar-FP moves.
3807 let ExeDomain = SSEPackedInt in {
3808 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3809 "vmovd\t{$src, $dst|$dst, $src}",
3811 (v4i32 (scalar_to_vector GR32:$src)))]>,
3812 EVEX, Sched<[WriteVecMoveFromGpr]>;
3813 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3814 "vmovd\t{$src, $dst|$dst, $src}",
3816 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3817 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
3818 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3819 "vmovq\t{$src, $dst|$dst, $src}",
3821 (v2i64 (scalar_to_vector GR64:$src)))]>,
3822 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
// Load form exists for disassembly only (no selection pattern).
3823 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3824 def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3826 "vmovq\t{$src, $dst|$dst, $src}", []>,
3827 EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
// Bitcast moves between GR64 and FR64X (codegen-only).
3828 let isCodeGenOnly = 1 in {
3829 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3830 "vmovq\t{$src, $dst|$dst, $src}",
3831 [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3832 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3833 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3834 "vmovq\t{$src, $dst|$dst, $src}",
3835 [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3836 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3838 } // ExeDomain = SSEPackedInt
3840 // Move Int Doubleword to Single Scalar
// Codegen-only bitcast move GR32 -> FR32X.
3842 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3843 def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3844 "vmovd\t{$src, $dst|$dst, $src}",
3845 [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3846 EVEX, Sched<[WriteVecMoveFromGpr]>;
3847 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3849 // Move doubleword from xmm register to r/m32
// XMM -> GPR / memory moves: extract element 0 of a v4i32/v2i64 vector.
3851 let ExeDomain = SSEPackedInt in {
3852 def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3853 "vmovd\t{$src, $dst|$dst, $src}",
3854 [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3856 EVEX, Sched<[WriteVecMoveToGpr]>;
3857 def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
3858 (ins i32mem:$dst, VR128X:$src),
3859 "vmovd\t{$src, $dst|$dst, $src}",
3860 [(store (i32 (extractelt (v4i32 VR128X:$src),
3861 (iPTR 0))), addr:$dst)]>,
3862 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
3863 } // ExeDomain = SSEPackedInt
3865 // Move quadword from xmm1 register to r/m64
3867 let ExeDomain = SSEPackedInt in {
3868 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3869 "vmovq\t{$src, $dst|$dst, $src}",
3870 [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3872 PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
3873 Requires<[HasAVX512]>;
// Store form kept for the (dis)assembler only; no selection pattern.
3875 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
3876 def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3877 "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
3878 EVEX, VEX_W, Sched<[WriteVecStore]>,
3879 Requires<[HasAVX512, In64BitMode]>;
// 0xD6 encoding: store low 64 bits of an XMM register to memory.
3881 def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3882 (ins i64mem:$dst, VR128X:$src),
3883 "vmovq\t{$src, $dst|$dst, $src}",
3884 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3886 EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
3887 Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
// Register-register variant of the 0xD6 (store-direction) encoding;
// disassembly-only, used via the "vmovq.s" alias below.
3889 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
3890 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3892 "vmovq\t{$src, $dst|$dst, $src}", []>,
3893 EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
3894 } // ExeDomain = SSEPackedInt
// "vmovq.s" selects the store-direction (0xD6) register-register encoding.
3896 def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
3897 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
3899 let Predicates = [HasAVX512] in {
3900 def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
3901 (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
3904 // Move Scalar Single to Double Int
// Codegen-only FR32X -> GR32 bitcast (vmovd mnemonic).
3906 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3907 def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3909 "vmovd\t{$src, $dst|$dst, $src}",
3910 [(set GR32:$dst, (bitconvert FR32X:$src))]>,
3911 EVEX, Sched<[WriteVecMoveToGpr]>;
3912 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3914 // Move Quadword Int to Packed Quadword Int
// Load 64 bits from memory into element 0 of a v2i64 (XS-prefixed vmovq).
3916 let ExeDomain = SSEPackedInt in {
3917 def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3919 "vmovq\t{$src, $dst|$dst, $src}",
3921 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3922 EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
3923 } // ExeDomain = SSEPackedInt
3925 // Allow "vmovd" but print "vmovq".
3926 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3927 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
3928 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3929 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3931 //===----------------------------------------------------------------------===//
3932 // AVX-512 MOVSS, MOVSD
3933 //===----------------------------------------------------------------------===//
// Defines the full family of EVEX scalar-move instructions (vmovss/vmovsd):
//   rr / rrk / rrkz  - register merge move, plus masked and zero-masked forms
//   rm / rm_alt      - scalar load that zero-extends into the full vector
//                      (rm_alt is the codegen-only FR32/FR64 variant)
//   rmk / rmkz       - masked loads (no patterns; selected by hand below)
//   mr / mrk         - scalar store, plus the masked store form
// `asm` is the mnemonic, `OpNode` the merge-move DAG node (X86Movss/X86Movsd),
// `vzload_frag` the zero-extending-load fragment, `_` the 128-bit VT info.
3935 multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
3936 X86VectorVTInfo _> {
// Register form is only selected under OptForSize; OptForSpeed prefers
// blends (see the patterns later in this file).
3937 let Predicates = [HasAVX512, OptForSize] in
3938 def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3939 (ins _.RC:$src1, _.RC:$src2),
3940 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3941 [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3942 _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
3943 def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3944 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3945 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3946 "$dst {${mask}} {z}, $src1, $src2}"),
3947 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3948 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3950 _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
// Merge-masked form ties the pass-through operand to the destination.
3951 let Constraints = "$src0 = $dst" in
3952 def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3953 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3954 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3955 "$dst {${mask}}, $src1, $src2}"),
3956 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3957 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3958 (_.VT _.RC:$src0))))],
3959 _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
3960 let canFoldAsLoad = 1, isReMaterializable = 1 in {
3961 def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
3962 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3963 [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
3964 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3965 // _alt version uses FR32/FR64 register class.
3966 let isCodeGenOnly = 1 in
3967 def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3968 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3969 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3970 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
// Masked load forms carry no patterns; they are matched explicitly by the
// avx512_load_scalar_lowering* multiclasses below.
3972 let mayLoad = 1, hasSideEffects = 0 in {
3973 let Constraints = "$src0 = $dst" in
3974 def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3975 (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3976 !strconcat(asm, "\t{$src, $dst {${mask}}|",
3977 "$dst {${mask}}, $src}"),
3978 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
3979 def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3980 (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3981 !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3982 "$dst {${mask}} {z}, $src}"),
3983 [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
3985 def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
3986 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3987 [(store _.FRC:$src, addr:$dst)], _.ExeDomain>,
3988 EVEX, Sched<[WriteFStore]>;
3989 let mayStore = 1, hasSideEffects = 0 in
3990 def mrk: AVX512PI<0x11, MRMDestMem, (outs),
3991 (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
3992 !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
3993 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
// Instantiate the scalar-move family for f32 (XS prefix) and f64 (XD+W).
3997 defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
3998 VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
4000 defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
4001 VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Fold a scalar X86selects (between two FR values, or FR and zero) that is
// inserted into a vector via OpNode into a single masked/zero-masked
// register move. FR operands are retyped into the 128-bit class first.
4004 multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
4005 PatLeaf ZeroFP, X86VectorVTInfo _> {
4007 def : Pat<(_.VT (OpNode _.RC:$src0,
4008 (_.VT (scalar_to_vector
4009 (_.EltVT (X86selects VK1WM:$mask,
4010 (_.EltVT _.FRC:$src1),
4011 (_.EltVT _.FRC:$src2))))))),
4012 (!cast<Instruction>(InstrStr#rrk)
4013 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
4016 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
// Zero fallback value -> zero-masked (rrkz) form.
4018 def : Pat<(_.VT (OpNode _.RC:$src0,
4019 (_.VT (scalar_to_vector
4020 (_.EltVT (X86selects VK1WM:$mask,
4021 (_.EltVT _.FRC:$src1),
4022 (_.EltVT ZeroFP))))))),
4023 (!cast<Instruction>(InstrStr#rrkz)
4026 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
// Match a 512-bit masked store whose source is a 128-bit vector widened with
// insert_subvector, and emit the masked scalar store (mrk). `Mask` is the
// DAG shape the mask takes; MaskRC is its GPR class, copied into VK1WM.
4029 multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4030 dag Mask, RegisterClass MaskRC> {
4032 def : Pat<(masked_store
4033 (_.info512.VT (insert_subvector undef,
4034 (_.info128.VT _.info128.RC:$src),
4035 (iPTR 0))), addr:$dst, Mask),
4036 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4037 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4038 _.info128.RC:$src)>;
// As avx512_store_scalar_lowering, but the mask GPR is narrower than i32 and
// must be widened with INSERT_SUBREG(IMPLICIT_DEF, ..., subreg) before being
// copied into the VK1WM mask class.
4042 multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
4043 AVX512VLVectorVTInfo _,
4044 dag Mask, RegisterClass MaskRC,
4045 SubRegIndex subreg> {
4047 def : Pat<(masked_store
4048 (_.info512.VT (insert_subvector undef,
4049 (_.info128.VT _.info128.RC:$src),
4050 (iPTR 0))), addr:$dst, Mask),
4051 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4052 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4053 _.info128.RC:$src)>;
4057 // This matches the more recent codegen from clang that avoids emitting a 512
4058 // bit masked store directly. Codegen will widen 128-bit masked store to 512
4059 // bits on AVX512F only targets.
// Two patterns: Mask512 covers the widened AVX512F-only shape, Mask128 the
// native 128-bit masked store available with AVX512VL. Both lower to the
// same masked scalar store (mrk).
4060 multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4061 AVX512VLVectorVTInfo _,
4062 dag Mask512, dag Mask128,
4063 RegisterClass MaskRC,
4064 SubRegIndex subreg> {
4067 def : Pat<(masked_store
4068 (_.info512.VT (insert_subvector undef,
4069 (_.info128.VT _.info128.RC:$src),
4070 (iPTR 0))), addr:$dst, Mask512),
4071 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4072 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4073 _.info128.RC:$src)>;
4075 // AVX512VL pattern.
4076 def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4077 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4078 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4079 _.info128.RC:$src)>;
// Match 512-bit masked loads whose result is narrowed back to 128 bits.
// Zero pass-through selects the zero-masked load (rmkz); a vzmovl'd register
// pass-through selects the merge-masked load (rmk).
4082 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4083 dag Mask, RegisterClass MaskRC> {
4085 def : Pat<(_.info128.VT (extract_subvector
4086 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4087 _.info512.ImmAllZerosV)),
4089 (!cast<Instruction>(InstrStr#rmkz)
4090 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4093 def : Pat<(_.info128.VT (extract_subvector
4094 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4095 (_.info512.VT (insert_subvector undef,
4096 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4099 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4100 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
// As avx512_load_scalar_lowering, but widens a sub-i32 mask GPR via
// INSERT_SUBREG before copying it into VK1WM.
4105 multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4106 AVX512VLVectorVTInfo _,
4107 dag Mask, RegisterClass MaskRC,
4108 SubRegIndex subreg> {
4110 def : Pat<(_.info128.VT (extract_subvector
4111 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4112 _.info512.ImmAllZerosV)),
4114 (!cast<Instruction>(InstrStr#rmkz)
4115 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4118 def : Pat<(_.info128.VT (extract_subvector
4119 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4120 (_.info512.VT (insert_subvector undef,
4121 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4124 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4125 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4130 // This matches the more recent codegen from clang that avoids emitting a 512
4131 // bit masked load directly. Codegen will widen 128-bit masked load to 512
4132 // bits on AVX512F only targets.
// Mask512 covers the widened AVX512F-only shape; Mask128 the native 128-bit
// masked load available with AVX512VL. Zero pass-through -> rmkz, register
// (vzmovl) pass-through -> rmk, in both widths.
4133 multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4134 AVX512VLVectorVTInfo _,
4135 dag Mask512, dag Mask128,
4136 RegisterClass MaskRC,
4137 SubRegIndex subreg> {
4138 // AVX512F patterns.
4139 def : Pat<(_.info128.VT (extract_subvector
4140 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4141 _.info512.ImmAllZerosV)),
4143 (!cast<Instruction>(InstrStr#rmkz)
4144 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4147 def : Pat<(_.info128.VT (extract_subvector
4148 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4149 (_.info512.VT (insert_subvector undef,
4150 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4153 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4154 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4157 // AVX512VL patterns.
4158 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4159 _.info128.ImmAllZerosV)),
4160 (!cast<Instruction>(InstrStr#rmkz)
4161 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4164 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4165 (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4166 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4167 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
// Instantiate the scalar move/store/load lowerings for VMOVSS (f32) and
// VMOVSD (f64). The mask DAG shapes differ by the GPR class holding the
// condition: GR32 (truncated), GR16 (sub_16bit) or GR8 (sub_8bit), each
// masked down to its lowest bit.
4171 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4172 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4174 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4175 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4176 defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4177 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4178 defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4179 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4181 defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4182 (v16i1 (insert_subvector
4183 (v16i1 immAllZerosV),
4184 (v4i1 (extract_subvector
4185 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4188 (v4i1 (extract_subvector
4189 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4190 (iPTR 0))), GR8, sub_8bit>;
4191 defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4196 (v16i1 immAllZerosV),
4197 (v2i1 (extract_subvector
4198 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4202 (v2i1 (extract_subvector
4203 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4204 (iPTR 0))), GR8, sub_8bit>;
4206 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4207 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4208 defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4209 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4210 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4211 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4213 defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4214 (v16i1 (insert_subvector
4215 (v16i1 immAllZerosV),
4216 (v4i1 (extract_subvector
4217 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4220 (v4i1 (extract_subvector
4221 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4222 (iPTR 0))), GR8, sub_8bit>;
4223 defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4228 (v16i1 immAllZerosV),
4229 (v2i1 (extract_subvector
4230 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4234 (v2i1 (extract_subvector
4235 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4236 (iPTR 0))), GR8, sub_8bit>;
// Lower scalar f32/f64 X86selects directly to masked VMOVSS/VMOVSD:
// reg/reg -> rrk, reg/zero -> rrkz, load/reg -> rmk, load/zero -> rmkz.
// FR operands are moved through VR128X since the instructions are vector-typed.
4238 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4239 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4240 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4241 VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4242 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4244 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4245 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4246 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4248 def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
4250 (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
4251 VK1WM:$mask, addr:$src)),
4253 def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
4254 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
4256 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4257 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4258 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4259 VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4260 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4262 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
4263 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4264 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4266 def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
4268 (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
4269 VK1WM:$mask, addr:$src)),
4271 def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
4272 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
// Reversed-encoding (store-direction, opcode 0x11) register forms of
// VMOVSS/VMOVSD and their masked variants. Codegen-only; FoldGenData links
// each to its forward (0x10) twin so MC can fold between encodings. Selected
// in assembly via the ".s" aliases below.
4274 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
4275 def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4276 (ins VR128X:$src1, VR128X:$src2),
4277 "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4278 []>, XS, EVEX_4V, VEX_LIG,
4279 FoldGenData<"VMOVSSZrr">,
4280 Sched<[SchedWriteFShuffle.XMM]>;
4282 let Constraints = "$src0 = $dst" in
4283 def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4284 (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4285 VR128X:$src1, VR128X:$src2),
4286 "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4287 "$dst {${mask}}, $src1, $src2}",
4288 []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
4289 FoldGenData<"VMOVSSZrrk">,
4290 Sched<[SchedWriteFShuffle.XMM]>;
4292 def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4293 (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4294 "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4295 "$dst {${mask}} {z}, $src1, $src2}",
4296 []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
4297 FoldGenData<"VMOVSSZrrkz">,
4298 Sched<[SchedWriteFShuffle.XMM]>;
4300 def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4301 (ins VR128X:$src1, VR128X:$src2),
4302 "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4303 []>, XD, EVEX_4V, VEX_LIG, VEX_W,
4304 FoldGenData<"VMOVSDZrr">,
4305 Sched<[SchedWriteFShuffle.XMM]>;
4307 let Constraints = "$src0 = $dst" in
4308 def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4309 (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4310 VR128X:$src1, VR128X:$src2),
4311 "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4312 "$dst {${mask}}, $src1, $src2}",
4313 []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
4314 VEX_W, FoldGenData<"VMOVSDZrrk">,
4315 Sched<[SchedWriteFShuffle.XMM]>;
4317 def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4318 (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4320 "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4321 "$dst {${mask}} {z}, $src1, $src2}",
4322 []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
4323 VEX_W, FoldGenData<"VMOVSDZrrkz">,
4324 Sched<[SchedWriteFShuffle.XMM]>;
// ".s" mnemonic suffix lets assembly writers force the store-direction
// (_REV) encodings, including the masked and zero-masked variants.
4327 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4328 (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4329 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4330 "$dst {${mask}}, $src1, $src2}",
4331 (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4332 VR128X:$src1, VR128X:$src2), 0>;
4333 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4334 "$dst {${mask}} {z}, $src1, $src2}",
4335 (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4336 VR128X:$src1, VR128X:$src2), 0>;
4337 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4338 (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4339 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4340 "$dst {${mask}}, $src1, $src2}",
4341 (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4342 VR128X:$src1, VR128X:$src2), 0>;
4343 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4344 "$dst {${mask}} {z}, $src1, $src2}",
4345 (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4346 VR128X:$src1, VR128X:$src2), 0>;
// Under OptForSize, "move low element and zero the rest" (X86vzmovl) uses
// VMOVSS against a zeroed register; wider vectors go through the low xmm
// subregister and are re-widened with SUBREG_TO_REG.
4348 let Predicates = [HasAVX512, OptForSize] in {
4349 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4350 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4351 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4352 (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4354 // Move low f32 and clear high bits.
4355 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4356 (SUBREG_TO_REG (i32 0),
4357 (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4358 (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4359 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4360 (SUBREG_TO_REG (i32 0),
4361 (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4362 (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4364 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4365 (SUBREG_TO_REG (i32 0),
4366 (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4367 (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4368 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4369 (SUBREG_TO_REG (i32 0),
4370 (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4371 (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4374 // Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4375 // VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
// VEX blend immediates select element 0 only: i8 1 for blendps (one dword
// lane), i8 3 for pblendw (two word lanes = one dword).
4376 let Predicates = [HasAVX512, OptForSpeed] in {
4377 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4378 (SUBREG_TO_REG (i32 0),
4379 (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4380 (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4381 (i8 1))), sub_xmm)>;
4382 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4383 (SUBREG_TO_REG (i32 0),
4384 (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4385 (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4386 (i8 3))), sub_xmm)>;
// Scalar loads widened into vectors: plain scalar_to_vector loads use
// VMOVSS/VMOVSD directly; zero-extending loads into 256/512-bit vectors
// load into xmm then re-widen with SUBREG_TO_REG (upper bits are zeroed by
// the scalar load itself).
4389 let Predicates = [HasAVX512] in {
4390 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4391 (VMOVSSZrm addr:$src)>;
4392 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4393 (VMOVSDZrm addr:$src)>;
4395 // Represent the same patterns above but in the form they appear for
4397 def : Pat<(v8f32 (X86vzload32 addr:$src)),
4398 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4399 def : Pat<(v4f64 (X86vzload64 addr:$src)),
4400 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4402 // Represent the same patterns above but in the form they appear for
4404 def : Pat<(v16f32 (X86vzload32 addr:$src)),
4405 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4406 def : Pat<(v8f64 (X86vzload64 addr:$src)),
4407 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
// vmovq xmm, xmm: copy the low 64 bits and zero the upper 64 (X86vzmovl).
4410 let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4411 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4413 "vmovq\t{$src, $dst|$dst, $src}",
4414 [(set VR128X:$dst, (v2i64 (X86vzmovl
4415 (v2i64 VR128X:$src))))]>,
// Integer move-and-zero patterns: scalar_to_vector+vzmovl maps to the
// GPR->XMM moves above; zero-extending loads map to the movd/movq loads;
// 256/512-bit results are built from the 128-bit op via SUBREG_TO_REG.
4419 let Predicates = [HasAVX512] in {
4420 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4421 (VMOVDI2PDIZrr GR32:$src)>;
4423 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4424 (VMOV64toPQIZrr GR64:$src)>;
4426 // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
4427 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
4428 (VMOVDI2PDIZrm addr:$src)>;
4429 def : Pat<(v4i32 (X86vzload32 addr:$src)),
4430 (VMOVDI2PDIZrm addr:$src)>;
4431 def : Pat<(v8i32 (X86vzload32 addr:$src)),
4432 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4433 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4434 (VMOVZPQILo2PQIZrr VR128X:$src)>;
4435 def : Pat<(v2i64 (X86vzload64 addr:$src)),
4436 (VMOVQI2PQIZrm addr:$src)>;
4437 def : Pat<(v4i64 (X86vzload64 addr:$src)),
4438 (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4440 // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4441 def : Pat<(v16i32 (X86vzload32 addr:$src)),
4442 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4443 def : Pat<(v8i64 (X86vzload64 addr:$src)),
4444 (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
// Wider vzmovl: apply the 128-bit zeroing move to the low xmm and re-widen.
4446 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4447 (SUBREG_TO_REG (i32 0),
4448 (v2f64 (VMOVZPQILo2PQIZrr
4449 (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4451 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4452 (SUBREG_TO_REG (i32 0),
4453 (v2i64 (VMOVZPQILo2PQIZrr
4454 (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4457 def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4458 (SUBREG_TO_REG (i32 0),
4459 (v2f64 (VMOVZPQILo2PQIZrr
4460 (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4462 def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4463 (SUBREG_TO_REG (i32 0),
4464 (v2i64 (VMOVZPQILo2PQIZrr
4465 (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4469 //===----------------------------------------------------------------------===//
4470 // AVX-512 - Non-temporals
4471 //===----------------------------------------------------------------------===//
// Non-temporal aligned loads. No patterns here; selection is done by the
// alignednontemporalload patterns further below.
4473 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4474 (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4475 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4476 EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4478 let Predicates = [HasVLX] in {
4479 def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4481 "vmovntdqa\t{$src, $dst|$dst, $src}",
4482 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4483 EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4485 def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4487 "vmovntdqa\t{$src, $dst|$dst, $src}",
4488 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4489 EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
// One non-temporal store instruction for a single vector width. High
// AddedComplexity makes the NT store win over the ordinary store patterns.
4492 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4493 X86SchedWriteMoveLS Sched,
4494 PatFrag st_frag = alignednontemporalstore> {
4495 let SchedRW = [Sched.MR], AddedComplexity = 400 in
4496 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4497 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4498 [(st_frag (_.VT _.RC:$src), addr:$dst)],
4499 _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
// Instantiate avx512_movnt at 512 bits (AVX512F) and, with VLX, at 256/128.
4502 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4503 AVX512VLVectorVTInfo VTInfo,
4504 X86SchedWriteMoveLSWidths Sched> {
4505 let Predicates = [HasAVX512] in
4506 defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4508 let Predicates = [HasAVX512, HasVLX] in {
4509 defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4510 defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
// Non-temporal stores: integer (vmovntdq) and packed double/single FP.
4514 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4515 SchedWriteVecMoveLSNT>, PD;
4516 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4517 SchedWriteFMoveLSNT>, PD, VEX_W;
4518 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4519 SchedWriteFMoveLSNT>, PS;
// 512-bit NT patterns: all element types funnel into VMOVNTDQZmr for stores
// and VMOVNTDQAZrm for loads (the operations are element-width agnostic).
4521 let Predicates = [HasAVX512], AddedComplexity = 400 in {
4522 def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4523 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4524 def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4525 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4526 def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4527 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4529 def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4530 (VMOVNTDQAZrm addr:$src)>;
4531 def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4532 (VMOVNTDQAZrm addr:$src)>;
4533 def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4534 (VMOVNTDQAZrm addr:$src)>;
4535 def : Pat<(v16i32 (alignednontemporalload addr:$src)),
4536 (VMOVNTDQAZrm addr:$src)>;
4537 def : Pat<(v32i16 (alignednontemporalload addr:$src)),
4538 (VMOVNTDQAZrm addr:$src)>;
4539 def : Pat<(v64i8 (alignednontemporalload addr:$src)),
4540 (VMOVNTDQAZrm addr:$src)>;
// VLX NT patterns: same funneling as above for the 256- and 128-bit widths.
4543 let Predicates = [HasVLX], AddedComplexity = 400 in {
4544 def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4545 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4546 def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4547 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4548 def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4549 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4551 def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4552 (VMOVNTDQAZ256rm addr:$src)>;
4553 def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4554 (VMOVNTDQAZ256rm addr:$src)>;
4555 def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4556 (VMOVNTDQAZ256rm addr:$src)>;
4557 def : Pat<(v8i32 (alignednontemporalload addr:$src)),
4558 (VMOVNTDQAZ256rm addr:$src)>;
4559 def : Pat<(v16i16 (alignednontemporalload addr:$src)),
4560 (VMOVNTDQAZ256rm addr:$src)>;
4561 def : Pat<(v32i8 (alignednontemporalload addr:$src)),
4562 (VMOVNTDQAZ256rm addr:$src)>;
4564 def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4565 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4566 def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4567 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4568 def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4569 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4571 def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4572 (VMOVNTDQAZ128rm addr:$src)>;
4573 def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4574 (VMOVNTDQAZ128rm addr:$src)>;
4575 def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4576 (VMOVNTDQAZ128rm addr:$src)>;
4577 def : Pat<(v4i32 (alignednontemporalload addr:$src)),
4578 (VMOVNTDQAZ128rm addr:$src)>;
4579 def : Pat<(v8i16 (alignednontemporalload addr:$src)),
4580 (VMOVNTDQAZ128rm addr:$src)>;
4581 def : Pat<(v16i8 (alignednontemporalload addr:$src)),
4582 (VMOVNTDQAZ128rm addr:$src)>;
4585 //===----------------------------------------------------------------------===//
4586 // AVX-512 - Integer arithmetic
// Integer binop, register-register and register-memory forms, with the
// standard masked variants generated by AVX512_maskable.
4588 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4589 X86VectorVTInfo _, X86FoldableSchedWrite sched,
4590 bit IsCommutable = 0> {
4591 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4592 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4593 "$src2, $src1", "$src1, $src2",
4594 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4595 IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
4598 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4599 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4600 "$src2, $src1", "$src1, $src2",
4601 (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
4602 AVX512BIBase, EVEX_4V,
4603 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Extends avx512_binop_rm with the EVEX.b broadcast-from-memory form
// (a single scalar element splat across the vector).
4606 multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4607 X86VectorVTInfo _, X86FoldableSchedWrite sched,
4608 bit IsCommutable = 0> :
4609 avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4610 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4611 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4612 "${src2}"##_.BroadcastStr##", $src1",
4613 "$src1, ${src2}"##_.BroadcastStr,
4614 (_.VT (OpNode _.RC:$src1,
4615 (_.BroadcastLdFrag addr:$src2)))>,
4616 AVX512BIBase, EVEX_4V, EVEX_B,
4617 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiate avx512_binop_rm across vector lengths: 512-bit under `prd`,
// 256/128-bit additionally gated on VLX.
4620 multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4621 AVX512VLVectorVTInfo VTInfo,
4622 X86SchedWriteWidths sched, Predicate prd,
4623 bit IsCommutable = 0> {
4624 let Predicates = [prd] in
4625 defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4626 IsCommutable>, EVEX_V512;
4628 let Predicates = [prd, HasVLX] in {
4629 defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4630 sched.YMM, IsCommutable>, EVEX_V256;
4631 defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4632 sched.XMM, IsCommutable>, EVEX_V128;
// Same as avx512_binop_rm_vl but with the broadcast (rmb) form included.
4636 multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4637 AVX512VLVectorVTInfo VTInfo,
4638 X86SchedWriteWidths sched, Predicate prd,
4639 bit IsCommutable = 0> {
4640 let Predicates = [prd] in
4641 defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4642 IsCommutable>, EVEX_V512;
4644 let Predicates = [prd, HasVLX] in {
4645 defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4646 sched.YMM, IsCommutable>, EVEX_V256;
4647 defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4648 sched.XMM, IsCommutable>, EVEX_V128;
// i64-element instantiation: broadcast form supported, VEX.W set,
// compressed-displacement scale of 64 bits.
4652 multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4653 X86SchedWriteWidths sched, Predicate prd,
4654 bit IsCommutable = 0> {
4655 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4656 sched, prd, IsCommutable>,
4657 VEX_W, EVEX_CD8<64, CD8VF>;
// i32-element instantiation: broadcast form supported, CD8 scale 32.
4660 multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4661 X86SchedWriteWidths sched, Predicate prd,
4662 bit IsCommutable = 0> {
4663 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4664 sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
// i16-element instantiation: no embedded broadcast for word elements, so this
// builds on avx512_binop_rm_vl (reg/mem only), CD8 scale 16.
4667 multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4668 X86SchedWriteWidths sched, Predicate prd,
4669 bit IsCommutable = 0> {
4670 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4671 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
// i8-element instantiation: no embedded broadcast for byte elements, so this
// builds on avx512_binop_rm_vl (reg/mem only), CD8 scale 8.
4675 multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4676 X86SchedWriteWidths sched, Predicate prd,
4677 bit IsCommutable = 0> {
4678 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4679 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
// Combines the d- and q-element variants under one name; the element-size
// suffix ("d"/"q") is appended to the mnemonic here.
4683 multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4684 SDNode OpNode, X86SchedWriteWidths sched,
4685 Predicate prd, bit IsCommutable = 0> {
4686 defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
4689 defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
// Combines the b- and w-element variants under one name; the element-size
// suffix ("b"/"w") is appended to the mnemonic here.
4693 multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4694 SDNode OpNode, X86SchedWriteWidths sched,
4695 Predicate prd, bit IsCommutable = 0> {
4696 defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
4699 defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
// All four element widths: d/q forms require only HasAVX512 while b/w forms
// require HasBWI, which is why the two halves carry different predicates.
4703 multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4704 bits<8> opc_d, bits<8> opc_q,
4705 string OpcodeStr, SDNode OpNode,
4706 X86SchedWriteWidths sched,
4707 bit IsCommutable = 0> {
4708 defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4709 sched, HasAVX512, IsCommutable>,
4710 avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4711 sched, HasBWI, IsCommutable>;
// Binop whose source and destination vector types differ (_Src vs. _Dst),
// with a separate _Brdct type for the broadcast-memory form (the broadcast
// load is bitconverted to the source type before OpNode).
4714 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
4715 X86FoldableSchedWrite sched,
4716 SDNode OpNode,X86VectorVTInfo _Src,
4717 X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4718 bit IsCommutable = 0> {
4719 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4720 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4721 "$src2, $src1","$src1, $src2",
4723 (_Src.VT _Src.RC:$src1),
4724 (_Src.VT _Src.RC:$src2))),
4726 AVX512BIBase, EVEX_4V, Sched<[sched]>;
4727 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4728 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4729 "$src2, $src1", "$src1, $src2",
4730 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4731 (_Src.LdFrag addr:$src2)))>,
4732 AVX512BIBase, EVEX_4V,
4733 Sched<[sched.Folded, sched.ReadAfterFold]>;
4735 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4736 (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
4738 "${src2}"##_Brdct.BroadcastStr##", $src1",
4739 "$src1, ${src2}"##_Brdct.BroadcastStr,
4740 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4741 (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
4742 AVX512BIBase, EVEX_4V, EVEX_B,
4743 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Integer add/sub (plain, signed/unsigned saturating), multiplies, averages
// and widening multiplies. Add-family ops are commutable (trailing 1),
// sub-family are not. VPMULLQ is EVEX-only (no VEX equivalent), hence
// NotEVEX2VEXConvertible.
4746 defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
4747 SchedWriteVecALU, 1>;
4748 defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
4749 SchedWriteVecALU, 0>;
4750 defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
4751 SchedWriteVecALU, HasBWI, 1>;
4752 defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
4753 SchedWriteVecALU, HasBWI, 0>;
4754 defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
4755 SchedWriteVecALU, HasBWI, 1>;
4756 defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
4757 SchedWriteVecALU, HasBWI, 0>;
4758 defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
4759 SchedWritePMULLD, HasAVX512, 1>, T8PD;
4760 defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
4761 SchedWriteVecIMul, HasBWI, 1>;
4762 defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
4763 SchedWriteVecIMul, HasDQI, 1>, T8PD,
4764 NotEVEX2VEXConvertible;
4765 defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
4767 defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
4769 defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
4770 SchedWriteVecIMul, HasBWI, 1>, T8PD;
4771 defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
4772 SchedWriteVecALU, HasBWI, 1>;
4773 defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
4774 SchedWriteVecIMul, HasAVX512, 1>, T8PD;
4775 defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
4776 SchedWriteVecIMul, HasAVX512, 1>;
// Length-multiplexed wrapper over avx512_binop_rm2. All widths use an i64
// broadcast type (v8i64/v4i64x/v2i64x) and 64-bit CD8 scale with VEX.W.
4778 multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
4779 X86SchedWriteWidths sched,
4780 AVX512VLVectorVTInfo _SrcVTInfo,
4781 AVX512VLVectorVTInfo _DstVTInfo,
4782 SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
4783 let Predicates = [prd] in
4784 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
4785 _SrcVTInfo.info512, _DstVTInfo.info512,
4786 v8i64_info, IsCommutable>,
4787 EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
4788 let Predicates = [HasVLX, prd] in {
4789 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
4790 _SrcVTInfo.info256, _DstVTInfo.info256,
4791 v4i64x_info, IsCommutable>,
4792 EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
4793 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
4794 _SrcVTInfo.info128, _DstVTInfo.info128,
4795 v2i64x_info, IsCommutable>,
4796 EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
// VPMULTISHIFTQB (VBMI): byte source/destination, non-commutable.
4800 defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
4801 avx512vl_i8_info, avx512vl_i8_info,
4802 X86multishift, HasVBMI, 0>, T8PD;
// Broadcast-memory form for pack-style ops with differing source/destination
// types; the broadcast load is bitconverted within the source type.
4804 multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4805 X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
4806 X86FoldableSchedWrite sched> {
4807 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4808 (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
4810 "${src2}"##_Src.BroadcastStr##", $src1",
4811 "$src1, ${src2}"##_Src.BroadcastStr,
4812 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4813 (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
4814 EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
4815 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Reg/reg and reg/mem forms for pack-style ops; CD8 scale is derived from the
// source element size.
4818 multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
4819 SDNode OpNode,X86VectorVTInfo _Src,
4820 X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
4821 bit IsCommutable = 0> {
4822 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4823 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4824 "$src2, $src1","$src1, $src2",
4826 (_Src.VT _Src.RC:$src1),
4827 (_Src.VT _Src.RC:$src2))),
4828 IsCommutable, IsCommutable>,
4829 EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
4830 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4831 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4832 "$src2, $src1", "$src1, $src2",
4833 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4834 (_Src.LdFrag addr:$src2)))>,
4835 EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
4836 Sched<[sched.Folded, sched.ReadAfterFold]>;
// i32 -> i16 pack at all lengths; the dword source permits the broadcast
// (rmb) form alongside rr/rm. Requires BWI (and VLX for sub-512 widths).
4839 multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
4841 let Predicates = [HasBWI] in
4842 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
4843 v32i16_info, SchedWriteShuffle.ZMM>,
4844 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
4845 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
4846 let Predicates = [HasBWI, HasVLX] in {
4847 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
4848 v16i16x_info, SchedWriteShuffle.YMM>,
4849 avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
4850 v16i16x_info, SchedWriteShuffle.YMM>,
4852 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
4853 v8i16x_info, SchedWriteShuffle.XMM>,
4854 avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
4855 v8i16x_info, SchedWriteShuffle.XMM>,
// i16 -> i8 pack at all lengths; word elements have no embedded broadcast,
// so only the rr/rm forms are instantiated.
4859 multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
4861 let Predicates = [HasBWI] in
4862 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
4863 SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
4864 let Predicates = [HasBWI, HasVLX] in {
4865 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
4866 v32i8x_info, SchedWriteShuffle.YMM>,
4868 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
4869 v16i8x_info, SchedWriteShuffle.XMM>,
// PMADD-style widening multiply-add at all lengths, parameterized on the
// narrow source and wide destination VT info.
4874 multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
4875 SDNode OpNode, AVX512VLVectorVTInfo _Src,
4876 AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
4877 let Predicates = [HasBWI] in
4878 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
4879 _Dst.info512, SchedWriteVecIMul.ZMM,
4880 IsCommutable>, EVEX_V512;
4881 let Predicates = [HasBWI, HasVLX] in {
4882 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
4883 _Dst.info256, SchedWriteVecIMul.YMM,
4884 IsCommutable>, EVEX_V256;
4885 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
4886 _Dst.info128, SchedWriteVecIMul.XMM,
4887 IsCommutable>, EVEX_V128;
// Pack (signed/unsigned saturate) and multiply-add instantiations.
// VPMADDWD is commutable; VPMADDUBSW (mixed signedness operands) is not.
4891 defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
4892 defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
4893 defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
4894 defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
4896 defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
4897 avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
4898 defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
4899 avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
// Integer signed/unsigned min/max. Byte/word forms need BWI; dword/qword
// need only AVX512F. The qword forms have no VEX counterpart, hence
// NotEVEX2VEXConvertible. All are commutable.
4901 defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
4902 SchedWriteVecALU, HasBWI, 1>, T8PD;
4903 defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
4904 SchedWriteVecALU, HasBWI, 1>;
4905 defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
4906 SchedWriteVecALU, HasAVX512, 1>, T8PD;
4907 defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
4908 SchedWriteVecALU, HasAVX512, 1>, T8PD,
4909 NotEVEX2VEXConvertible;
4911 defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
4912 SchedWriteVecALU, HasBWI, 1>;
4913 defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
4914 SchedWriteVecALU, HasBWI, 1>, T8PD;
4915 defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
4916 SchedWriteVecALU, HasAVX512, 1>, T8PD;
4917 defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
4918 SchedWriteVecALU, HasAVX512, 1>, T8PD,
4919 NotEVEX2VEXConvertible;
4921 defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
4922 SchedWriteVecALU, HasBWI, 1>, T8PD;
4923 defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
4924 SchedWriteVecALU, HasBWI, 1>;
4925 defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
4926 SchedWriteVecALU, HasAVX512, 1>, T8PD;
4927 defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
4928 SchedWriteVecALU, HasAVX512, 1>, T8PD,
4929 NotEVEX2VEXConvertible;
4931 defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
4932 SchedWriteVecALU, HasBWI, 1>;
4933 defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
4934 SchedWriteVecALU, HasBWI, 1>, T8PD;
4935 defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
4936 SchedWriteVecALU, HasAVX512, 1>, T8PD;
4937 defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
4938 SchedWriteVecALU, HasAVX512, 1>, T8PD,
4939 NotEVEX2VEXConvertible;
// When VLX is unavailable, widen 128/256-bit operands into a zmm register
// (INSERT_SUBREG into an IMPLICIT_DEF v8i64) so the 512-bit instruction can
// be used; the result is presumably extracted back on the lines the
// extraction dropped here (numbering skips 4944-4945 etc.) — verify upstream.
4941 // PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
4942 let Predicates = [HasDQI, NoVLX] in {
4943 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
4946 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4947 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4949 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
4952 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4956 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
4959 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4960 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
4962 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
4965 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
// Same widening trick as the PMULLQ patterns above, generalized over the
// instruction base name: selects Instr#"rr"/"rmb" 512-bit forms for v4i64
// and v2i64 min/max when only the Z variants exist.
4970 multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
4971 def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
4973 (!cast<Instruction>(Instr#"rr")
4974 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4975 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4977 def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
4979 (!cast<Instruction>(Instr#"rmb")
4980 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4984 def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
4986 (!cast<Instruction>(Instr#"rr")
4987 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4988 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
4990 def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
4992 (!cast<Instruction>(Instr#"rmb")
4993 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
// Apply the widening lowering to the four qword min/max Z instructions when
// VLX is not available.
4998 let Predicates = [HasAVX512, NoVLX] in {
4999 defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
5000 defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
5001 defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
5002 defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
5005 //===----------------------------------------------------------------------===//
5006 // AVX-512 Logical Instructions
5007 //===----------------------------------------------------------------------===//
// Bitwise logic: only d/q element forms exist in the ISA. AND/OR/XOR are
// commutable; ANDN (X86andnp) is not.
5009 defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
5010 SchedWriteVecLogic, HasAVX512, 1>;
5011 defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
5012 SchedWriteVecLogic, HasAVX512, 1>;
5013 defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
5014 SchedWriteVecLogic, HasAVX512, 1>;
5015 defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
5016 SchedWriteVecLogic, HasAVX512>;
// Byte/word vectors have no dedicated logic opcodes; bitwise ops are
// element-size agnostic, so map vXi8/vXi16 and/or/xor/andn (register and
// load forms) onto the qword-element VPxxxQZ128/Z256 instructions.
5018 let Predicates = [HasVLX] in {
5019 def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
5020 (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5021 def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
5022 (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5024 def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
5025 (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5026 def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
5027 (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5029 def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
5030 (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5031 def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
5032 (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5034 def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
5035 (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5036 def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
5037 (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5039 def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
5040 (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5041 def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
5042 (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5044 def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
5045 (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5046 def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
5047 (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5049 def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
5050 (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5051 def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
5052 (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5054 def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
5055 (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5056 def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
5057 (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5059 def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
5060 (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5061 def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
5062 (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5064 def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
5065 (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5066 def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
5067 (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5069 def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
5070 (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5071 def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
5072 (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5074 def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
5075 (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5076 def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
5077 (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5079 def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
5080 (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5081 def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
5082 (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5084 def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
5085 (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5086 def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
5087 (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5089 def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
5090 (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5091 def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
5092 (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5094 def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
5095 (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5096 def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
5097 (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
// 512-bit counterpart of the patterns above: v64i8/v32i16 logic ops reuse
// the qword-element VPxxxQZ instructions.
5100 let Predicates = [HasAVX512] in {
5101 def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5102 (VPANDQZrr VR512:$src1, VR512:$src2)>;
5103 def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5104 (VPANDQZrr VR512:$src1, VR512:$src2)>;
5106 def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5107 (VPORQZrr VR512:$src1, VR512:$src2)>;
5108 def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5109 (VPORQZrr VR512:$src1, VR512:$src2)>;
5111 def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5112 (VPXORQZrr VR512:$src1, VR512:$src2)>;
5113 def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5114 (VPXORQZrr VR512:$src1, VR512:$src2)>;
5116 def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5117 (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5118 def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5119 (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5121 def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
5122 (VPANDQZrm VR512:$src1, addr:$src2)>;
5123 def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
5124 (VPANDQZrm VR512:$src1, addr:$src2)>;
5126 def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
5127 (VPORQZrm VR512:$src1, addr:$src2)>;
5128 def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
5129 (VPORQZrm VR512:$src1, addr:$src2)>;
5131 def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
5132 (VPXORQZrm VR512:$src1, addr:$src2)>;
5133 def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
5134 (VPXORQZrm VR512:$src1, addr:$src2)>;
5136 def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
5137 (VPANDNQZrm VR512:$src1, addr:$src2)>;
5138 def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
5139 (VPANDNQZrm VR512:$src1, addr:$src2)>;
// Masked selects over a logic op computed in a different element type: the
// bitconvert bridges the logic-op type (IntInfo) to the select type (_),
// selecting merge-masked (rrk/rmk) or zero-masked (rrkz/rmkz) instructions.
5142 // Patterns to catch vselect with different type than logic op.
5143 multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
5145 X86VectorVTInfo IntInfo> {
5146 // Masked register-register logical operations.
5147 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5148 (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5150 (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5151 _.RC:$src1, _.RC:$src2)>;
5153 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5154 (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5156 (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5159 // Masked register-memory logical operations.
5160 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5161 (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5162 (load addr:$src2)))),
5164 (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5165 _.RC:$src1, addr:$src2)>;
5166 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5167 (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5168 (load addr:$src2)))),
5170 (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
// Broadcast-memory analogue of avx512_logical_lowering: masked selects over
// a logic op whose second operand is a broadcast load (rmbk/rmbkz forms).
5174 multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
5176 X86VectorVTInfo IntInfo> {
5177 // Register-broadcast logical operations.
5178 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5180 (IntInfo.VT (OpNode _.RC:$src1,
5181 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5183 (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5184 _.RC:$src1, addr:$src2)>;
5185 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5187 (IntInfo.VT (OpNode _.RC:$src1,
5188 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5190 (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
5191 _.RC:$src1, addr:$src2)>;
// Instantiate the masked-select lowering at all three vector lengths
// (Z128/Z256 under HasVLX, Z under HasAVX512).
5194 multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
5195 AVX512VLVectorVTInfo SelectInfo,
5196 AVX512VLVectorVTInfo IntInfo> {
5197 let Predicates = [HasVLX] in {
5198 defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
5200 defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
5203 let Predicates = [HasAVX512] in {
5204 defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
// Same length multiplexing as avx512_logical_lowering_sizes, for the
// broadcast-memory lowering patterns.
5209 multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
5210 AVX512VLVectorVTInfo SelectInfo,
5211 AVX512VLVectorVTInfo IntInfo> {
5212 let Predicates = [HasVLX] in {
5213 defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
5214 SelectInfo.info128, IntInfo.info128>;
5215 defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
5216 SelectInfo.info256, IntInfo.info256>;
5218 let Predicates = [HasAVX512] in {
5219 defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
5220 SelectInfo.info512, IntInfo.info512>;
// Cross product of select element type (i64/i32/f32/f64) against the logic
// op's element type; the select type chooses the "Q" or "D" instruction
// flavor. Broadcast patterns are added for the native-width D/Q cases.
5224 multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
5225 // i64 vselect with i32/i16/i8 logic op
5226 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5228 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5230 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5233 // i32 vselect with i64/i16/i8 logic op
5234 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5236 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5238 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5241 // f32 vselect with i64/i32/i16/i8 logic op
5242 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5244 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5246 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5248 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5251 // f64 vselect with i64/i32/i16/i8 logic op
5252 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5254 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5256 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5258 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5261 defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
5264 defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
// Instantiate the masked-select lowering for all four logic ops.
5269 defm : avx512_logical_lowering_types<"VPAND", and>;
5270 defm : avx512_logical_lowering_types<"VPOR", or>;
5271 defm : avx512_logical_lowering_types<"VPXOR", xor>;
5272 defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
5274 //===----------------------------------------------------------------------===//
5275 // AVX-512 FP arithmetic
5276 //===----------------------------------------------------------------------===//
// Scalar FP binop: masked intrinsic forms (rr_Int/rm_Int, via VecNode on the
// full vector type) plus unmasked codegen-only rr/rm forms on the scalar
// register class (FRC) used for plain IR fadd/fsub/etc. (OpNode).
5278 multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5279 SDNode OpNode, SDNode VecNode,
5280 X86FoldableSchedWrite sched, bit IsCommutable> {
5281 let ExeDomain = _.ExeDomain in {
5282 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5283 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5284 "$src2, $src1", "$src1, $src2",
5285 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5288 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5289 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5290 "$src2, $src1", "$src1, $src2",
5291 (_.VT (VecNode _.RC:$src1,
5292 _.ScalarIntMemCPat:$src2))>,
5293 Sched<[sched.Folded, sched.ReadAfterFold]>;
5294 let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5295 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5296 (ins _.FRC:$src1, _.FRC:$src2),
5297 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5298 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5300 let isCommutable = IsCommutable;
5302 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5303 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5304 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5305 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5306 (_.ScalarLdFrag addr:$src2)))]>,
5307 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Embedded-rounding (EVEX.B + static RC operand) variant of the scalar FP
// binop intrinsic form; the rounding mode is passed through as $rc.
5312 multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5313 SDNode VecNode, X86FoldableSchedWrite sched,
5314 bit IsCommutable = 0> {
5315 let ExeDomain = _.ExeDomain in
5316 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5317 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5318 "$rc, $src2, $src1", "$src1, $src2, $rc",
5319 (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5321 EVEX_B, EVEX_RC, Sched<[sched]>;
// Like avx512_fp_scalar but with a suppress-all-exceptions ({sae}) variant
// (rrb_Int via SaeNode) instead of embedded rounding. The codegen-only
// rr/rm forms carry EVEX2VEXOverride so EVEX->VEX compression picks the
// named VEX instruction.
5323 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5324 SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5325 X86FoldableSchedWrite sched, bit IsCommutable,
5326 string EVEX2VexOvrd> {
5327 let ExeDomain = _.ExeDomain in {
5328 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5329 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5330 "$src2, $src1", "$src1, $src2",
5331 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5334 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5335 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5336 "$src2, $src1", "$src1, $src2",
5337 (_.VT (VecNode _.RC:$src1,
5338 _.ScalarIntMemCPat:$src2))>,
5339 Sched<[sched.Folded, sched.ReadAfterFold]>;
5341 let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5342 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5343 (ins _.FRC:$src1, _.FRC:$src2),
5344 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5345 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5347 EVEX2VEXOverride<EVEX2VexOvrd#"rr"> {
5348 let isCommutable = IsCommutable;
5350 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5351 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5352 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5353 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5354 (_.ScalarLdFrag addr:$src2)))]>,
5355 Sched<[sched.Folded, sched.ReadAfterFold]>,
5356 EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
5359 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5360 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5361 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5362 (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5363 EVEX_B, Sched<[sched]>;
// ss/sd pair with embedded-rounding variants: f32 (XS prefix) and f64
// (XD + VEX.W), each combining the plain scalar and round forms.
5367 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
5368 SDNode VecNode, SDNode RndNode,
5369 X86SchedWriteSizes sched, bit IsCommutable> {
5370 defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5371 sched.PS.Scl, IsCommutable>,
5372 avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5373 sched.PS.Scl, IsCommutable>,
5374 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5375 defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5376 sched.PD.Scl, IsCommutable>,
5377 avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5378 sched.PD.Scl, IsCommutable>,
5379 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
// ss/sd pair with {sae} variants (used by min/max, which take SAE rather
// than a rounding mode).
5382 multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5383 SDNode VecNode, SDNode SaeNode,
5384 X86SchedWriteSizes sched, bit IsCommutable> {
5385 defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5386 VecNode, SaeNode, sched.PS.Scl, IsCommutable,
5388 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5389 defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5390 VecNode, SaeNode, sched.PD.Scl, IsCommutable,
5392 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
// Scalar FP add/mul (commutable), sub/div (not), and min/max (SAE, not
// commutable due to IEEE NaN/zero ordering semantics of X86fmin/X86fmax).
5394 defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86fadds, X86faddRnds,
5395 SchedWriteFAddSizes, 1>;
5396 defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmuls, X86fmulRnds,
5397 SchedWriteFMulSizes, 1>;
5398 defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubs, X86fsubRnds,
5399 SchedWriteFAddSizes, 0>;
5400 defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivs, X86fdivRnds,
5401 SchedWriteFDivSizes, 0>;
5402 defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5403 SchedWriteFCmpSizes, 0>;
5404 defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5405 SchedWriteFCmpSizes, 0>;
// Codegen-only commutable scalar min/max used when fast-math allows
// reassociation (X86fminc/X86fmaxc instead of the order-sensitive nodes).
5407 // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
5408 // X86fminc and X86fmaxc instead of X86fmin and X86fmax
5409 multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5410 X86VectorVTInfo _, SDNode OpNode,
5411 X86FoldableSchedWrite sched,
5412 string EVEX2VEXOvrd> {
5413 let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5414 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5415 (ins _.FRC:$src1, _.FRC:$src2),
5416 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5417 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5418 Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
5419 let isCommutable = 1;
5421 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5422 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5423 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5424 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5425 (_.ScalarLdFrag addr:$src2)))]>,
5426 Sched<[sched.Folded, sched.ReadAfterFold]>,
5427 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
// Commutable min/max instantiations; note they share opcodes (0x5D/0x5F) and
// mnemonics with VMIN/VMAX above but select different SDNodes.
5430 defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5431 SchedWriteFCmp.Scl, "VMINCSS">, XS,
5432 EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5434 defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5435 SchedWriteFCmp.Scl, "VMINCSD">, XD,
5436 VEX_W, EVEX_4V, VEX_LIG,
5437 EVEX_CD8<64, CD8VT1>;
5439 defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5440 SchedWriteFCmp.Scl, "VMAXCSS">, XS,
5441 EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5443 defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5444 SchedWriteFCmp.Scl, "VMAXCSD">, XD,
5445 VEX_W, EVEX_4V, VEX_LIG,
5446 EVEX_CD8<64, CD8VT1>;
// Packed FP binop: masked rr, rm and broadcast (rmb, EVEX.B) forms; the type
// suffix comes from _.Suffix. hasSideEffects = 0 with mayLoad on the memory
// forms since OpNode may be a pattern-less operator here.
5448 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5449 X86VectorVTInfo _, X86FoldableSchedWrite sched,
5451 bit IsKCommutable = IsCommutable> {
5452 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
5453 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5454 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5455 "$src2, $src1", "$src1, $src2",
5456 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
5457 IsKCommutable, IsKCommutable>,
5458 EVEX_4V, Sched<[sched]>;
5459 let mayLoad = 1 in {
5460 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5461 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
5462 "$src2, $src1", "$src1, $src2",
5463 (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5464 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5465 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5466 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
5467 "${src2}"##_.BroadcastStr##", $src1",
5468 "$src1, ${src2}"##_.BroadcastStr,
5469 (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5471 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Packed FP binop with an explicit static rounding-mode operand ($rc,
// AVX512RC). Register-register only; EVEX_B + EVEX_RC encode the rounding
// control.
5476 multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5477 SDPatternOperator OpNodeRnd,
5478 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5479 let ExeDomain = _.ExeDomain in
5480 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5481 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
5482 "$rc, $src2, $src1", "$src1, $src2, $rc",
5483 (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>,
5484 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
// Packed FP binop with {sae} (suppress-all-exceptions). Register-register
// only; EVEX_B selects the SAE encoding.
5487 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5488 SDPatternOperator OpNodeSAE,
5489 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5490 let ExeDomain = _.ExeDomain in
5491 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5492 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5493 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5494 (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5495 EVEX_4V, EVEX_B, Sched<[sched]>;
// Instantiates a packed FP binop for all sizes: PS/PD at 512-bit under `prd`,
// plus 128/256-bit variants when AVX512VL is also available.
// IsPD128Commutable allows the 128-bit PD form's commutability to differ
// (see PDZ128 below, which passes both flags).
5498 multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5499 Predicate prd, X86SchedWriteSizes sched,
5500 bit IsCommutable = 0,
5501 bit IsPD128Commutable = IsCommutable> {
5502 let Predicates = [prd] in {
5503 defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
5504 sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
5505 EVEX_CD8<32, CD8VF>;
5506 defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
5507 sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
5508 EVEX_CD8<64, CD8VF>;
5511 // Define only if AVX512VL feature is present.
5512 let Predicates = [prd, HasVLX] in {
5513 defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
5514 sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
5515 EVEX_CD8<32, CD8VF>;
5516 defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
5517 sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
5518 EVEX_CD8<32, CD8VF>;
5519 defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
5520 sched.PD.XMM, IsPD128Commutable,
5521 IsCommutable>, EVEX_V128, PD, VEX_W,
5522 EVEX_CD8<64, CD8VF>;
5523 defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
5524 sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
5525 EVEX_CD8<64, CD8VF>;
// Rounding-mode (er) variants of a packed FP binop — 512-bit only, since
// embedded rounding is only encodable at ZMM width.
5529 multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5530 X86SchedWriteSizes sched> {
5531 defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5533 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5534 defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5536 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
// {sae} variants of a packed FP binop — 512-bit only, mirroring
// avx512_fp_binop_p_round above.
5539 multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5540 X86SchedWriteSizes sched> {
5541 defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5543 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5544 defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5546 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
// Packed FP arithmetic. VADD/VMUL are commutable (trailing 1); VSUB/VDIV are
// not. Each also gets the 512-bit embedded-rounding variant.
5549 defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
5550 SchedWriteFAddSizes, 1>,
5551 avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5552 defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
5553 SchedWriteFMulSizes, 1>,
5554 avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5555 defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
5556 SchedWriteFAddSizes>,
5557 avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5558 defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
5559 SchedWriteFDivSizes>,
5560 avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
// Ordered min/max (X86fmin/X86fmax) are not commutable; they pair with the
// {sae} variants rather than rounding variants.
5561 defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
5562 SchedWriteFCmpSizes, 0>,
5563 avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5564 defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
5565 SchedWriteFCmpSizes, 0>,
5566 avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
// Commutable min/max variants, codegen-only (same opcodes as VMIN/VMAX).
5567 let isCodeGenOnly = 1 in {
5568 defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
5569 SchedWriteFCmpSizes, 1>;
5570 defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
5571 SchedWriteFCmpSizes, 1>;
// FP logic ops (DQI). null_frag: no ISel pattern is attached here —
// presumably selected elsewhere; verify before relying on patterns.
5573 defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
5574 SchedWriteFLogicSizes, 1>;
5575 defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
5576 SchedWriteFLogicSizes, 0>;
5577 defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
5578 SchedWriteFLogicSizes, 1>;
5579 defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
5580 SchedWriteFLogicSizes, 1>;
// Packed scalef-style binop: rr, rm (vector load) and rmb (broadcast load)
// maskable forms.
5582 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5583 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5584 let ExeDomain = _.ExeDomain in {
5585 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5586 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5587 "$src2, $src1", "$src1, $src2",
5588 (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5589 EVEX_4V, Sched<[sched]>;
5590 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5591 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
5592 "$src2, $src1", "$src1, $src2",
5593 (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5594 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5595 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5596 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
5597 "${src2}"##_.BroadcastStr##", $src1",
5598 "$src1, ${src2}"##_.BroadcastStr,
5599 (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5600 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Scalar scalef-style binop (intrinsic form, operates on _.RC with the
// scalar-intrinsic memory pattern): rr and rm maskable forms.
5604 multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5605 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5606 let ExeDomain = _.ExeDomain in {
5607 defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5608 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5609 "$src2, $src1", "$src1, $src2",
5610 (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5612 defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5613 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix,
5614 "$src2, $src1", "$src1, $src2",
5615 (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2)>,
5616 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Full VSCALEF family: packed PS/PD (512-bit with rounding variants, 128/256
// under VLX) plus scalar SS/SD forms using a separate opcode (opcScaler).
5620 multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5621 X86SchedWriteWidths sched> {
5622 defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
5623 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
5624 EVEX_V512, EVEX_CD8<32, CD8VF>;
5625 defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
5626 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
5627 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
5628 defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
5629 avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info,
5630 X86scalefsRnd, sched.Scl>,
5631 EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5632 defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
5633 avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info,
5634 X86scalefsRnd, sched.Scl>,
5635 EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W;
5637 // Define only if AVX512VL feature is present.
5638 let Predicates = [HasVLX] in {
5639 defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
5640 EVEX_V128, EVEX_CD8<32, CD8VF>;
5641 defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
5642 EVEX_V256, EVEX_CD8<32, CD8VF>;
5643 defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
5644 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
5645 defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
5646 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
// VSCALEF has no VEX counterpart, hence NotEVEX2VEXConvertible.
5649 defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
5650 SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;
5652 //===----------------------------------------------------------------------===//
5653 // AVX-512 VPTESTM instructions
5654 //===----------------------------------------------------------------------===//
// VPTESTM/VPTESTNM: test vector elements into a mask register (_.KRC
// destination). rr and rm forms; patterns intentionally null_frag.
5656 multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
5657 X86FoldableSchedWrite sched, X86VectorVTInfo _,
5659 // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
5660 // There are just too many permutations due to commutability and bitcasts.
5661 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
5662 defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5663 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5664 "$src2, $src1", "$src1, $src2",
5665 (null_frag), (null_frag), 1>,
5666 EVEX_4V, Sched<[sched]>;
5668 defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5669 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5670 "$src2, $src1", "$src1, $src2",
5671 (null_frag), (null_frag)>,
5672 EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5673 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast-memory (rmb) form of VPTESTM/VPTESTNM; same null_frag scheme as
// avx512_vptest above.
5677 multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
5678 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5679 let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
5680 defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5681 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5682 "${src2}"##_.BroadcastStr##", $src1",
5683 "$src1, ${src2}"##_.BroadcastStr,
5684 (null_frag), (null_frag)>,
5685 EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5686 Sched<[sched.Folded, sched.ReadAfterFold]>;
// D/Q element sizes of VPTESTM-style ops: 512-bit always (HasAVX512),
// 128/256-bit under VLX. Broadcast forms exist for d/q only.
5689 multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
5690 X86SchedWriteWidths sched,
5691 AVX512VLVectorVTInfo _> {
5692 let Predicates = [HasAVX512] in
5693 defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512, NAME>,
5694 avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
5696 let Predicates = [HasAVX512, HasVLX] in {
5697 defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256, NAME>,
5698 avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
5699 defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128, NAME>,
5700 avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
// Wraps avx512_vptest_dq_sizes for both dword and qword element suffixes.
5704 multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
5705 X86SchedWriteWidths sched> {
5706 defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
5708 defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
5709 avx512vl_i64_info>, VEX_W;
// Word/byte element sizes of VPTESTM-style ops: require BWI; 128/256-bit
// additionally require VLX. No broadcast forms for b/w elements.
5712 multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5713 X86SchedWriteWidths sched> {
5714 let Predicates = [HasBWI] in {
5715 defm WZ: avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
5716 v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
5717 defm BZ: avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
5718 v64i8_info, NAME#"B">, EVEX_V512;
5720 let Predicates = [HasVLX, HasBWI] in {
5722 defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
5723 v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
5724 defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
5725 v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
5726 defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
5727 v32i8x_info, NAME#"B">, EVEX_V256;
5728 defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
5729 v16i8x_info, NAME#"B">, EVEX_V128;
// All element sizes: w/b use opc_wb, d/q use opc_dq. VPTESTM and VPTESTNM
// share opcodes and differ only in prefix (T8PD vs. T8XS).
5733 multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5734 X86SchedWriteWidths sched> :
5735 avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
5736 avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
5738 defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
5739 SchedWriteVecLogic>, T8PD;
5740 defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
5741 SchedWriteVecLogic>, T8XS;
5743 //===----------------------------------------------------------------------===//
5744 // AVX-512 Shift instructions
5745 //===----------------------------------------------------------------------===//
// Shift-by-immediate: ri (register) and mi (memory) forms, with the u8
// immediate matched via (i8 timm:$src2).
5747 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5748 string OpcodeStr, SDNode OpNode,
5749 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5750 let ExeDomain = _.ExeDomain in {
5751 defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5752 (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5753 "$src2, $src1", "$src1, $src2",
5754 (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
5756 defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5757 (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5758 "$src2, $src1", "$src1, $src2",
5759 (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
5761 Sched<[sched.Folded]>;
// Shift-by-immediate with broadcast memory source (mbi form, EVEX_B).
5765 multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5766 string OpcodeStr, SDNode OpNode,
5767 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5768 let ExeDomain = _.ExeDomain in
5769 defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5770 (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5771 "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
5772 (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
5773 EVEX_B, Sched<[sched.Folded]>;
// Shift-by-vector-count: the count (src2) is always a 128-bit XMM register
// or i128 memory operand regardless of the destination width.
5776 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5777 X86FoldableSchedWrite sched, ValueType SrcVT,
5778 X86VectorVTInfo _> {
5779 // src2 is always 128-bit
5780 let ExeDomain = _.ExeDomain in {
5781 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5782 (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5783 "$src2, $src1", "$src1, $src2",
5784 (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
5785 AVX512BIBase, EVEX_4V, Sched<[sched]>;
5786 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5787 (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5788 "$src2, $src1", "$src1, $src2",
5789 (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
5791 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Vector-count shifts across sizes. Note the per-size CD8 tuple: CD8VQ at
// 512-bit, CD8VH at 256-bit, CD8VF at 128-bit — all describe the same
// 128-bit count operand relative to the vector width.
5795 multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5796 X86SchedWriteWidths sched, ValueType SrcVT,
5797 AVX512VLVectorVTInfo VTInfo,
5799 let Predicates = [prd] in
5800 defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
5801 VTInfo.info512>, EVEX_V512,
5802 EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5803 let Predicates = [prd, HasVLX] in {
5804 defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
5805 VTInfo.info256>, EVEX_V256,
5806 EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5807 defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
5808 VTInfo.info128>, EVEX_V128,
5809 EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
// d/q/w element variants of vector-count shifts. The q form can be marked
// non-EVEX2VEX-convertible (used by VPSRA, which has no VEX vpsraq).
5813 multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5814 string OpcodeStr, SDNode OpNode,
5815 X86SchedWriteWidths sched,
5816 bit NotEVEX2VEXConvertibleQ = 0> {
5817 defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
5818 avx512vl_i32_info, HasAVX512>;
5819 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5820 defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
5821 avx512vl_i64_info, HasAVX512>, VEX_W;
5822 defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
5823 avx512vl_i16_info, HasBWI>;
// Immediate shifts across sizes: rmi + broadcast rmbi at each width;
// 128/256-bit require VLX.
5826 multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5827 string OpcodeStr, SDNode OpNode,
5828 X86SchedWriteWidths sched,
5829 AVX512VLVectorVTInfo VTInfo> {
5830 let Predicates = [HasAVX512] in
5831 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5832 sched.ZMM, VTInfo.info512>,
5833 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
5834 VTInfo.info512>, EVEX_V512;
5835 let Predicates = [HasAVX512, HasVLX] in {
5836 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5837 sched.YMM, VTInfo.info256>,
5838 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
5839 VTInfo.info256>, EVEX_V256;
5840 defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5841 sched.XMM, VTInfo.info128>,
5842 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
5843 VTInfo.info128>, EVEX_V128;
// Word-element immediate shifts (BWI); no broadcast form for words.
// VEX_WIG: the W bit is ignored for these encodings.
5847 multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
5848 string OpcodeStr, SDNode OpNode,
5849 X86SchedWriteWidths sched> {
5850 let Predicates = [HasBWI] in
5851 defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5852 sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
5853 let Predicates = [HasVLX, HasBWI] in {
5854 defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5855 sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
5856 defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5857 sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
// d/q immediate shifts; the q variant can opt out of EVEX->VEX compression
// (VPSRAQ by immediate has no VEX form).
5861 multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
5862 Format ImmFormR, Format ImmFormM,
5863 string OpcodeStr, SDNode OpNode,
5864 X86SchedWriteWidths sched,
5865 bit NotEVEX2VEXConvertibleQ = 0> {
5866 defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
5867 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
5868 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5869 defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
5870 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
// Immediate shifts/rotates (opcode 0x72/0x73 with /reg as subopcode) and
// vector-count shifts. VPSRA passes 1 for NotEVEX2VEXConvertibleQ.
5873 defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
5874 SchedWriteVecShiftImm>,
5875 avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
5876 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5878 defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
5879 SchedWriteVecShiftImm>,
5880 avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
5881 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5883 defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
5884 SchedWriteVecShiftImm, 1>,
5885 avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
5886 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5888 defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
5889 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5890 defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
5891 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5893 defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
5894 SchedWriteVecShift>;
5895 defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
5896 SchedWriteVecShift, 1>;
5897 defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
5898 SchedWriteVecShift>;
5900 // Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// Each pattern widens the source into a ZMM register via INSERT_SUBREG into
// IMPLICIT_DEF, runs the 512-bit shift, and extracts the original subvector.
5901 let Predicates = [HasAVX512, NoVLX] in {
5902 def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
5903 (EXTRACT_SUBREG (v8i64
5905 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5906 VR128X:$src2)), sub_ymm)>;
5908 def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5909 (EXTRACT_SUBREG (v8i64
5911 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5912 VR128X:$src2)), sub_xmm)>;
5914 def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
5915 (EXTRACT_SUBREG (v8i64
5917 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5918 timm:$src2)), sub_ymm)>;
5920 def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
5921 (EXTRACT_SUBREG (v8i64
5923 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5924 timm:$src2)), sub_xmm)>;
5927 //===-------------------------------------------------------------------===//
5928 // Variable Bit Shifts
5929 //===-------------------------------------------------------------------===//
// Per-element variable shift (count vector has the same type as the data):
// rr and rm maskable forms.
5931 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
5932 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5933 let ExeDomain = _.ExeDomain in {
5934 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5935 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5936 "$src2, $src1", "$src1, $src2",
5937 (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
5938 AVX5128IBase, EVEX_4V, Sched<[sched]>;
5939 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5940 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5941 "$src2, $src1", "$src1, $src2",
5942 (_.VT (OpNode _.RC:$src1,
5943 (_.VT (_.LdFrag addr:$src2))))>,
5944 AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5945 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast-memory (rmb) form of the variable shift.
5949 multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5950 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5951 let ExeDomain = _.ExeDomain in
5952 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5953 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5954 "${src2}"##_.BroadcastStr##", $src1",
5955 "$src1, ${src2}"##_.BroadcastStr,
5956 (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
5957 AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5958 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Variable shift across widths: 512-bit (HasAVX512) plus 128/256 under VLX;
// each width also gets the broadcast form.
5961 multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5962 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
5963 let Predicates = [HasAVX512] in
5964 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
5965 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
5967 let Predicates = [HasAVX512, HasVLX] in {
5968 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
5969 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
5970 defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
5971 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
// d/q element variants of the variable shift.
5975 multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
5976 SDNode OpNode, X86SchedWriteWidths sched> {
5977 defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
5979 defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
5980 avx512vl_i64_info>, VEX_W;
5983 // Use 512bit version to implement 128/256 bit in case NoVLX.
// Widens 256/128-bit operands into ZMM, applies the Zrr instruction (looked
// up by name via !cast), then extracts the original subvector.
5984 multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
5985 SDNode OpNode, list<Predicate> p> {
5986 let Predicates = p in {
5987 def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
5988 (_.info256.VT _.info256.RC:$src2))),
5990 (!cast<Instruction>(OpcodeStr#"Zrr")
5991 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5992 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5995 def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
5996 (_.info128.VT _.info128.RC:$src2))),
5998 (!cast<Instruction>(OpcodeStr#"Zrr")
5999 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6000 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
// Word-element variable shifts (BWI), then the VPSLLV/VPSRAV/VPSRLV and
// VPRORV/VPROLV instantiations, plus NoVLX lowerings for the cases with no
// narrow encodings (q-element vsrav; all w-element variable shifts).
6004 multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6005 SDNode OpNode, X86SchedWriteWidths sched> {
6006 let Predicates = [HasBWI] in
6007 defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6009 let Predicates = [HasVLX, HasBWI] in {
6011 defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6013 defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
6018 defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
6019 avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
6021 defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
6022 avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
6024 defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
6025 avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
6027 defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6028 defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
6030 defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
6031 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
6032 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
6033 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
6036 // Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Same widen/shift/extract scheme as the VPSRA NoVLX patterns above, for
// both variable (rotl) and immediate (X86vrotli) left rotates.
6037 let Predicates = [HasAVX512, NoVLX] in {
6038 def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6039 (EXTRACT_SUBREG (v8i64
6041 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6042 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6044 def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6045 (EXTRACT_SUBREG (v8i64
6047 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6048 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6051 def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6052 (EXTRACT_SUBREG (v16i32
6054 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6055 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6057 def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6058 (EXTRACT_SUBREG (v16i32
6060 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6061 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6064 def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
6065 (EXTRACT_SUBREG (v8i64
6067 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6068 timm:$src2)), sub_xmm)>;
6069 def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
6070 (EXTRACT_SUBREG (v8i64
6072 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6073 timm:$src2)), sub_ymm)>;
6075 def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
6076 (EXTRACT_SUBREG (v16i32
6078 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6079 timm:$src2)), sub_xmm)>;
6080 def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
6081 (EXTRACT_SUBREG (v16i32
6083 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6084 timm:$src2)), sub_ymm)>;
6087 // Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Mirror of the VPROL block above, for right rotates (rotr / X86vrotri).
6088 let Predicates = [HasAVX512, NoVLX] in {
6089 def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6090 (EXTRACT_SUBREG (v8i64
6092 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6093 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6095 def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6096 (EXTRACT_SUBREG (v8i64
6098 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6099 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6102 def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6103 (EXTRACT_SUBREG (v16i32
6105 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6106 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6108 def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6109 (EXTRACT_SUBREG (v16i32
6111 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6112 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6115 def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
6116 (EXTRACT_SUBREG (v8i64
6118 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6119 timm:$src2)), sub_xmm)>;
6120 def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
6121 (EXTRACT_SUBREG (v8i64
6123 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6124 timm:$src2)), sub_ymm)>;
6126 def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
6127 (EXTRACT_SUBREG (v16i32
6129 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6130 timm:$src2)), sub_xmm)>;
6131 def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
6132 (EXTRACT_SUBREG (v16i32
6134 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6135 timm:$src2)), sub_ymm)>;
6138 //===-------------------------------------------------------------------===//
6139 // 1-src variable permutation VPERMW/D/Q
6140 //===-------------------------------------------------------------------===//
// 1-src variable permute for d/q elements; reuses the variable-shift
// multiclasses for the rr/rm/rmb plumbing. Only 512- and 256-bit forms —
// no 128-bit lane-crossing permute exists.
6142 multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6143 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6144 let Predicates = [HasAVX512] in
6145 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6146 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6148 let Predicates = [HasAVX512, HasVLX] in
6149 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6150 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
// Immediate-controlled permute (VPERMQ/VPERMPD $imm); reuses the
// shift-by-immediate multiclasses. 512/256-bit only.
6153 multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6154 string OpcodeStr, SDNode OpNode,
6155 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6156 let Predicates = [HasAVX512] in
6157 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6158 sched, VTInfo.info512>,
6159 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6160 sched, VTInfo.info512>, EVEX_V512;
6161 let Predicates = [HasAVX512, HasVLX] in
6162 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6163 sched, VTInfo.info256>,
6164 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6165 sched, VTInfo.info256>, EVEX_V256;
// Byte/word variable permutes gated on a caller-supplied predicate
// (HasBWI for VPERMW, HasVBMI for VPERMB), then the VPERM* instantiations.
6168 multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6169 Predicate prd, SDNode OpNode,
6170 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6171 let Predicates = [prd] in
6172 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6174 let Predicates = [HasVLX, prd] in {
6175 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6177 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
6182 defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6183 WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
6184 defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6185 WriteVarShuffle256, avx512vl_i8_info>;
6187 defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6188 WriteVarShuffle256, avx512vl_i32_info>;
6189 defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6190 WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
6191 defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6192 WriteFVarShuffle256, avx512vl_f32_info>;
6193 defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6194 WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;
// Immediate forms share the VPERMQ/VPERMPD names; TableGen merges by suffix.
6196 defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6197 X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6198 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6199 defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6200 X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6201 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6203 //===----------------------------------------------------------------------===//
6204 // AVX-512 - VPERMIL
6205 //===----------------------------------------------------------------------===//
// VPERMILPS/PD variable form: control vector (Ctrl) has an integer element
// type matching the data element width. rr, rm and broadcast rmb forms.
6207 multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6208 X86FoldableSchedWrite sched, X86VectorVTInfo _,
6209 X86VectorVTInfo Ctrl> {
6210 defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6211 (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6212 "$src2, $src1", "$src1, $src2",
6213 (_.VT (OpNode _.RC:$src1,
6214 (Ctrl.VT Ctrl.RC:$src2)))>,
6215 T8PD, EVEX_4V, Sched<[sched]>;
6216 defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6217 (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6218 "$src2, $src1", "$src1, $src2",
6221 (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
6222 T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6223 Sched<[sched.Folded, sched.ReadAfterFold]>;
6224 defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6225 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6226 "${src2}"##_.BroadcastStr##", $src1",
6227 "$src1, ${src2}"##_.BroadcastStr,
6230 (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
6231 T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6232 Sched<[sched.Folded, sched.ReadAfterFold]>;
6235 multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6236 X86SchedWriteWidths sched,
6237 AVX512VLVectorVTInfo _,
6238 AVX512VLVectorVTInfo Ctrl> {
6239 let Predicates = [HasAVX512] in {
6240 defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6241 _.info512, Ctrl.info512>, EVEX_V512;
6243 let Predicates = [HasAVX512, HasVLX] in {
6244 defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6245 _.info128, Ctrl.info128>, EVEX_V128;
6246 defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6247 _.info256, Ctrl.info256>, EVEX_V256;
6251 multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6252 AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6253 defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6255 defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6256 X86VPermilpi, SchedWriteFShuffle, _>,
6257 EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
6260 let ExeDomain = SSEPackedSingle in
6261 defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6263 let ExeDomain = SSEPackedDouble in
6264 defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6265 avx512vl_i64_info>, VEX_W1X;
6267 //===----------------------------------------------------------------------===//
6268 // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6269 //===----------------------------------------------------------------------===//
6271 defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6272 X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6273 EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6274 defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6275 X86PShufhw, SchedWriteShuffle>,
6276 EVEX, AVX512XSIi8Base;
6277 defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6278 X86PShuflw, SchedWriteShuffle>,
6279 EVEX, AVX512XDIi8Base;
6281 //===----------------------------------------------------------------------===//
6282 // AVX-512 - VPSHUFB
6283 //===----------------------------------------------------------------------===//
6285 multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6286 X86SchedWriteWidths sched> {
6287 let Predicates = [HasBWI] in
6288 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6291 let Predicates = [HasVLX, HasBWI] in {
6292 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6294 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
6299 defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6300 SchedWriteVarShuffle>, VEX_WIG;
6302 //===----------------------------------------------------------------------===//
6303 // Move Low to High and High to Low packed FP Instructions
6304 //===----------------------------------------------------------------------===//
6306 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6307 (ins VR128X:$src1, VR128X:$src2),
6308 "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6309 [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6310 Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
6311 let isCommutable = 1 in
6312 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6313 (ins VR128X:$src1, VR128X:$src2),
6314 "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6315 [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6316 Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
6318 //===----------------------------------------------------------------------===//
6319 // VMOVHPS/PD VMOVLPS Instructions
6320 // All patterns was taken from SSS implementation.
6321 //===----------------------------------------------------------------------===//
6323 multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6324 SDPatternOperator OpNode,
6325 X86VectorVTInfo _> {
6326 let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6327 def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6328 (ins _.RC:$src1, f64mem:$src2),
6329 !strconcat(OpcodeStr,
6330 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6334 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6335 Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
6338 // No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
6339 // SSE1. And MOVLPS pattern is even more complex.
6340 defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6341 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6342 defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6343 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6344 defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6345 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6346 defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6347 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6349 let Predicates = [HasAVX512] in {
6351 def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
6352 (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
6353 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6354 def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
6355 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6358 def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
6359 (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
6362 let SchedRW = [WriteFStore] in {
6363 let mayStore = 1, hasSideEffects = 0 in
6364 def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6365 (ins f64mem:$dst, VR128X:$src),
6366 "vmovhps\t{$src, $dst|$dst, $src}",
6367 []>, EVEX, EVEX_CD8<32, CD8VT2>;
6368 def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6369 (ins f64mem:$dst, VR128X:$src),
6370 "vmovhpd\t{$src, $dst|$dst, $src}",
6371 [(store (f64 (extractelt
6372 (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6373 (iPTR 0))), addr:$dst)]>,
6374 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6375 let mayStore = 1, hasSideEffects = 0 in
6376 def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6377 (ins f64mem:$dst, VR128X:$src),
6378 "vmovlps\t{$src, $dst|$dst, $src}",
6379 []>, EVEX, EVEX_CD8<32, CD8VT2>;
6380 def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6381 (ins f64mem:$dst, VR128X:$src),
6382 "vmovlpd\t{$src, $dst|$dst, $src}",
6383 [(store (f64 (extractelt (v2f64 VR128X:$src),
6384 (iPTR 0))), addr:$dst)]>,
6385 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6388 let Predicates = [HasAVX512] in {
6390 def : Pat<(store (f64 (extractelt
6391 (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6392 (iPTR 0))), addr:$dst),
6393 (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6395 //===----------------------------------------------------------------------===//
6396 // FMA - Fused Multiply Operations
6399 multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6400 X86FoldableSchedWrite sched,
6401 X86VectorVTInfo _, string Suff> {
6402 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6403 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6404 (ins _.RC:$src2, _.RC:$src3),
6405 OpcodeStr, "$src3, $src2", "$src2, $src3",
6406 (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6407 AVX512FMA3Base, Sched<[sched]>;
6409 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6410 (ins _.RC:$src2, _.MemOp:$src3),
6411 OpcodeStr, "$src3, $src2", "$src2, $src3",
6412 (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6413 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6415 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6416 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6417 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
6418 !strconcat("$src2, ${src3}", _.BroadcastStr ),
6420 _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
6421 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6425 multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6426 X86FoldableSchedWrite sched,
6427 X86VectorVTInfo _, string Suff> {
6428 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6429 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6430 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6431 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6432 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
6433 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6436 multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6437 SDNode OpNodeRnd, X86SchedWriteWidths sched,
6438 AVX512VLVectorVTInfo _, string Suff> {
6439 let Predicates = [HasAVX512] in {
6440 defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.ZMM,
6442 avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6444 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6446 let Predicates = [HasVLX, HasAVX512] in {
6447 defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.YMM,
6449 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6450 defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.XMM,
6452 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6456 multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6458 defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6459 SchedWriteFMA, avx512vl_f32_info, "PS">;
6460 defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6461 SchedWriteFMA, avx512vl_f64_info, "PD">,
6465 defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
6466 defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
6467 defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
6468 defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
6469 defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
6470 defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
6473 multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6474 X86FoldableSchedWrite sched,
6475 X86VectorVTInfo _, string Suff> {
6476 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6477 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6478 (ins _.RC:$src2, _.RC:$src3),
6479 OpcodeStr, "$src3, $src2", "$src2, $src3",
6480 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1,
6481 vselect, 1>, AVX512FMA3Base, Sched<[sched]>;
6483 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6484 (ins _.RC:$src2, _.MemOp:$src3),
6485 OpcodeStr, "$src3, $src2", "$src2, $src3",
6486 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6487 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6489 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6490 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6491 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6492 "$src2, ${src3}"##_.BroadcastStr,
6493 (_.VT (OpNode _.RC:$src2,
6494 (_.VT (_.BroadcastLdFrag addr:$src3)),
6495 _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
6496 Sched<[sched.Folded, sched.ReadAfterFold]>;
6500 multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6501 X86FoldableSchedWrite sched,
6502 X86VectorVTInfo _, string Suff> {
6503 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6504 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6505 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6506 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6507 (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
6509 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6512 multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6513 SDNode OpNodeRnd, X86SchedWriteWidths sched,
6514 AVX512VLVectorVTInfo _, string Suff> {
6515 let Predicates = [HasAVX512] in {
6516 defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.ZMM,
6518 avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6520 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6522 let Predicates = [HasVLX, HasAVX512] in {
6523 defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.YMM,
6525 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6526 defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.XMM,
6528 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6532 multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6533 SDNode OpNodeRnd > {
6534 defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6535 SchedWriteFMA, avx512vl_f32_info, "PS">;
6536 defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6537 SchedWriteFMA, avx512vl_f64_info, "PD">,
6541 defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
6542 defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
6543 defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
6544 defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
6545 defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
6546 defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
6548 multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6549 X86FoldableSchedWrite sched,
6550 X86VectorVTInfo _, string Suff> {
6551 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6552 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6553 (ins _.RC:$src2, _.RC:$src3),
6554 OpcodeStr, "$src3, $src2", "$src2, $src3",
6555 (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1, vselect, 1>,
6556 AVX512FMA3Base, Sched<[sched]>;
6558 // Pattern is 312 order so that the load is in a different place from the
6559 // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6560 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6561 (ins _.RC:$src2, _.MemOp:$src3),
6562 OpcodeStr, "$src3, $src2", "$src2, $src3",
6563 (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
6564 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6566 // Pattern is 312 order so that the load is in a different place from the
6567 // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6568 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6569 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6570 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6571 "$src2, ${src3}"##_.BroadcastStr,
6572 (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6573 _.RC:$src1, _.RC:$src2)), 1, 0>,
6574 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6578 multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6579 X86FoldableSchedWrite sched,
6580 X86VectorVTInfo _, string Suff> {
6581 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6582 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6583 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6584 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6585 (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
6587 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6590 multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6591 SDNode OpNodeRnd, X86SchedWriteWidths sched,
6592 AVX512VLVectorVTInfo _, string Suff> {
6593 let Predicates = [HasAVX512] in {
6594 defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.ZMM,
6596 avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6598 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6600 let Predicates = [HasVLX, HasAVX512] in {
6601 defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.YMM,
6603 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6604 defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.XMM,
6606 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6610 multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6611 SDNode OpNodeRnd > {
6612 defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6613 SchedWriteFMA, avx512vl_f32_info, "PS">;
6614 defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6615 SchedWriteFMA, avx512vl_f64_info, "PD">,
6619 defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
6620 defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
6621 defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
6622 defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
6623 defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
6624 defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
6627 multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6628 dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
6629 let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
6630 defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6631 (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
6632 "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6633 AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>;
6636 defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6637 (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
6638 "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6639 AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;
6641 defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6642 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6643 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
6644 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
6646 let isCodeGenOnly = 1, isCommutable = 1 in {
6647 def r : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6648 (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
6649 !strconcat(OpcodeStr,
6650 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6651 !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>;
6652 def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
6653 (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
6654 !strconcat(OpcodeStr,
6655 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6656 [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;
6658 def rb : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6659 (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
6660 !strconcat(OpcodeStr,
6661 "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
6662 !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
6663 Sched<[SchedWriteFMA.Scl]>;
6664 }// isCodeGenOnly = 1
6665 }// Constraints = "$src1 = $dst"
6668 multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6669 string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
6670 X86VectorVTInfo _, string SUFF> {
6671 let ExeDomain = _.ExeDomain in {
6672 defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
6673 // Operands for intrinsic are in 123 order to preserve passthu
6675 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6677 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6678 (_.ScalarLdFrag addr:$src3)))),
6679 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
6680 _.FRC:$src3, (i32 timm:$rc)))), 0>;
6682 defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
6683 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6685 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
6686 (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
6687 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
6688 _.FRC:$src1, (i32 timm:$rc)))), 1>;
6690 // One pattern is 312 order so that the load is in a different place from the
6691 // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6692 defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
6693 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6695 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6696 _.FRC:$src1, _.FRC:$src2))),
6697 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
6698 _.FRC:$src2, (i32 timm:$rc)))), 1>;
6702 multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6703 string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> {
6704 let Predicates = [HasAVX512] in {
6705 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6706 OpNodeRnd, f32x_info, "SS">,
6707 EVEX_CD8<32, CD8VT1>, VEX_LIG;
6708 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6709 OpNodeRnd, f64x_info, "SD">,
6710 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
6714 defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>;
6715 defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>;
6716 defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
6717 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
6719 multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
6720 string Suffix, SDNode Move,
6721 X86VectorVTInfo _, PatLeaf ZeroFP> {
6722 let Predicates = [HasAVX512] in {
6723 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6725 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6727 (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
6728 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6729 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6731 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6732 (Op _.FRC:$src2, _.FRC:$src3,
6733 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6734 (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
6735 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6736 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6738 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6740 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6741 (_.ScalarLdFrag addr:$src3)))))),
6742 (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
6743 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6746 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6747 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6748 (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
6749 (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
6750 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6753 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6754 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6755 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6756 (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
6757 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6760 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6761 (X86selects VK1WM:$mask,
6763 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6765 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6766 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
6767 VR128X:$src1, VK1WM:$mask,
6768 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6769 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6771 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6772 (X86selects VK1WM:$mask,
6774 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6775 (_.ScalarLdFrag addr:$src3)),
6776 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6777 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
6778 VR128X:$src1, VK1WM:$mask,
6779 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6781 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6782 (X86selects VK1WM:$mask,
6783 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6784 (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
6785 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6786 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
6787 VR128X:$src1, VK1WM:$mask,
6788 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6790 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6791 (X86selects VK1WM:$mask,
6792 (Op _.FRC:$src2, _.FRC:$src3,
6793 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6794 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6795 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
6796 VR128X:$src1, VK1WM:$mask,
6797 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6798 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6800 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6801 (X86selects VK1WM:$mask,
6802 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6803 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6804 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6805 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
6806 VR128X:$src1, VK1WM:$mask,
6807 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6809 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6810 (X86selects VK1WM:$mask,
6812 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6814 (_.EltVT ZeroFP)))))),
6815 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
6816 VR128X:$src1, VK1WM:$mask,
6817 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6818 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6820 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6821 (X86selects VK1WM:$mask,
6822 (Op _.FRC:$src2, _.FRC:$src3,
6823 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6824 (_.EltVT ZeroFP)))))),
6825 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
6826 VR128X:$src1, VK1WM:$mask,
6827 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6828 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6830 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6831 (X86selects VK1WM:$mask,
6833 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6834 (_.ScalarLdFrag addr:$src3)),
6835 (_.EltVT ZeroFP)))))),
6836 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
6837 VR128X:$src1, VK1WM:$mask,
6838 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6840 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6841 (X86selects VK1WM:$mask,
6842 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6843 _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
6844 (_.EltVT ZeroFP)))))),
6845 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
6846 VR128X:$src1, VK1WM:$mask,
6847 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6849 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6850 (X86selects VK1WM:$mask,
6851 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6852 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6853 (_.EltVT ZeroFP)))))),
6854 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
6855 VR128X:$src1, VK1WM:$mask,
6856 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6858 // Patterns with rounding mode.
6859 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6861 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6862 _.FRC:$src3, (i32 timm:$rc)))))),
6863 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
6864 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6865 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6867 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6868 (RndOp _.FRC:$src2, _.FRC:$src3,
6869 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6870 (i32 timm:$rc)))))),
6871 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
6872 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6873 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6875 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6876 (X86selects VK1WM:$mask,
6878 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6879 _.FRC:$src3, (i32 timm:$rc)),
6880 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6881 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
6882 VR128X:$src1, VK1WM:$mask,
6883 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6884 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6886 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6887 (X86selects VK1WM:$mask,
6888 (RndOp _.FRC:$src2, _.FRC:$src3,
6889 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6891 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6892 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
6893 VR128X:$src1, VK1WM:$mask,
6894 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6895 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6897 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6898 (X86selects VK1WM:$mask,
6900 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6901 _.FRC:$src3, (i32 timm:$rc)),
6902 (_.EltVT ZeroFP)))))),
6903 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
6904 VR128X:$src1, VK1WM:$mask,
6905 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6906 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6908 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6909 (X86selects VK1WM:$mask,
6910 (RndOp _.FRC:$src2, _.FRC:$src3,
6911 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6913 (_.EltVT ZeroFP)))))),
6914 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
6915 VR128X:$src1, VK1WM:$mask,
6916 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6917 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6921 defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SS",
6922 X86Movss, v4f32x_info, fp32imm0>;
6923 defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SS",
6924 X86Movss, v4f32x_info, fp32imm0>;
6925 defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SS",
6926 X86Movss, v4f32x_info, fp32imm0>;
6927 defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS",
6928 X86Movss, v4f32x_info, fp32imm0>;
6930 defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SD",
6931 X86Movsd, v2f64x_info, fp64imm0>;
6932 defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SD",
6933 X86Movsd, v2f64x_info, fp64imm0>;
6934 defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SD",
6935 X86Movsd, v2f64x_info, fp64imm0>;
6936 defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD",
6937 X86Movsd, v2f64x_info, fp64imm0>;
6939 //===----------------------------------------------------------------------===//
6940 // AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
6941 //===----------------------------------------------------------------------===//
// IFMA (VPMADD52): three-operand, read-modify-write on $dst, hence the
// "$src1 = $dst" tie below. Defines register (r), full-memory (m) and
// embedded-broadcast (mb) forms via AVX512_maskable_3src.
6942 let Constraints = "$src1 = $dst" in {
6943 multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6944 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6945 // NOTE: The SDNode has the multiply operands first with the add last.
6946 // This enables commuted load patterns to be autogenerated by tablegen.
6947 let ExeDomain = _.ExeDomain in {
// Register-register form; the trailing "1, 1" flags appear to mark it
// commutable/maskable — TODO confirm against AVX512_maskable_3src's params.
6948 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6949 (ins _.RC:$src2, _.RC:$src3),
6950 OpcodeStr, "$src3, $src2", "$src2, $src3",
6951 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6952 AVX512FMA3Base, Sched<[sched]>;
// Memory form: $src3 is loaded with the full-vector load fragment.
6954 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6955 (ins _.RC:$src2, _.MemOp:$src3),
6956 OpcodeStr, "$src3, $src2", "$src2, $src3",
6957 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
6958 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast form (EVEX.b): scalar memory operand splatted to the vector.
// NOTE(review): the original numbering jumps here (6964/6966 absent) — part
// of this defm's pattern was lost in extraction; verify against upstream.
6960 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6961 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6962 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
6963 !strconcat("$src2, ${src3}", _.BroadcastStr ),
6965 (_.VT (_.BroadcastLdFrag addr:$src3)),
6967 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6970 } // Constraints = "$src1 = $dst"
// Instantiates the IFMA forms at all three vector widths: ZMM requires
// HasIFMA; the XMM/YMM (Z128/Z256) forms additionally require HasVLX.
6972 multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6973 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
6974 let Predicates = [HasIFMA] in {
6975 defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
6976 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6978 let Predicates = [HasVLX, HasIFMA] in {
6979 defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
6980 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6981 defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
6982 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// VPMADD52LUQ/VPMADD52HUQ: 52-bit multiply, accumulate low/high half into
// 64-bit lanes. NOTE(review): each defm's trailing format class (original
// lines 6988/6991-ish) is missing from this extraction; verify upstream.
6986 defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
6987 SchedWriteVecIMul, avx512vl_i64_info>,
6989 defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
6990 SchedWriteVecIMul, avx512vl_i64_info>,
6993 //===----------------------------------------------------------------------===//
6994 // AVX-512 Scalar convert from sign integer to float/double
6995 //===----------------------------------------------------------------------===//
// Scalar int -> float/double conversion (e.g. vcvtsi2ss). Emits:
//   rr/rm      - FRC-based codegen-only forms with no patterns
//                (hasSideEffects = 0, isCodeGenOnly = 1);
//   rr_Int/rm_Int - VR128-based "intrinsic" forms carrying the OpNode
//                patterns, plus an AT&T alias that adds the size suffix.
// NOTE(review): original line 7000 (presumably the trailing "string mem"
// template parameter) is missing from this extraction; "mem" is referenced
// in the asm strings below.
6997 multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
6998 RegisterClass SrcRC, X86VectorVTInfo DstVT,
6999 X86MemOperand x86memop, PatFrag ld_frag, string asm,
7001 let hasSideEffects = 0, isCodeGenOnly = 1 in {
7002 def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7003 (ins DstVT.FRC:$src1, SrcRC:$src),
7004 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7005 EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7007 def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7008 (ins DstVT.FRC:$src1, x86memop:$src),
7009 asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
7010 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7011 } // hasSideEffects = 0
// Intrinsic form: pass-through $src1 vector, converted scalar in low element.
7012 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7013 (ins DstVT.RC:$src1, SrcRC:$src2),
7014 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7015 [(set DstVT.RC:$dst,
7016 (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
7017 EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7019 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7020 (ins DstVT.RC:$src1, x86memop:$src2),
7021 asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7022 [(set DstVT.RC:$dst,
7023 (OpNode (DstVT.VT DstVT.RC:$src1),
7024 (ld_frag addr:$src2)))]>,
7025 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
// AT&T-syntax alias with explicit operand-size suffix for the register form.
7026 def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7027 (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
7028 DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
// Register-only variant carrying an explicit static rounding-control operand
// (EVEX.b + EVEX_RC); only the rrb_Int form exists since rounding control is
// meaningless for memory operands. NOTE(review): the original numbering jumps
// around 7034/7037/7041-7042 — some template/pattern lines were lost in
// extraction; verify against upstream.
7031 multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7032 X86FoldableSchedWrite sched, RegisterClass SrcRC,
7033 X86VectorVTInfo DstVT, string asm,
7035 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7036 (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7038 "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7039 [(set DstVT.RC:$dst,
7040 (OpNode (DstVT.VT DstVT.RC:$src1),
7043 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7044 def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
7045 (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
7046 DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
// Combines the plain conversion forms with the rounding-control form under a
// single defm NAME, and applies VEX_LIG to the whole set.
7049 multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
7050 X86FoldableSchedWrite sched,
7051 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7052 X86MemOperand x86memop, PatFrag ld_frag,
7053 string asm, string mem> {
7054 defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
7055 avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7056 ld_frag, asm, mem>, VEX_LIG;
// Instantiations of the scalar int->fp converts for all four source/dest
// combinations, signed (VCVTSI*) and unsigned (VCVTUSI*), plus:
//  - AT&T aliases for the ambiguous 32-bit memory forms, and
//  - selection patterns mapping sint_to_fp/uint_to_fp onto the codegen-only
//    forms (first operand is an IMPLICIT_DEF pass-through).
// Note the 32-bit-source SD variants (VCVTSI2SDZ/VCVTUSI2SDZ) use null_frag
// and avx512_vcvtsi only — no rounding-control form is instantiated for them.
// NOTE(review): the original numbering has gaps (7061, 7065, 7072, ...) —
// some continuation lines of these defms were lost in extraction.
7059 let Predicates = [HasAVX512] in {
7060 defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7062 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7063 XS, EVEX_CD8<32, CD8VT1>;
7064 defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7066 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7067 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7068 defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7069 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l">,
7070 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7071 defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7073 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7074 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Suffix-less aliases default the ambiguous memory operand to 32 bits.
7076 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7077 (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7078 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7079 (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
// Memory-source sint_to_fp patterns.
7081 def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
7082 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7083 def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
7084 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7085 def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
7086 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7087 def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
7088 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
// Register-source sint_to_fp patterns.
7090 def : Pat<(f32 (sint_to_fp GR32:$src)),
7091 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7092 def : Pat<(f32 (sint_to_fp GR64:$src)),
7093 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7094 def : Pat<(f64 (sint_to_fp GR32:$src)),
7095 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7096 def : Pat<(f64 (sint_to_fp GR64:$src)),
7097 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
// Unsigned variants (AVX-512-only encodings, opcode 0x7B).
7099 defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7101 v4f32x_info, i32mem, loadi32,
7102 "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
7103 defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7105 v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7106 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7107 defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7108 i32mem, loadi32, "cvtusi2sd", "l">,
7109 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7110 defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7112 v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7113 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7115 def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7116 (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7117 def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7118 (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
// Memory-source uint_to_fp patterns.
7120 def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
7121 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7122 def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
7123 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7124 def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
7125 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7126 def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
7127 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
// Register-source uint_to_fp patterns.
7129 def : Pat<(f32 (uint_to_fp GR32:$src)),
7130 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7131 def : Pat<(f32 (uint_to_fp GR64:$src)),
7132 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7133 def : Pat<(f64 (uint_to_fp GR32:$src)),
7134 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7135 def : Pat<(f64 (uint_to_fp GR64:$src)),
7136 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7139 //===----------------------------------------------------------------------===//
7140 // AVX-512 Scalar convert from float/double to integer
7141 //===----------------------------------------------------------------------===//
// Scalar fp -> int conversion with current rounding mode (rr_Int/rm_Int) and
// with an explicit static rounding operand (rrb_Int, EVEX.b + EVEX_RC).
// The trailing InstAliases add the AT&T size suffix (aliasStr).
// NOTE(review): original lines 7145/7147 (likely the OpNodeRnd and aliasStr
// parameters) and 7157 are missing from this extraction; OpNodeRnd/aliasStr
// are referenced below.
7143 multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7144 X86VectorVTInfo DstVT, SDNode OpNode,
7146 X86FoldableSchedWrite sched, string asm,
7148 let Predicates = [HasAVX512] in {
7149 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7150 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7151 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7152 EVEX, VEX_LIG, Sched<[sched]>;
7153 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7154 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7155 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
7156 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7158 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7159 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7160 [(set DstVT.RC:$dst, (OpNode
7161 (SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>,
7162 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7163 } // Predicates = [HasAVX512]
7165 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7166 (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7167 def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7168 (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7169 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7170 (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7171 SrcVT.IntScalarMemOp:$src), 0, "att">;
7174 // Convert float/double to signed/unsigned int 32/64
// Eight instantiations: {ss,sd} source x {si,usi} destination x {32,64}-bit
// result. 0x2D is the signed encoding; 0x79 the AVX-512 unsigned encoding.
7175 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7176 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7177 XS, EVEX_CD8<32, CD8VT1>;
7178 defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7179 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7180 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7181 defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7182 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7183 XS, EVEX_CD8<32, CD8VT1>;
7184 defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7185 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7186 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7187 defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7188 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7189 XD, EVEX_CD8<64, CD8VT1>;
7190 defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7191 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7192 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7193 defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7194 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7195 XD, EVEX_CD8<64, CD8VT1>;
7196 defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7197 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7198 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7200 // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
7201 // which produce unnecessary vmovs{s,d} instructions
// Each pattern folds a Movss/Movsd-insert of a converted scalar into the
// pass-through _Int form, eliminating the separate blend/move. Patterns come
// in register and load variants, for signed then unsigned sources.
7202 let Predicates = [HasAVX512] in {
7203 def : Pat<(v4f32 (X86Movss
7204 (v4f32 VR128X:$dst),
7205 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
7206 (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7208 def : Pat<(v4f32 (X86Movss
7209 (v4f32 VR128X:$dst),
7210 (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
7211 (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7213 def : Pat<(v4f32 (X86Movss
7214 (v4f32 VR128X:$dst),
7215 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
7216 (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7218 def : Pat<(v4f32 (X86Movss
7219 (v4f32 VR128X:$dst),
7220 (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
7221 (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7223 def : Pat<(v2f64 (X86Movsd
7224 (v2f64 VR128X:$dst),
7225 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
7226 (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7228 def : Pat<(v2f64 (X86Movsd
7229 (v2f64 VR128X:$dst),
7230 (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
7231 (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7233 def : Pat<(v2f64 (X86Movsd
7234 (v2f64 VR128X:$dst),
7235 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
7236 (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7238 def : Pat<(v2f64 (X86Movsd
7239 (v2f64 VR128X:$dst),
7240 (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
7241 (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
// Same folds for the unsigned conversions.
7243 def : Pat<(v4f32 (X86Movss
7244 (v4f32 VR128X:$dst),
7245 (v4f32 (scalar_to_vector (f32 (uint_to_fp GR64:$src)))))),
7246 (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7248 def : Pat<(v4f32 (X86Movss
7249 (v4f32 VR128X:$dst),
7250 (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi64 addr:$src))))))),
7251 (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7253 def : Pat<(v4f32 (X86Movss
7254 (v4f32 VR128X:$dst),
7255 (v4f32 (scalar_to_vector (f32 (uint_to_fp GR32:$src)))))),
7256 (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7258 def : Pat<(v4f32 (X86Movss
7259 (v4f32 VR128X:$dst),
7260 (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi32 addr:$src))))))),
7261 (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7263 def : Pat<(v2f64 (X86Movsd
7264 (v2f64 VR128X:$dst),
7265 (v2f64 (scalar_to_vector (f64 (uint_to_fp GR64:$src)))))),
7266 (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7268 def : Pat<(v2f64 (X86Movsd
7269 (v2f64 VR128X:$dst),
7270 (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi64 addr:$src))))))),
7271 (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7273 def : Pat<(v2f64 (X86Movsd
7274 (v2f64 VR128X:$dst),
7275 (v2f64 (scalar_to_vector (f64 (uint_to_fp GR32:$src)))))),
7276 (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7278 def : Pat<(v2f64 (X86Movsd
7279 (v2f64 VR128X:$dst),
7280 (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi32 addr:$src))))))),
7281 (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7282 } // Predicates = [HasAVX512]
7284 // Convert float/double to signed/unsigned int 32/64 with truncation
// Defines: codegen-only rr/rm forms matching generic fp_to_sint/fp_to_uint
// (OpNode), intrinsic forms matching OpNodeInt, and an SAE form (OpNodeSAE,
// EVEX.b) that suppresses exceptions. AT&T aliases append aliasStr.
7285 multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7286 X86VectorVTInfo _DstRC, SDNode OpNode,
7287 SDNode OpNodeInt, SDNode OpNodeSAE,
7288 X86FoldableSchedWrite sched, string aliasStr>{
7289 let Predicates = [HasAVX512] in {
7290 let isCodeGenOnly = 1 in {
7291 def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7292 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7293 [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7294 EVEX, VEX_LIG, Sched<[sched]>;
7295 def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7296 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7297 [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7298 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7301 def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7302 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7303 [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7304 EVEX, VEX_LIG, Sched<[sched]>;
// SAE variant: {sae} in the asm string, encoded via EVEX.b on a reg form.
7305 def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7306 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7307 [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7308 EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
7309 def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7310 (ins _SrcRC.IntScalarMemOp:$src),
7311 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7312 [(set _DstRC.RC:$dst,
7313 (OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>,
7314 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7317 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7318 (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7319 def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7320 (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7321 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7322 (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7323 _SrcRC.IntScalarMemOp:$src), 0, "att">;
// Truncating scalar converts: 0x2C = signed (vcvtt{ss,sd}2si),
// 0x78 = unsigned (vcvtt{ss,sd}2usi); VEX_W selects the 64-bit result.
7326 defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7327 fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7328 "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7329 defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7330 fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7331 "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7332 defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7333 fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7334 "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7335 defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7336 fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7337 "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
7339 defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7340 fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7341 "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7342 defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7343 fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7344 "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
7345 defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7346 fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7347 "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7348 defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7349 fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7350 "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7352 //===----------------------------------------------------------------------===//
7353 // AVX-512 Convert from float to double and back
7354 //===----------------------------------------------------------------------===//
// Scalar fp<->fp conversion (vcvtsd2ss/vcvtss2sd). Maskable intrinsic forms
// (rr_Int/rm_Int) carry the patterns; codegen-only FRC forms (rr/rm) have
// none (hasSideEffects = 0) and are selected by the explicit Pats below.
7356 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7357 X86VectorVTInfo _Src, SDNode OpNode,
7358 X86FoldableSchedWrite sched> {
7359 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7360 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7361 "$src2, $src1", "$src1, $src2",
7362 (_.VT (OpNode (_.VT _.RC:$src1),
7363 (_Src.VT _Src.RC:$src2)))>,
7364 EVEX_4V, VEX_LIG, Sched<[sched]>;
// NOTE(review): original line 7370 between the pattern and Sched is missing
// from this extraction (likely the EVEX_4V/VEX_LIG line); verify upstream.
7365 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7366 (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7367 "$src2, $src1", "$src1, $src2",
7368 (_.VT (OpNode (_.VT _.RC:$src1),
7369 (_Src.VT _Src.ScalarIntMemCPat:$src2)))>,
7371 Sched<[sched.Folded, sched.ReadAfterFold]>;
7373 let isCodeGenOnly = 1, hasSideEffects = 0 in {
7374 def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7375 (ins _.FRC:$src1, _Src.FRC:$src2),
7376 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7377 EVEX_4V, VEX_LIG, Sched<[sched]>;
7379 def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7380 (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7381 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7382 EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7386 // Scalar Conversion with SAE - suppress all exceptions
// SAE (suppress-all-exceptions) register form for the scalar fp<->fp
// conversion; {sae} rendered in the asm string, encoded via EVEX.b.
7387 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7388 X86VectorVTInfo _Src, SDNode OpNodeSAE,
7389 X86FoldableSchedWrite sched> {
7390 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7391 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7392 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7393 (_.VT (OpNodeSAE (_.VT _.RC:$src1),
7394 (_Src.VT _Src.RC:$src2)))>,
7395 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
7398 // Scalar Conversion with rounding control (RC)
// Static rounding variant: $rc operand, EVEX_RC encoding is expected on the
// trailing (missing) line. NOTE(review): original lines 7408-7409 are absent
// from this extraction; verify the trailing modifiers upstream.
7399 multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7400 X86VectorVTInfo _Src, SDNode OpNodeRnd,
7401 X86FoldableSchedWrite sched> {
7402 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7403 (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7404 "$rc, $src2, $src1", "$src1, $src2, $rc",
7405 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7406 (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
7407 EVEX_4V, VEX_LIG, Sched<[sched]>,
// vcvtsd2ss wrapper: plain + rounding-control forms, XD prefix, W=1,
// 64-bit CD8 tuple. Narrowing converts take RC (not SAE).
7410 multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
7411 SDNode OpNode, SDNode OpNodeRnd,
7412 X86FoldableSchedWrite sched,
7413 X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7414 let Predicates = [HasAVX512] in {
7415 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7416 avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7417 OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
// vcvtss2sd wrapper: plain + SAE forms, XS prefix, 32-bit CD8 tuple.
// Widening converts are exact, so they take SAE rather than RC.
7421 multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
7422 SDNode OpNode, SDNode OpNodeSAE,
7423 X86FoldableSchedWrite sched,
7424 X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7425 let Predicates = [HasAVX512] in {
7426 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7427 avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
7428 EVEX_CD8<32, CD8VT1>, XS;
// Instantiate both scalar fp<->fp converts (same opcode 0x5A, distinguished
// by prefix). NOTE(review): the trailing destination-VT argument lines
// (original 7433/7436-7437) are missing from this extraction.
7431 defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86frounds,
7432 X86froundsRnd, WriteCvtSD2SS, f64x_info,
7434 defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts,
7435 X86fpextsSAE, WriteCvtSS2SD, f32x_info,
// Map generic fpextend/fpround on scalars onto the codegen-only forms; the
// memory fpextend fold is OptForSize-gated. The Movss/Movsd patterns fold a
// convert-of-low-element + blend into the pass-through _Int forms.
7438 def : Pat<(f64 (fpextend FR32X:$src)),
7439 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7440 Requires<[HasAVX512]>;
7441 def : Pat<(f64 (fpextend (loadf32 addr:$src))),
7442 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7443 Requires<[HasAVX512, OptForSize]>;
7445 def : Pat<(f32 (fpround FR64X:$src)),
7446 (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7447 Requires<[HasAVX512]>;
7449 def : Pat<(v4f32 (X86Movss
7450 (v4f32 VR128X:$dst),
7451 (v4f32 (scalar_to_vector
7452 (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
7453 (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
7454 Requires<[HasAVX512]>;
7456 def : Pat<(v2f64 (X86Movsd
7457 (v2f64 VR128X:$dst),
7458 (v2f64 (scalar_to_vector
7459 (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
7460 (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
7461 Requires<[HasAVX512]>;
7463 //===----------------------------------------------------------------------===//
7464 // AVX-512 Vector convert from signed/unsigned integer to float/double
7465 // and from float/double to signed/unsigned integer
7466 //===----------------------------------------------------------------------===//
// Generic packed-conversion multiclass: register (rr), memory (rm) and
// embedded-broadcast (rmb) forms, each with unmasked/merge-masked/
// zero-masked variants via AVX512_maskable_common. LdDAG parameterizes the
// memory-form pattern so callers (e.g. the fpextend variant below) can
// substitute a different load. NOTE(review): the original numbering has many
// gaps in this multiclass (7475, 7484, 7493, 7508-7510, ...) — several
// pattern/operand lines were lost in extraction; verify against upstream.
7468 multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7469 X86VectorVTInfo _Src, SDNode OpNode,
7470 X86FoldableSchedWrite sched,
7471 string Broadcast = _.BroadcastStr,
7472 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7473 RegisterClass MaskRC = _.KRCWM,
7474 dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
7476 defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
7478 (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
7479 (ins MaskRC:$mask, _Src.RC:$src),
7480 OpcodeStr, "$src", "$src",
7481 (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7482 (vselect MaskRC:$mask,
7483 (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7485 vselect, "$src0 = $dst">,
7486 EVEX, Sched<[sched]>;
7488 defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
7490 (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
7491 (ins MaskRC:$mask, MemOp:$src),
7492 OpcodeStr#Alias, "$src", "$src",
7494 (vselect MaskRC:$mask, LdDAG, _.RC:$src0),
7495 vselect, "$src0 = $dst">,
7496 EVEX, Sched<[sched.Folded]>;
// Broadcast form: scalar memory operand splatted (EVEX.b), asm shows
// ${src}{1toN} via Broadcast.
7498 defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
7499 (ins _Src.ScalarMemOp:$src),
7500 (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
7501 (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
7503 "${src}"##Broadcast, "${src}"##Broadcast,
7504 (_.VT (OpNode (_Src.VT
7505 (_Src.BroadcastLdFrag addr:$src))
7507 (vselect MaskRC:$mask,
7511 (_Src.BroadcastLdFrag addr:$src)))),
7513 vselect, "$src0 = $dst">,
7514 EVEX, EVEX_B, Sched<[sched.Folded]>;
7516 // Conversion with SAE - suppress all exceptions
// Packed SAE form: register-only, {sae} in the asm string, EVEX.b encoded.
7517 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7518 X86VectorVTInfo _Src, SDNode OpNodeSAE,
7519 X86FoldableSchedWrite sched> {
7520 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7521 (ins _Src.RC:$src), OpcodeStr,
7522 "{sae}, $src", "$src, {sae}",
7523 (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
7524 EVEX, EVEX_B, Sched<[sched]>;
7527 // Conversion with rounding control (RC)
// Packed static-rounding form: extra $rc operand, EVEX.b + EVEX_RC encoding.
7528 multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7529 X86VectorVTInfo _Src, SDNode OpNodeRnd,
7530 X86FoldableSchedWrite sched> {
7531 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7532 (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
7533 "$rc, $src", "$src, $rc",
7534 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
7535 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
7538 // Similar to avx512_vcvt_fp, but uses an extload for the memory form.
// Overrides LdDAG with an any-extending load of the source vector type,
// selected by name via !cast<PatFrag>("extload"#VTName).
7539 multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7540 X86VectorVTInfo _Src, SDNode OpNode,
7541 X86FoldableSchedWrite sched,
7542 string Broadcast = _.BroadcastStr,
7543 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7544 RegisterClass MaskRC = _.KRCWM>
7545 : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, sched, Broadcast, Alias,
7547 (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
7549 // Extend Float to Double
// ZMM form (AVX512F) gets an SAE variant; XMM/YMM forms require VLX. The
// 128-bit form converts only 2 of the 4 source floats, hence the special
// X86vfpext node, "{1to2}" broadcast string and f64mem operand.
7550 multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
7551 X86SchedWriteWidths sched> {
7552 let Predicates = [HasAVX512] in {
7553 defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
7554 fpextend, sched.ZMM>,
7555 avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
7556 X86vfpextSAE, sched.ZMM>, EVEX_V512;
7558 let Predicates = [HasVLX] in {
7559 defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
7560 X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
7561 defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
7562 sched.YMM>, EVEX_V256;
7566 // Truncate Double to Float
// ZMM form gets rounding control. The 128-bit form uses null_frag (its
// patterns are supplied separately below, using X86vmfpround for masking)
// and carries "{x}" / "{y}" aliases because vcvtpd2ps with a 128-bit memory
// operand is ambiguous in AT&T syntax; the InstAliases below spell out the
// x/y-suffixed, masked, zero-masked and broadcast assembler forms.
7567 multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
7568 let Predicates = [HasAVX512] in {
7569 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, X86vfpround, sched.ZMM>,
7570 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
7571 X86vfproundRnd, sched.ZMM>, EVEX_V512;
7573 let Predicates = [HasVLX] in {
7574 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
7575 null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>,
7577 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, X86vfpround,
7578 sched.YMM, "{1to4}", "{y}">, EVEX_V256;
// "x"-suffixed aliases for the 128-bit forms.
7581 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7582 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7583 def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7584 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7585 VK2WM:$mask, VR128X:$src), 0, "att">;
7586 def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|"
7587 "$dst {${mask}} {z}, $src}",
7588 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7589 VK2WM:$mask, VR128X:$src), 0, "att">;
7590 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7591 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7592 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
7593 "$dst {${mask}}, ${src}{1to2}}",
7594 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7595 VK2WM:$mask, f64mem:$src), 0, "att">;
7596 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7597 "$dst {${mask}} {z}, ${src}{1to2}}",
7598 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7599 VK2WM:$mask, f64mem:$src), 0, "att">;
// "y"-suffixed aliases for the 256-bit forms.
7601 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7602 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7603 def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7604 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7605 VK4WM:$mask, VR256X:$src), 0, "att">;
7606 def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
7607 "$dst {${mask}} {z}, $src}",
7608 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7609 VK4WM:$mask, VR256X:$src), 0, "att">;
7610 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7611 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7612 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
7613 "$dst {${mask}}, ${src}{1to4}}",
7614 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7615 VK4WM:$mask, f64mem:$src), 0, "att">;
7616 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7617 "$dst {${mask}} {z}, ${src}{1to4}}",
7618 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7619 VK4WM:$mask, f64mem:$src), 0, "att">;
// Both directions share opcode 0x5A; prefix (PD vs PS) + VEX.W distinguish
// them. Note the half-vector CD8 tuple (CD8VH) on the widening convert.
7622 defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
7623 VEX_W, PD, EVEX_CD8<64, CD8VF>;
7624 defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
7625 PS, EVEX_CD8<32, CD8VH>;
// 512-bit fpround selection patterns: plain, merge-masked and zero-masked,
// for register, full-load and broadcast-load sources.
7627 let Predicates = [HasAVX512] in {
7628 def : Pat<(v8f32 (fpround (v8f64 VR512:$src))),
7629 (VCVTPD2PSZrr VR512:$src)>;
7630 def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
7632 (VCVTPD2PSZrrk VR256X:$src0, VK8WM:$mask, VR512:$src)>;
7633 def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
7634 v8f32x_info.ImmAllZerosV),
7635 (VCVTPD2PSZrrkz VK8WM:$mask, VR512:$src)>;
7637 def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
7638 (VCVTPD2PSZrm addr:$src)>;
7639 def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
7641 (VCVTPD2PSZrmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
7642 def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
7643 v8f32x_info.ImmAllZerosV),
7644 (VCVTPD2PSZrmkz VK8WM:$mask, addr:$src)>;
7646 def : Pat<(v8f32 (fpround (v8f64 (X86VBroadcastld64 addr:$src)))),
7647 (VCVTPD2PSZrmb addr:$src)>;
7648 def : Pat<(vselect VK8WM:$mask,
7649 (fpround (v8f64 (X86VBroadcastld64 addr:$src))),
7650 (v8f32 VR256X:$src0)),
7651 (VCVTPD2PSZrmbk VR256X:$src0, VK8WM:$mask, addr:$src)>;
7652 def : Pat<(vselect VK8WM:$mask,
7653 (fpround (v8f64 (X86VBroadcastld64 addr:$src))),
7654 v8f32x_info.ImmAllZerosV),
7655 (VCVTPD2PSZrmbkz VK8WM:$mask, addr:$src)>;
// VLX selection patterns: 256-bit forms via generic fpround/vselect, and
// 128-bit forms via X86vfpround/X86vmfpround — the instruction-level
// patterns for Z128 were disabled with null_frag above, so these Pats are
// the only way the 128-bit converts get selected.
7658 let Predicates = [HasVLX] in {
7659 def : Pat<(v4f32 (fpround (v4f64 VR256X:$src))),
7660 (VCVTPD2PSZ256rr VR256X:$src)>;
7661 def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
7663 (VCVTPD2PSZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
7664 def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
7665 v4f32x_info.ImmAllZerosV),
7666 (VCVTPD2PSZ256rrkz VK4WM:$mask, VR256X:$src)>;
7668 def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
7669 (VCVTPD2PSZ256rm addr:$src)>;
7670 def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
7672 (VCVTPD2PSZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
7673 def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
7674 v4f32x_info.ImmAllZerosV),
7675 (VCVTPD2PSZ256rmkz VK4WM:$mask, addr:$src)>;
7677 def : Pat<(v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
7678 (VCVTPD2PSZ256rmb addr:$src)>;
7679 def : Pat<(vselect VK4WM:$mask,
7680 (v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
7682 (VCVTPD2PSZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
7683 def : Pat<(vselect VK4WM:$mask,
7684 (v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
7685 v4f32x_info.ImmAllZerosV),
7686 (VCVTPD2PSZ256rmbkz VK4WM:$mask, addr:$src)>;
7688 // Special patterns to allow use of X86vmfpround for masking. Instruction
7689 // patterns have been disabled with null_frag.
7690 def : Pat<(X86vfpround (v2f64 VR128X:$src)),
7691 (VCVTPD2PSZ128rr VR128X:$src)>;
7692 def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
7694 (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
7695 def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV,
7697 (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
7699 def : Pat<(X86vfpround (loadv2f64 addr:$src)),
7700 (VCVTPD2PSZ128rm addr:$src)>;
7701 def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
7703 (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
7704 def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV,
7706 (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;
7708 def : Pat<(X86vfpround (v2f64 (X86VBroadcastld64 addr:$src))),
7709 (VCVTPD2PSZ128rmb addr:$src)>;
7710 def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
7711 (v4f32 VR128X:$src0), VK2WM:$mask),
7712 (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
7713 def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
7714 v4f32x_info.ImmAllZerosV, VK2WM:$mask),
7715 (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
// Convert Signed/Unsigned Doubleword to Double
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNode128, X86SchedWriteWidths sched> {
  // No rounding in this op
  let Predicates = [HasAVX512] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
                          sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    // The 128-bit form reads only the low 64 bits (2 x i32) of the source,
    // hence the explicit i64mem operand and custom scalar-load pattern.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
                               OpNode128, sched.XMM, "{1to2}", "", i64mem, VK2WM,
                               (v2f64 (OpNode128 (bc_v4i32
                                (v2i64
                                 (scalar_to_vector (loadi64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
// Convert Signed/Unsigned Doubleword to Float
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  // 512-bit form also gets an embedded-rounding-control variant.
  let Predicates = [HasAVX512] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
                          sched.ZMM>,
           avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
                             OpNodeRnd, sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
// Convert Float to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  // 512-bit form also gets a suppress-all-exceptions (SAE) variant.
  let Predicates = [HasAVX512] in {
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                          sched.ZMM>,
           avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
                              OpNodeSAE, sched.ZMM>, EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
// Convert Float to Signed/Unsigned Doubleword
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  // 512-bit form also gets an embedded-rounding-control variant.
  let Predicates = [HasAVX512] in {
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                          sched.ZMM>,
           avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
                             OpNodeRnd, sched.ZMM>, EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
// Convert Double to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                          sched.ZMM>,
           avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
                              OpNodeSAE, sched.ZMM>, EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  // AT&T-syntax aliases without the "x"/"y" size suffix.
  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}
// Convert Double to Signed/Unsigned Doubleword
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                          sched.ZMM>,
           avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
                             OpNodeRnd, sched.ZMM>, EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  // AT&T-syntax aliases without the "x"/"y" size suffix.
  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}
// Convert Double to Signed/Unsigned Quadword
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  // Quadword forms require AVX512DQ.
  let Predicates = [HasDQI] in {
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                          sched.ZMM>,
           avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
                             OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
// Convert Double to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  // Quadword forms require AVX512DQ; 512-bit form adds a SAE variant.
  let Predicates = [HasDQI] in {
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                          sched.ZMM>,
           avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                              OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
// Convert Signed/Unsigned Quadword to Double
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
                          sched.ZMM>,
           avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
                             OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
                               sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
                               sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
  }
}
// Convert Float to Signed/Unsigned Quadword
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                          sched.ZMM>,
           avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
                             OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
// Convert Float to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, sched.ZMM>,
           avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
                              OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
// Convert Signed/Unsigned Quadword to Float
multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
                          sched.ZMM>,
           avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
                             OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type. We also specify the broadcast string explicitly due to the
    // same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag,
                               sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
                               EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256,
                               NotEVEX2VEXConvertible;
  }

  // AT&T-syntax aliases without the "x"/"y" size suffix.
  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
}
// Instantiations of the conversion multiclasses defined above.

// Doubleword <-> double/float conversions.
defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
                                 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;

defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
                                 X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
                                 PS, EVEX_CD8<32, CD8VF>;

defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86cvttp2si,
                                   X86cvttp2siSAE, SchedWriteCvtPS2DQ>,
                                   XS, EVEX_CD8<32, CD8VF>;

defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86cvttp2si,
                                   X86cvttp2siSAE, SchedWriteCvtPD2DQ>,
                                   PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86cvttp2ui,
                                    X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PS,
                                    EVEX_CD8<32, CD8VF>;

defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui,
                                    X86cvttp2uiSAE, SchedWriteCvtPD2DQ>,
                                    PS, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
                                  X86VUintToFP, SchedWriteCvtDQ2PD>, XS,
                                  EVEX_CD8<32, CD8VH>;

defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
                                  X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD,
                                  EVEX_CD8<32, CD8VF>;

defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VF>;

defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
                                 VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
                                  PS, EVEX_CD8<32, CD8VF>;

defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                  PS, EVEX_CD8<64, CD8VF>;

// Quadword conversions (AVX512DQ predicated inside the multiclasses).
defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                  PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
                                  EVEX_CD8<32, CD8VH>;

defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86cvttp2si,
                                   X86cvttp2siSAE, SchedWriteCvtPD2DQ>, VEX_W,
                                   PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86cvttp2si,
                                   X86cvttp2siSAE, SchedWriteCvtPS2DQ>, PD,
                                   EVEX_CD8<32, CD8VH>;

defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86cvttp2ui,
                                    X86cvttp2uiSAE, SchedWriteCvtPD2DQ>, VEX_W,
                                    PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui,
                                    X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PD,
                                    EVEX_CD8<32, CD8VH>;

defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
                                 X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
                                 EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
                                  X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
                                  EVEX_CD8<64, CD8VF>;

defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp,
                                 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS,
                                 EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp,
                                  X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD,
                                  EVEX_CD8<64, CD8VF>;
// Masked 128-bit PD->DQ/UDQ (and truncating) conversion patterns. The
// instruction patterns were disabled with null_frag, so masking is expressed
// through the dedicated X86m* nodes here.
let Predicates = [HasVLX] in {
  // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
            (VCVTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
            (VCVTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2DQZ128rmb addr:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvttp2si for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86cvttp2si (v2f64 VR128X:$src))),
            (VCVTTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86cvttp2si (loadv2f64 addr:$src))),
            (VCVTTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTTPD2DQZ128rmb addr:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
            (VCVTPD2UDQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
            (VCVTPD2UDQZ128rm addr:$src)>;
  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2UDQZ128rmb addr:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
                           (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86cvttp2ui (v2f64 VR128X:$src))),
            (VCVTTPD2UDQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86cvttp2ui (loadv2f64 addr:$src))),
            (VCVTTPD2UDQZ128rm addr:$src)>;
  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTTPD2UDQZ128rmb addr:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
}
// PS->QQ/UQQ (and truncating) 128-bit patterns for zero-extended 64-bit
// loads: only the low two f32 elements are consumed, so a vzload64 source
// can be folded into the rm forms.
let Predicates = [HasDQI, HasVLX] in {
  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTPS2QQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            VR128X:$src0)),
            (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            v2i64x_info.ImmAllZerosV)),
            (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTPS2UQQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            VR128X:$src0)),
            (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            v2i64x_info.ImmAllZerosV)),
            (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTTPS2QQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            VR128X:$src0)),
            (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            v2i64x_info.ImmAllZerosV)),
            (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTTPS2UQQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            VR128X:$src0)),
            (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            v2i64x_info.ImmAllZerosV)),
            (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
}
// Without VLX, narrow unsigned conversions are widened to the 512-bit
// instruction: insert the source into a ZMM register (upper part undef),
// convert, then extract the low subregister.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v8i32 (X86cvttp2ui (v8f32 VR256X:$src1))),
            (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
                                     (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                              VR256X:$src1, sub_ymm)))), sub_ymm)>;

  def : Pat<(v4i32 (X86cvttp2ui (v4f32 VR128X:$src1))),
            (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
                                     (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                              VR128X:$src1, sub_xmm)))), sub_xmm)>;

  def : Pat<(v4i32 (X86cvttp2ui (v4f64 VR256X:$src1))),
            (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
                                    (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                            VR256X:$src1, sub_ymm)))), sub_xmm)>;

  def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
            (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
                                     (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                              VR256X:$src1, sub_ymm)))), sub_ymm)>;

  def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
            (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
                                     (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                              VR128X:$src1, sub_xmm)))), sub_xmm)>;

  def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
            (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
                                    (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                            VR128X:$src1, sub_xmm)))), sub_ymm)>;

  def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
            (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
                                    (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                            VR128X:$src1, sub_xmm)))), sub_xmm)>;
}
// DQ->PD 128-bit patterns for zero-extended 64-bit loads: only the low two
// i32 elements are consumed, so a vzload64 source folds into the rm forms.
let Predicates = [HasVLX] in {
  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (vselect VK2WM:$mask,
                            (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                            VR128X:$src0)),
            (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2f64 (vselect VK2WM:$mask,
                            (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                            v2f64x_info.ImmAllZerosV)),
            (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (vselect VK2WM:$mask,
                            (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                            VR128X:$src0)),
            (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2f64 (vselect VK2WM:$mask,
                            (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                            v2f64x_info.ImmAllZerosV)),
            (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
}
// Masked 128-bit QQ->PS conversions via the dedicated X86VM*intToFP nodes
// (instruction patterns were disabled with null_frag).
let Predicates = [HasDQI, HasVLX] in {
  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4f32 (X86VSintToFP (v2i64 VR128X:$src))),
            (VCVTQQ2PSZ128rr VR128X:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4f32 (X86VSintToFP (loadv2i64 addr:$src))),
            (VCVTQQ2PSZ128rm addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4f32 (X86VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTQQ2PSZ128rmb addr:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           (v4f32 VR128X:$src0), VK2WM:$mask),
            (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4f32 (X86VUintToFP (v2i64 VR128X:$src))),
            (VCVTUQQ2PSZ128rr VR128X:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4f32 (X86VUintToFP (loadv2i64 addr:$src))),
            (VCVTUQQ2PSZ128rm addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4f32 (X86VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTUQQ2PSZ128rmb addr:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           (v4f32 VR128X:$src0), VK2WM:$mask),
            (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
}
// Without AVX512VL, the 128/256-bit fp<->i64 conversions are not legal;
// widen the operand into a 512-bit register, convert, then extract the
// low subregister.
let Predicates = [HasDQI, NoVLX] in {
  def : Pat<(v2i64 (X86cvttp2si (v2f64 VR128X:$src1))),
            (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
                             (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                     VR128X:$src1, sub_xmm)))), sub_xmm)>;

  def : Pat<(v4i64 (X86cvttp2si (v4f32 VR128X:$src1))),
            (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
                             (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                     VR128X:$src1, sub_xmm)))), sub_ymm)>;

  def : Pat<(v4i64 (X86cvttp2si (v4f64 VR256X:$src1))),
            (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
                             (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                     VR256X:$src1, sub_ymm)))), sub_ymm)>;

  def : Pat<(v2i64 (X86cvttp2ui (v2f64 VR128X:$src1))),
            (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
                             (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                     VR128X:$src1, sub_xmm)))), sub_xmm)>;

  def : Pat<(v4i64 (X86cvttp2ui (v4f32 VR128X:$src1))),
            (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
                             (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                     VR128X:$src1, sub_xmm)))), sub_ymm)>;

  def : Pat<(v4i64 (X86cvttp2ui (v4f64 VR256X:$src1))),
            (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
                             (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                     VR256X:$src1, sub_ymm)))), sub_ymm)>;

  def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
            (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
                             (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                     VR256X:$src1, sub_ymm)))), sub_xmm)>;

  def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
            (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
                             (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                     VR128X:$src1, sub_xmm)))), sub_xmm)>;

  def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
            (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
                             (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                     VR256X:$src1, sub_ymm)))), sub_ymm)>;

  def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
            (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
                             (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                     VR256X:$src1, sub_ymm)))), sub_xmm)>;

  def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
            (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
                             (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                     VR128X:$src1, sub_xmm)))), sub_xmm)>;

  def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
            (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
                             (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                     VR256X:$src1, sub_ymm)))), sub_ymm)>;
}
//===----------------------------------------------------------------------===//
//  Half precision conversion instructions
//===----------------------------------------------------------------------===//
// Register and memory forms of VCVTPH2PS (half -> single conversion).
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, PatFrag ld_frag,
                           X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
                            (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
                            (X86cvtph2ps (_src.VT _src.RC:$src))>,
                            T8PD, Sched<[sched]>;
  defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
                            (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
                            (X86cvtph2ps (_src.VT
                                          (ld_frag addr:$src)))>,
                            T8PD, Sched<[sched.Folded]>;
}
// SAE (suppress-all-exceptions) register form of VCVTPH2PS.
multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               X86FoldableSchedWrite sched> {
  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                             (ins _src.RC:$src), "vcvtph2ps",
                             "{sae}, $src", "$src, {sae}",
                             (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
                             T8PD, EVEX_B, Sched<[sched]>;
}
// NOTE(review): the dropped scheduling argument line was reconstructed as
// WriteCvtPH2PSZ; verify against repo history.
let Predicates = [HasAVX512] in
  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, load,
                                    WriteCvtPH2PSZ>,
                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
let Predicates = [HasVLX] in {
  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
                       load, WriteCvtPH2PSY>, EVEX, EVEX_V256,
                       EVEX_CD8<32, CD8VH>;
  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
                       load, WriteCvtPH2PS>, EVEX, EVEX_V128,
                       EVEX_CD8<32, CD8VH>;

  // Pattern match vcvtph2ps of a scalar i64 load.
  def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTPH2PSZ128rm addr:$src)>;
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
              (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
            (VCVTPH2PSZ128rm addr:$src)>;
}
// VCVTPS2PH (single -> half): plain, merge-masked (rrk), zero-masked (rrkz)
// register forms plus store (mr) and masked-store (mrk) forms.
// NOTE(review): dropped Sched<[RR]>/Sched<[MR]> trailer lines reconstructed
// from upstream LLVM; verify against repo history.
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
let ExeDomain = GenericDomain in {
  def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
             [(set _dest.RC:$dst,
                   (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
             Sched<[RR]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
             [(set _dest.RC:$dst,
                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
                                 _dest.RC:$src0, _src.KRCWM:$mask))]>,
             Sched<[RR]>, EVEX_K;
  def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
             [(set _dest.RC:$dst,
                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
                                 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
             Sched<[RR]>, EVEX_KZ;
  let hasSideEffects = 0, mayStore = 1 in {
    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               Sched<[MR]>;
    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
               EVEX_K, Sched<[MR]>, NotMemoryFoldable;
  }
}
}
// SAE register form of VCVTPS2PH (assembly-only, no ISel pattern).
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               SchedWrite Sched> {
  let hasSideEffects = 0 in
  defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
                   (outs _dest.RC:$dst),
                   (ins _src.RC:$src1, i32u8imm:$src2),
                   "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
                   EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
}
let Predicates = [HasAVX512] in {
  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
  let Predicates = [HasVLX] in {
    defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
                                         WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
                                         EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
    defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
                                         WriteCvtPS2PH, WriteCvtPS2PHSt>,
                                         EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
  }

  // Fold the low-element extract of a converted v8i16 into the store form.
  def : Pat<(store (f64 (extractelt
                         (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
  def : Pat<(store (i64 (extractelt
                         (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
  def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
  def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
            (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
}
// Patterns for matching conversions from float to half-float and vice versa.
let Predicates = [HasVLX] in {
  // Use MXCSR.RC for rounding instead of explicitly specifying the default
  // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
  // configurations we support (the default). However, falling back to MXCSR is
  // more consistent with other instructions, which are always controlled by it.
  // It's encoded as 0b100.
  def : Pat<(fp_to_f16 FR32X:$src),
            (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (v8i16 (VCVTPS2PHZ128rr
              (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4))), sub_16bit))>;

  def : Pat<(f16_to_fp GR16:$src),
            (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
              (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)))), FR32X)) >;

  def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
            (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
              (v8i16 (VCVTPS2PHZ128rr
               (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4)))), FR32X)) >;
}
//  Unordered/Ordered scalar fp compare with Sae and set EFLAGS
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
                              string OpcodeStr, X86FoldableSchedWrite sched> {
  let hasSideEffects = 0 in
  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
}
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>,
                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>,
                                    AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>,
                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>,
                                    AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
                                 "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
                                 EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
                                 "ucomisd", WriteFCom>, PD, EVEX,
                                 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  // comiss/comisd have no ISel patterns of their own.
  let Pattern = []<dag> in {
    defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
                                  "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
                                  EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
                                  "comisd", WriteFCom>, PD, EVEX,
                                  VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
  let isCodeGenOnly = 1 in {
    defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
                                       sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
                                       EVEX_CD8<32, CD8VT1>;
    defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
                                       sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX,
                                       VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

    defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
                                      sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
                                      EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
                                      sse_load_f64, "comisd", WriteFCom>, PD, EVEX,
                                      VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                           EVEX_4V, VEX_LIG, Sched<[sched]>;
  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (OpNode (_.VT _.RC:$src1),
                          _.ScalarIntMemCPat:$src2)>, EVEX_4V, VEX_LIG,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// NOTE(review): the trailing attribute lines for VRCP14SSZ/VRCP14SDZ were
// dropped in this copy and reconstructed as T8PD (matching the VRSQRT14*
// definitions below); verify against repo history.
defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
                               f32x_info>, EVEX_CD8<32, CD8VT1>,
                               T8PD;
defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
                               f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
                               T8PD;
defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f32x_info>,
                                 EVEX_CD8<32, CD8VT1>, T8PD;
defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
                                 EVEX_CD8<64, CD8VT1>, T8PD;
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
// Register, memory and broadcast-memory forms of the packed 14-bit
// approximation instructions.
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
                          (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
                          Sched<[sched]>;
  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                          (OpNode (_.VT
                            (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.ScalarMemOp:$src), OpcodeStr,
                           "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                           (OpNode (_.VT
                             (_.BroadcastLdFrag addr:$src)))>,
                           EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
                           v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM,
                           v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
                                OpNode, sched.XMM, v4f32x_info>,
                                EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
                                OpNode, sched.YMM, v8f32x_info>,
                                EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
                                OpNode, sched.XMM, v2f64x_info>,
                                EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
                                OpNode, sched.YMM, v4f64x_info>,
                                EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                         SDNode OpNode, SDNode OpNodeSAE,
                         X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain in {
  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                           Sched<[sched]>;

  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                            EVEX_B, Sched<[sched]>;

  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2)>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// Instantiates the ss/sd variants of an ER (embedded rounding) scalar op.
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
                           sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG;
  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
                           sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
}
let Predicates = [HasERI] in {
  defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
                               SchedWriteFRcp.Scl>, T8PD, EVEX_4V;
  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
                               SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
}

defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
                              SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd

multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain in {
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
                         (OpNode (_.VT _.RC:$src))>,
                         Sched<[sched]>;

  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                         (OpNode (_.VT
                             (bitconvert (_.LdFrag addr:$src))))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.ScalarMemOp:$src), OpcodeStr,
                         "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                         (OpNode (_.VT
                                  (_.BroadcastLdFrag addr:$src)))>,
                         EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// SAE register form for the packed ER instructions.
multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                             SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain in
  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr,
                         "{sae}, $src", "$src, {sae}",
                         (OpNode (_.VT _.RC:$src))>,
                         EVEX_B, Sched<[sched]>;
}
multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
                      SDNode OpNodeSAE, X86SchedWriteWidths sched> {
   defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
              avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
              T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
   defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
              avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
              T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86SchedWriteWidths sched> {
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
                                sched.XMM>,
                                EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
                                sched.YMM>,
                                EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
                                sched.XMM>,
                                EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
                                sched.YMM>,
                                EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
  }
}
let Predicates = [HasERI] in {
 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
                            SchedWriteFRsqrt>, EVEX;
 defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
                            SchedWriteFRcp>, EVEX;
 defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
                            SchedWriteFAdd>, EVEX;
}

defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
                            SchedWriteFRnd>,
                 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
                                          SchedWriteFRnd>, EVEX;
// Packed sqrt with explicit rounding control ($rc).
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in
  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
                         (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}
// Packed sqrt: register, memory and broadcast-memory forms.
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
                         (_.VT (fsqrt _.RC:$src))>, EVEX,
                         Sched<[sched]>;
  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                         (fsqrt (_.VT
                           (bitconvert (_.LdFrag addr:$src))))>, EVEX,
                           Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.ScalarMemOp:$src), OpcodeStr,
                          "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                          (fsqrt (_.VT
                            (_.BroadcastLdFrag addr:$src)))>,
                          EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                sched.PS.ZMM, v16f32_info>,
                                EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                sched.PD.ZMM, v8f64_info>,
                                EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                     sched.PS.XMM, v4f32x_info>,
                                     EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                     sched.PS.YMM, v8f32x_info>,
                                     EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                     sched.PD.XMM, v2f64x_info>,
                                     EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                     sched.PD.YMM, v4f64x_info>,
                                     EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  }
}
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
                                        X86SchedWriteSizes sched> {
  defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
                                      sched.PS.ZMM, v16f32_info>,
                                      EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
                                      sched.PD.ZMM, v8f64_info>,
                                      EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}
// Scalar sqrt: intrinsic forms (r_Int/m_Int/rb_Int), codegen-only FRC forms
// (r/m), plus patterns selecting the FRC forms for plain fsqrt.
// NOTE(review): dropped Sched trailers and the rb_Int rounding operands were
// reconstructed from upstream LLVM; verify against repo history.
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name> {
  let ExeDomain = _.ExeDomain in {
    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (X86fsqrts (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2))>,
                         Sched<[sched]>;
    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (X86fsqrts (_.VT _.RC:$src1),
                                    _.ScalarIntMemCPat:$src2)>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                         "$rc, $src2, $src1", "$src1, $src2, $rc",
                         (X86fsqrtRnds (_.VT _.RC:$src1),
                                       (_.VT _.RC:$src2),
                                       (i32 timm:$rc))>,
                         EVEX_B, EVEX_RC, Sched<[sched]>;

    let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                Sched<[sched]>;
      def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
    }
  }

  let Predicates = [HasAVX512] in {
    def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
              (!cast<Instruction>(Name#Zr)
                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
  }

  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(_.EltVT (fsqrt (load addr:$src))),
              (!cast<Instruction>(Name#Zm)
                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
  }
}
multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
                        EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
                        EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
}
defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;

defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
// Scalar rndscale: intrinsic register/SAE/memory forms, codegen-only FRC
// forms, plus patterns selecting them for X86VRndScale.
// NOTE(review): dropped Sched trailer lines reconstructed from upstream
// LLVM; verify against repo history.
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                           "$src3, $src2, $src1", "$src1, $src2, $src3",
                           (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                           (i32 timm:$src3)))>,
                           Sched<[sched]>;

  defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                         "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
                         (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                         (i32 timm:$src3)))>, EVEX_B,
                         Sched<[sched]>;

  defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                         OpcodeStr,
                         "$src3, $src2, $src1", "$src1, $src2, $src3",
                         (_.VT (X86RndScales _.RC:$src1,
                                _.ScalarIntMemCPat:$src2, (i32 timm:$src3)))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;

  let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
    def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
               OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
               []>, Sched<[sched]>;

    def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
              OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
              []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }

  let Predicates = [HasAVX512] in {
    def : Pat<(X86VRndScale _.FRC:$src1, timm:$src2),
              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
               _.FRC:$src1, timm:$src2))>;
  }

  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
               addr:$src1, timm:$src2))>;
  }
}
defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
                                           SchedWriteFRnd.Scl, f32x_info>,
                                           AVX512AIi8Base, EVEX_4V, VEX_LIG,
                                           EVEX_CD8<32, CD8VT1>;

defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
                                           SchedWriteFRnd.Scl, f64x_info>,
                                           VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
                                           EVEX_CD8<64, CD8VT1>;
// Selects the masked (Intk) and zero-masked (Intkz) scalar forms for a
// select-of-scalar-op pattern.
multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
                                dag OutMask, Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
               (extractelt _.VT:$dst, (iPTR 0))))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
               _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;

    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))), ZeroFP))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
                    OutMask, _.VT:$src2, _.VT:$src1)>;
  }
}
defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
                            fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
                            fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
//-------------------------------------------------
// Integer truncate and extend operations
//-------------------------------------------------
// PatFrags that contain a select and a truncate op. They take operands in the
// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
// either to the multiclasses.
def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
                           (vselect node:$mask,
                                    (trunc node:$src), node:$src0)>;
def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
                            (vselect node:$mask,
                                     (X86vtruncs node:$src), node:$src0)>;
def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
                             (vselect node:$mask,
                                      (X86vtruncus node:$src), node:$src0)>;
// Truncating moves: plain (rr), merge-masked (rrk), zero-masked (rrkz)
// register forms plus assembly-only store forms (mr/mrk).
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDPatternOperator MaskNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
  let ExeDomain = DestInfo.ExeDomain in {
  def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst|$dst, $src}",
             [(set DestInfo.RC:$dst,
                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
             EVEX, Sched<[sched]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
             [(set DestInfo.RC:$dst,
                   (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                             (DestInfo.VT DestInfo.RC:$src0),
                             SrcInfo.KRCWM:$mask))]>,
             EVEX, EVEX_K, Sched<[sched]>;
  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
             [(set DestInfo.RC:$dst,
                   (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
             EVEX, EVEX_KZ, Sched<[sched]>;
  }

  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
               EVEX, Sched<[sched.Folded]>;

    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
               EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
  }//mayStore = 1, hasSideEffects = 0
}
// ISel patterns mapping truncating-store PatFrags onto the pattern-less
// mr/mrk store instructions produced by avx512_trunc_common:
//   truncFrag  -> Name#ZSuffix#mr   (unmasked truncating store)
//   mtruncFrag -> Name#ZSuffix#mrk  (masked truncating store)
// NOTE(review): the parameter list's trailing lines (the Name string
// parameter's declaration) are elided from this listing.
9189 multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9190 X86VectorVTInfo DestInfo,
9191 PatFrag truncFrag, PatFrag mtruncFrag,
9194 def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9195 (!cast<Instruction>(Name#SrcInfo.ZSuffix##mr)
9196 addr:$dst, SrcInfo.RC:$src)>;
9198 def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
9199 SrcInfo.KRCWM:$mask),
9200 (!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk)
9201 addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
// Instantiates the register forms (avx512_trunc_common) and the
// truncating-store patterns (avx512_trunc_mr_lowering) for all three vector
// widths: Z128/Z256 are gated on [HasVLX, prd], Z on [prd] alone.  Separate
// OpNode/MaskNode parameters per width let callers use the "in-vector" node
// for widths whose result occupies only part of the destination register.
9204 multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9205 SDNode OpNode256, SDNode OpNode512,
9206 SDPatternOperator MaskNode128,
9207 SDPatternOperator MaskNode256,
9208 SDPatternOperator MaskNode512,
9209 X86FoldableSchedWrite sched,
9210 AVX512VLVectorVTInfo VTSrcInfo,
9211 X86VectorVTInfo DestInfoZ128,
9212 X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9213 X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9214 X86MemOperand x86memopZ, PatFrag truncFrag,
9215 PatFrag mtruncFrag, Predicate prd = HasAVX512>{
9217 let Predicates = [HasVLX, prd] in {
9218 defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
9219 VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9220 avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
9221 truncFrag, mtruncFrag, NAME>, EVEX_V128;
9223 defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
9224 VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9225 avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
9226 truncFrag, mtruncFrag, NAME>, EVEX_V256;
9228 let Predicates = [prd] in
9229 defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
9230 VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9231 avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
9232 truncFrag, mtruncFrag, NAME>, EVEX_V512;
// Per-element-shape wrappers around avx512_trunc.  Naming: the two letters
// are source/dest element widths (q=i64, d=i32, w=i16, b=i8).  Each picks
// InVecNode vs. OpNode per width: widths where the truncated result fills
// only part of a 128-bit register use the in-vector node.
// q->b: result is at most 8 bytes even from a 512-bit source, so all three
// widths use InVecNode; memory operands are i16/i32/i64.
9235 multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9236 SDPatternOperator MaskNode,
9237 X86FoldableSchedWrite sched, PatFrag StoreNode,
9238 PatFrag MaskedStoreNode, SDNode InVecNode,
9239 SDPatternOperator InVecMaskNode> {
9240 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
9241 InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
9242 avx512vl_i64_info, v16i8x_info, v16i8x_info,
9243 v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9244 MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
// q->w: only the 512-bit form (v8i64 -> v8i16) fills a full 128-bit result,
// so Z uses OpNode/MaskNode while Z128/Z256 use the in-vector nodes.
9247 multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9248 SDPatternOperator MaskNode,
9249 X86FoldableSchedWrite sched, PatFrag StoreNode,
9250 PatFrag MaskedStoreNode, SDNode InVecNode,
9251 SDPatternOperator InVecMaskNode> {
9252 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9253 InVecMaskNode, InVecMaskNode, MaskNode, sched,
9254 avx512vl_i64_info, v8i16x_info, v8i16x_info,
9255 v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9256 MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
// q->d: 256- and 512-bit forms produce full vectors (v4i32/v8i32).
9259 multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9260 SDPatternOperator MaskNode,
9261 X86FoldableSchedWrite sched, PatFrag StoreNode,
9262 PatFrag MaskedStoreNode, SDNode InVecNode,
9263 SDPatternOperator InVecMaskNode> {
9264 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9265 InVecMaskNode, MaskNode, MaskNode, sched,
9266 avx512vl_i64_info, v4i32x_info, v4i32x_info,
9267 v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9268 MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
// d->b: only the 512-bit form (v16i32 -> v16i8) fills a 128-bit result.
9271 multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
9272 SDPatternOperator MaskNode,
9273 X86FoldableSchedWrite sched, PatFrag StoreNode,
9274 PatFrag MaskedStoreNode, SDNode InVecNode,
9275 SDPatternOperator InVecMaskNode> {
9276 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9277 InVecMaskNode, InVecMaskNode, MaskNode, sched,
9278 avx512vl_i32_info, v16i8x_info, v16i8x_info,
9279 v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
9280 MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
// d->w: 256- and 512-bit forms produce full vectors (v8i16/v16i16).
9283 multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9284 SDPatternOperator MaskNode,
9285 X86FoldableSchedWrite sched, PatFrag StoreNode,
9286 PatFrag MaskedStoreNode, SDNode InVecNode,
9287 SDPatternOperator InVecMaskNode> {
9288 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9289 InVecMaskNode, MaskNode, MaskNode, sched,
9290 avx512vl_i32_info, v8i16x_info, v8i16x_info,
9291 v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
9292 MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
// w->b: requires BWI (prd = HasBWI); 256/512-bit forms produce full vectors.
9295 multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9296 SDPatternOperator MaskNode,
9297 X86FoldableSchedWrite sched, PatFrag StoreNode,
9298 PatFrag MaskedStoreNode, SDNode InVecNode,
9299 SDPatternOperator InVecMaskNode> {
9300 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9301 InVecMaskNode, MaskNode, MaskNode, sched,
9302 avx512vl_i16_info, v16i8x_info, v16i8x_info,
9303 v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
9304 MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
// Instantiations of the truncate families.  Opcode pattern per saturation
// kind: 0x3x = plain truncate, 0x2x = signed saturate (VPMOVS*),
// 0x1x = unsigned saturate (VPMOVUS*).
// NOTE(review): this listing elides several trailing continuation lines
// (e.g. the final "X86vmtruncs>;" argument of the VPMOVS* defms and the
// closing argument of VPMOVWB/VPMOVSWB) — confirm against the full source.
9307 defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, select_trunc,
9308 WriteShuffle256, truncstorevi8,
9309 masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9310 defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, select_truncs,
9311 WriteShuffle256, truncstore_s_vi8,
9312 masked_truncstore_s_vi8, X86vtruncs,
9314 defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
9315 select_truncus, WriteShuffle256,
9316 truncstore_us_vi8, masked_truncstore_us_vi8,
9317 X86vtruncus, X86vmtruncus>;
9319 defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
9320 WriteShuffle256, truncstorevi16,
9321 masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9322 defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs,
9323 WriteShuffle256, truncstore_s_vi16,
9324 masked_truncstore_s_vi16, X86vtruncs,
9326 defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
9327 select_truncus, WriteShuffle256,
9328 truncstore_us_vi16, masked_truncstore_us_vi16,
9329 X86vtruncus, X86vmtruncus>;
9331 defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
9332 WriteShuffle256, truncstorevi32,
9333 masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
9334 defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs,
9335 WriteShuffle256, truncstore_s_vi32,
9336 masked_truncstore_s_vi32, X86vtruncs,
9338 defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
9339 select_truncus, WriteShuffle256,
9340 truncstore_us_vi32, masked_truncstore_us_vi32,
9341 X86vtruncus, X86vmtruncus>;
9343 defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
9344 WriteShuffle256, truncstorevi8,
9345 masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9346 defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
9347 WriteShuffle256, truncstore_s_vi8,
9348 masked_truncstore_s_vi8, X86vtruncs,
9350 defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
9351 select_truncus, WriteShuffle256,
9352 truncstore_us_vi8, masked_truncstore_us_vi8,
9353 X86vtruncus, X86vmtruncus>;
9355 defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
9356 WriteShuffle256, truncstorevi16,
9357 masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9358 defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
9359 WriteShuffle256, truncstore_s_vi16,
9360 masked_truncstore_s_vi16, X86vtruncs,
9362 defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
9363 select_truncus, WriteShuffle256,
9364 truncstore_us_vi16, masked_truncstore_us_vi16,
9365 X86vtruncus, X86vmtruncus>;
9367 defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
9368 WriteShuffle256, truncstorevi8,
9369 masked_truncstorevi8, X86vtrunc,
9371 defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
9372 WriteShuffle256, truncstore_s_vi8,
9373 masked_truncstore_s_vi8, X86vtruncs,
9375 defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
9376 select_truncus, WriteShuffle256,
9377 truncstore_us_vi8, masked_truncstore_us_vi8,
9378 X86vtruncus, X86vmtruncus>;
// Without VLX the 256-bit truncates are unavailable, so widen the source to
// 512 bits (INSERT_SUBREG into IMPLICIT_DEF), run the Z-width instruction,
// and extract the xmm-sized result.
9380 let Predicates = [HasAVX512, NoVLX] in {
9381 def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
9382 (v8i16 (EXTRACT_SUBREG
9383 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
9384 VR256X:$src, sub_ymm)))), sub_xmm))>;
9385 def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
9386 (v4i32 (EXTRACT_SUBREG
9387 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
9388 VR256X:$src, sub_ymm)))), sub_xmm))>;
// Same widening trick for v16i16 -> v16i8, which needs the BWI-only VPMOVWB.
9391 let Predicates = [HasBWI, NoVLX] in {
9392 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9393 (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
9394 VR256X:$src, sub_ymm))), sub_xmm))>;
9397 // Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
// Maps the masked-truncate SDNodes (X86vmtrunc / X86vmtruncs / X86vmtruncus)
// onto the rrk (merge-masked) and rrkz (zero-masked) register forms of the
// named instruction.
// NOTE(review): the result-operand tails of both Pats are elided from this
// listing (the lines supplying SrcInfo.RC:$src to the instruction).
9398 multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
9399 X86VectorVTInfo DestInfo,
9400 X86VectorVTInfo SrcInfo> {
9401 def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9403 SrcInfo.KRCWM:$mask)),
9404 (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
9405 SrcInfo.KRCWM:$mask,
// Zero-masked variant: passthru is the all-zeros vector.
9408 def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9409 DestInfo.ImmAllZerosV,
9410 SrcInfo.KRCWM:$mask)),
9411 (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
// VLX-only shapes: 256-bit d->w.
9415 let Predicates = [HasVLX] in {
9416 defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
9417 defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
9418 defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
// 512-bit shapes available with base AVX512: d->w, d->b, q->w.
9421 let Predicates = [HasAVX512] in {
9422 defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
9423 defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
9424 defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
9426 defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
9427 defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
9428 defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
9430 defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
9431 defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
9432 defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
// Common maskable sign/zero-extend forms for one (SrcInfo -> DestInfo)
// shape: rr extends a register source via OpNode; rm folds a load via
// LdFrag.  AVX512_maskable generates the unmasked/merge-masked/zero-masked
// variants.
9435 multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9436 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
9437 X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
9438 let ExeDomain = DestInfo.ExeDomain in {
9439 defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
9440 (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
9441 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
9442 EVEX, Sched<[sched]>;
9444 defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
9445 (ins x86memop:$src), OpcodeStr ,"$src", "$src",
9446 (DestInfo.VT (LdFrag addr:$src))>,
9447 EVEX, Sched<[sched.Folded]>;
// Per-shape extend wrappers (letters are src/dest element widths: b=i8,
// w=i16, d=i32, q=i64).  Widths whose source uses only part of a 128-bit
// register are selected via InVecNode; full-register sources use OpNode.
// The default LdFrag is built from ExtTy ("s"/"z") # extload of the source
// element type.
// b->w: needs BWI; Z128 uses InVecNode (v16i8 source, low half consumed).
9451 multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
9452 SDNode OpNode, SDNode InVecNode, string ExtTy,
9453 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9454 let Predicates = [HasVLX, HasBWI] in {
9455 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
9456 v16i8x_info, i64mem, LdFrag, InVecNode>,
9457 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
9459 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
9460 v16i8x_info, i128mem, LdFrag, OpNode>,
9461 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
9463 let Predicates = [HasBWI] in {
9464 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
9465 v32i8x_info, i256mem, LdFrag, OpNode>,
9466 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
// b->d: Z128/Z256 both consume sub-register slices of a v16i8, so both use
// InVecNode; only Z (v16i8 -> v16i32) uses OpNode.
9470 multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
9471 SDNode OpNode, SDNode InVecNode, string ExtTy,
9472 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9473 let Predicates = [HasVLX, HasAVX512] in {
9474 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
9475 v16i8x_info, i32mem, LdFrag, InVecNode>,
9476 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
9478 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
9479 v16i8x_info, i64mem, LdFrag, InVecNode>,
9480 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
9482 let Predicates = [HasAVX512] in {
9483 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
9484 v16i8x_info, i128mem, LdFrag, OpNode>,
9485 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
// b->q: even the 512-bit form consumes only 8 bytes of a v16i8, so every
// width uses InVecNode.
9489 multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
9490 SDNode OpNode, SDNode InVecNode, string ExtTy,
9491 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9492 let Predicates = [HasVLX, HasAVX512] in {
9493 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9494 v16i8x_info, i16mem, LdFrag, InVecNode>,
9495 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
9497 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9498 v16i8x_info, i32mem, LdFrag, InVecNode>,
9499 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
9501 let Predicates = [HasAVX512] in {
9502 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9503 v16i8x_info, i64mem, LdFrag, InVecNode>,
9504 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
// w->d.
9508 multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
9509 SDNode OpNode, SDNode InVecNode, string ExtTy,
9510 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9511 let Predicates = [HasVLX, HasAVX512] in {
9512 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
9513 v8i16x_info, i64mem, LdFrag, InVecNode>,
9514 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
9516 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
9517 v8i16x_info, i128mem, LdFrag, OpNode>,
9518 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
9520 let Predicates = [HasAVX512] in {
9521 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
9522 v16i16x_info, i256mem, LdFrag, OpNode>,
9523 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
// w->q.
9527 multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
9528 SDNode OpNode, SDNode InVecNode, string ExtTy,
9529 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9530 let Predicates = [HasVLX, HasAVX512] in {
9531 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9532 v8i16x_info, i32mem, LdFrag, InVecNode>,
9533 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
9535 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9536 v8i16x_info, i64mem, LdFrag, InVecNode>,
9537 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
9539 let Predicates = [HasAVX512] in {
9540 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9541 v8i16x_info, i128mem, LdFrag, OpNode>,
9542 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
// d->q (no VEX_WIG: these forms are W-sensitive).
9546 multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
9547 SDNode OpNode, SDNode InVecNode, string ExtTy,
9548 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
9550 let Predicates = [HasVLX, HasAVX512] in {
9551 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9552 v4i32x_info, i64mem, LdFrag, InVecNode>,
9553 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
9555 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9556 v4i32x_info, i128mem, LdFrag, OpNode>,
9557 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
9559 let Predicates = [HasAVX512] in {
9560 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9561 v8i32x_info, i256mem, LdFrag, OpNode>,
9562 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
// Zero-extend (0x30-0x35, zext/zext_invec, "z" extload) and sign-extend
// (0x20-0x25, sext/sext_invec, "s" extload) instantiations.
9566 defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
9567 defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
9568 defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>;
9569 defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
9570 defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
9571 defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;
9573 defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
9574 defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
9575 defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>;
9576 defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
9577 defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
9578 defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;
9581 // Patterns that we also need any extend versions of. aext_vector_inreg
9582 // is currently legalized to zext_vector_inreg.
// Load-folding patterns for full-width extends where the whole source vector
// is loaded from memory; shared between sext/zext (and reused for anyext via
// the legalization noted above).
9583 multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
9585 let Predicates = [HasVLX, HasBWI] in {
9586 def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
9587 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
9590 let Predicates = [HasVLX] in {
9591 def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
9592 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
9594 def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
9595 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
9599 let Predicates = [HasBWI] in {
9600 def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
9601 (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
9603 let Predicates = [HasAVX512] in {
9604 def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
9605 (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
9606 def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
9607 (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
9609 def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
9610 (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
9612 def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
9613 (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
// Extends the base patterns with partial-vector load folding for the
// in-vector (InVecOp) forms: a scalar_to_vector of a narrow load (or an
// X86vzload) bitcast to the source vector type is folded into the *rm
// instruction of the matching shape.
9617 multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
9619 AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
9621 let Predicates = [HasVLX, HasBWI] in {
9622 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9623 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9624 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9625 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9626 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
9627 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
// 128-bit shapes: load width matches the number of source elements consumed
// (e.g. BDZ128 reads 4 bytes, BQZ128 reads 2 bytes via extloadi32i16).
9629 let Predicates = [HasVLX] in {
9630 def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9631 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9632 def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
9633 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9635 def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
9636 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
9638 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9639 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9640 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9641 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9642 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
9643 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9645 def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9646 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9647 def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
9648 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9650 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9651 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9652 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9653 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9654 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
9655 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
// 256-bit shapes that still use an in-vector source.
9657 let Predicates = [HasVLX] in {
9658 def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9659 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9660 def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
9661 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9663 def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9664 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9665 def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
9666 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9668 def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9669 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
9670 def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
9671 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
// 512-bit b->q still only consumes 8 bytes of the source.
9674 let Predicates = [HasAVX512] in {
9675 def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9676 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
9680 defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
9681 defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
9683 // Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
9684 // ext+trunc aggressively making it impossible to legalize the DAG to this
9685 // pattern directly.
// Lower via the d-width units: zero-extend w->d, then truncate d->b.
9686 let Predicates = [HasAVX512, NoBWI] in {
9687 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9688 (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
9689 def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
9690 (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
9693 //===----------------------------------------------------------------------===//
9694 // GATHER - SCATTER Operations
9696 // FIXME: Improve scheduling of gather/scatter instructions.
// One gather instruction: @earlyclobber on $dst (the hardware forbids the
// destination aliasing index/mask), $src1 tied to $dst as merge input, and
// the mask register is both read and written ($mask = $mask_wb).
// MaskRC defaults to the element-count writemask class but can be overridden
// (e.g. VK2WM for the 2-element q->ps/d shapes).
9697 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9698 X86MemOperand memop, PatFrag GatherNode,
9699 RegisterClass MaskRC = _.KRCWM> {
9700 let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
9701 ExeDomain = _.ExeDomain in
9702 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
9703 (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
9704 !strconcat(OpcodeStr#_.Suffix,
9705 "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
9706 [(set _.RC:$dst, MaskRC:$mask_wb,
9707 (GatherNode (_.VT _.RC:$src1), MaskRC:$mask,
9708 vectoraddr:$src2))]>, EVEX, EVEX_K,
9709 EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
// Gather shapes for 64-bit data elements (pd/q): element count matches the
// index count at every width, so dest info and index width line up directly.
9712 multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
9713 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9714 defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
9715 vy512xmem, mgatherv8i32>, EVEX_V512, VEX_W;
9716 defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
9717 vz512mem, mgatherv8i64>, EVEX_V512, VEX_W;
9718 let Predicates = [HasVLX] in {
9719 defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
9720 vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
9721 defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
9722 vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
9723 defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
9724 vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
9725 defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
9726 vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
// Gather shapes for 32-bit data elements (ps/d).  The Q-index forms gather
// half as many elements as the index vector width suggests, hence the
// narrower data info at the same EVEX width (e.g. Q##Z uses _.info256 with
// EVEX_V512: 8 qword indices -> 8 dword elements).
9730 multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
9731 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9732 defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
9733 mgatherv16i32>, EVEX_V512;
9734 defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem,
9735 mgatherv8i64>, EVEX_V512;
9736 let Predicates = [HasVLX] in {
9737 defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
9738 vy256xmem, mgatherv8i32>, EVEX_V256;
9739 defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
9740 vy128xmem, mgatherv4i64>, EVEX_V256;
9741 defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
9742 vx128xmem, mgatherv4i32>, EVEX_V128;
9743 defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
9744 vx64xmem, mgatherv2i64, VK2WM>,
// NOTE(review): the Q##Z128 defm's trailing modifiers and closing braces are
// elided from this listing.
9750 defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
9751 avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
9753 defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
9754 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
// One scatter instruction: stores vector elements under mask; the mask
// register is read and written back ($mask = $mask_wb).  mayStore is set
// explicitly since the pattern's store side effect is in the scatter node.
9756 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9757 X86MemOperand memop, PatFrag ScatterNode,
9758 RegisterClass MaskRC = _.KRCWM> {
9760 let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in
9762 def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
9763 (ins memop:$dst, MaskRC:$mask, _.RC:$src),
9764 !strconcat(OpcodeStr#_.Suffix,
9765 "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
9766 [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
9767 MaskRC:$mask, vectoraddr:$dst))]>,
9768 EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
9769 Sched<[WriteStore]>;
// Scatter shape wrappers, mirroring the gather shapes above: q_pd for
// 64-bit data elements, d_ps for 32-bit data elements (whose Q-index forms
// use narrower data info at the same EVEX width).
9772 multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
9773 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9774 defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
9775 vy512xmem, mscatterv8i32>, EVEX_V512, VEX_W;
9776 defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
9777 vz512mem, mscatterv8i64>, EVEX_V512, VEX_W;
9778 let Predicates = [HasVLX] in {
9779 defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
9780 vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
9781 defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
9782 vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
9783 defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
9784 vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
9785 defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
9786 vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
9790 multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
9791 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9792 defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
9793 mscatterv16i32>, EVEX_V512;
9794 defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem,
9795 mscatterv8i64>, EVEX_V512;
9796 let Predicates = [HasVLX] in {
9797 defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
9798 vy256xmem, mscatterv8i32>, EVEX_V256;
9799 defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
9800 vy128xmem, mscatterv4i64>, EVEX_V256;
9801 defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
9802 vx128xmem, mscatterv4i32>, EVEX_V128;
9803 defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
9804 vx64xmem, mscatterv2i64, VK2WM>,
// NOTE(review): as with the gathers, the Q##Z128 defm's tail and the closing
// braces are elided from this listing.
9809 defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
9810 avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
9812 defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
9813 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
// Gather/scatter prefetch instructions (PFI feature): masked, no patterns
// (intrinsic-only), marked both mayLoad and mayStore.
9816 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
9817 RegisterClass KRC, X86MemOperand memop> {
9818 let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
9819 def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
9820 !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
9821 EVEX, EVEX_K, Sched<[WriteLoad]>;
// Hint 0/1 x gather/scatter x {dps,qps,dpd,qpd}; the ModRM reg field (MRMnm)
// selects gather-pf0/1 vs scatter-pf0/1.
9824 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
9825 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9827 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
9828 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9830 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
9831 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9833 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
9834 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9836 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
9837 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9839 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
9840 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9842 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
9843 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9845 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
9846 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9848 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
9849 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9851 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
9852 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9854 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
9855 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9857 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
9858 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9860 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
9861 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9863 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
9864 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9866 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
9867 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9869 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
9870 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
// Mask-to-vector conversion (VPMOVM2*): sign-extend a k-register into a
// vector register; an extra pattern reuses the same instruction for anyext.
9872 multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
9873 def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
9874 !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
9875 [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
9876 EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
9878 // Also need a pattern for anyextend.
9879 def : Pat<(Vec.VT (anyext Vec.KRC:$src)),
9880 (!cast<Instruction>(NAME#"rr") Vec.KRC:$src)>;
// Instantiates all three widths: Z under prd, Z256/Z128 additionally gated
// on HasVLX.
9883 multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
9884 string OpcodeStr, Predicate prd> {
9885 let Predicates = [prd] in
9886 defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
9888 let Predicates = [prd, HasVLX] in {
9889 defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
9890 defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
9894 defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
9895 defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
9896 defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
9897 defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
// Vector-to-mask conversion (VPMOV*2M): the pattern matches a signed
// greater-than compare of zero against the source, i.e. it extracts the sign
// bit of each element into the k-register.
9899 multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
9900 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
9901 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
9902 [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
9903 EVEX, Sched<[WriteMove]>;
9906 // Use 512bit version to implement 128/256 bit in case NoVLX.
// Widens the narrow source into a 512-bit register via INSERT_SUBREG, runs the
// Z (512-bit) instruction, then copies the result into the narrow mask class.
// NOTE(review): the multiclass parameter list and the trailing
// COPY_TO_REGCLASS operand appear elided in this listing.
9907 multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
9911 def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
9912 (_.KVT (COPY_TO_REGCLASS
9913 (!cast<Instruction>(Name#"Zrr")
9914 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
9915 _.RC:$src, _.SubRegIdx)),
// Instantiate convert_vector_to_mask_common per width; with NoVLX the 128/256
// bit forms are pattern-lowered through the 512-bit instruction instead.
9919 multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
9920 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
9921 let Predicates = [prd] in
9922 defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
9925 let Predicates = [prd, HasVLX] in {
9926 defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
9928 defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
9931 let Predicates = [prd, NoVLX] in {
9932 defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
9933 defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
// VPMOV{B,W}2M require BWI; VPMOV{D,Q}2M require DQI. W/Q forms carry VEX_W.
9937 defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
9938 avx512vl_i8_info, HasBWI>;
9939 defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
9940 avx512vl_i16_info, HasBWI>, VEX_W;
9941 defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
9942 avx512vl_i32_info, HasDQI>;
9943 defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
9944 avx512vl_i64_info, HasDQI>, VEX_W;
9946 // Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
9947 // is available, but BWI is not. We can't handle this in lowering because
9948 // a target independent DAG combine likes to combine sext and trunc.
// Strategy: expand the mask to v16i32 with VPMOVM2D (a DQI instruction), then
// truncate to bytes/words with VPMOVDB/VPMOVDW. anyext reuses the same lowering.
9949 let Predicates = [HasDQI, NoBWI] in {
9950 def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
9951 (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9952 def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
9953 (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9955 def : Pat<(v16i8 (anyext (v16i1 VK16:$src))),
9956 (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9957 def : Pat<(v16i16 (anyext (v16i1 VK16:$src))),
9958 (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
// With VLX the v8i1 case can use the 256-bit forms of the same sequence.
9961 let Predicates = [HasDQI, NoBWI, HasVLX] in {
9962 def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
9963 (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
9965 def : Pat<(v8i16 (anyext (v8i1 VK8:$src))),
9966 (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
9969 //===----------------------------------------------------------------------===//
9970 // AVX-512 - COMPRESS and EXPAND
// VCOMPRESS/VPCOMPRESS encodings: maskable reg-reg form plus plain and masked
// store forms. The register form uses null_frag — selection is done by the
// Pats in compress_by_vec_width_lowering, not by this pattern.
9973 multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
9974 string OpcodeStr, X86FoldableSchedWrite sched> {
9975 defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
9976 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
9977 (null_frag)>, AVX5128IBase,
// Store forms have no ISel pattern (mayStore/hasSideEffects=0 lets the
// compiler still reason about them).
9980 let mayStore = 1, hasSideEffects = 0 in
9981 def mr : AVX5128I<opc, MRMDestMem, (outs),
9982 (ins _.MemOp:$dst, _.RC:$src),
9983 OpcodeStr # "\t{$src, $dst|$dst, $src}",
9984 []>, EVEX_CD8<_.EltSize, CD8VT1>,
9985 Sched<[sched.Folded]>;
9987 def mrk : AVX5128I<opc, MRMDestMem, (outs),
9988 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
9989 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9991 EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
9992 Sched<[sched.Folded]>;
// Selection patterns for compress: masked compressing store -> mrk form;
// X86compress with a pass-through src0 -> merge-masked rrk; with all-zeros
// pass-through -> zero-masked rrkz.
9995 multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
9996 def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
9997 (!cast<Instruction>(Name#_.ZSuffix##mrk)
9998 addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
10000 def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10001 (!cast<Instruction>(Name#_.ZSuffix##rrk)
10002 _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10003 def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10004 (!cast<Instruction>(Name#_.ZSuffix##rrkz)
10005 _.KRCWM:$mask, _.RC:$src)>;
// Per-width instantiation of the compress encodings + lowering patterns:
// 512-bit under Pred, 128/256-bit additionally gated on VLX.
10008 multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
10009 X86FoldableSchedWrite sched,
10010 AVX512VLVectorVTInfo VTInfo,
10011 Predicate Pred = HasAVX512> {
10012 let Predicates = [Pred] in
10013 defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
10014 compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10016 let Predicates = [Pred, HasVLX] in {
10017 defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
10018 compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10019 defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
10020 compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10024 // FIXME: Is there a better scheduler class for VPCOMPRESS?
// Integer (VPCOMPRESS) and FP (VCOMPRESS) element-compress instructions;
// 64-bit-element forms carry VEX_W.
10025 defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
10026 avx512vl_i32_info>, EVEX, NotMemoryFoldable;
10027 defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
10028 avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
10029 defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
10030 avx512vl_f32_info>, EVEX, NotMemoryFoldable;
10031 defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
10032 avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
// VEXPAND/VPEXPAND encodings: maskable reg-reg and reg-mem forms. Like the
// compress encodings above, null_frag is used — actual selection is via
// expand_by_vec_width_lowering.
10035 multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
10036 string OpcodeStr, X86FoldableSchedWrite sched> {
10037 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10038 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10039 (null_frag)>, AVX5128IBase,
10042 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10043 (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
10045 AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
10046 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Selection patterns for expand: masked expanding loads with undef or
// all-zeros pass-through both map to the zero-masked rmkz form; a register
// pass-through maps to the merge-masked rmk form; register X86expand maps to
// rrk/rrkz by pass-through kind.
10049 multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10051 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
10052 (!cast<Instruction>(Name#_.ZSuffix##rmkz)
10053 _.KRCWM:$mask, addr:$src)>;
10055 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
10056 (!cast<Instruction>(Name#_.ZSuffix##rmkz)
10057 _.KRCWM:$mask, addr:$src)>;
10059 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
10060 (_.VT _.RC:$src0))),
10061 (!cast<Instruction>(Name#_.ZSuffix##rmk)
10062 _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
10064 def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10065 (!cast<Instruction>(Name#_.ZSuffix##rrk)
10066 _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10067 def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10068 (!cast<Instruction>(Name#_.ZSuffix##rrkz)
10069 _.KRCWM:$mask, _.RC:$src)>;
// Per-width instantiation of the expand encodings + lowering patterns:
// 512-bit under Pred, 128/256-bit additionally gated on VLX.
10072 multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
10073 X86FoldableSchedWrite sched,
10074 AVX512VLVectorVTInfo VTInfo,
10075 Predicate Pred = HasAVX512> {
10076 let Predicates = [Pred] in
10077 defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
10078 expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10080 let Predicates = [Pred, HasVLX] in {
10081 defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
10082 expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10083 defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
10084 expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10088 // FIXME: Is there a better scheduler class for VPEXPAND?
// Integer (VPEXPAND) and FP (VEXPAND) element-expand instructions;
// 64-bit-element forms carry VEX_W.
10089 defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
10090 avx512vl_i32_info>, EVEX;
10091 defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
10092 avx512vl_i64_info>, EVEX, VEX_W;
10093 defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
10094 avx512vl_f32_info>, EVEX;
10095 defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
10096 avx512vl_f64_info>, EVEX, VEX_W;
10098 //handle instruction reg_vec1 = op(reg_vec,imm)
10100 // op(broadcast(eltVt),imm)
10101 //all instruction created with FROUND_CURRENT
// Unary packed-FP op with an 8-bit immediate: reg, mem, and element-broadcast
// (EVEX_B) memory forms; the immediate is carried as (i32 timm:$src2).
10102 multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10103 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10104 let ExeDomain = _.ExeDomain in {
10105 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10106 (ins _.RC:$src1, i32u8imm:$src2),
10107 OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
10108 (OpNode (_.VT _.RC:$src1),
10109 (i32 timm:$src2))>, Sched<[sched]>;
10110 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10111 (ins _.MemOp:$src1, i32u8imm:$src2),
10112 OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
10113 (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10114 (i32 timm:$src2))>,
10115 Sched<[sched.Folded, sched.ReadAfterFold]>;
10116 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10117 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
10118 OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
10119 "${src1}"##_.BroadcastStr##", $src2",
10120 (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10121 (i32 timm:$src2))>, EVEX_B,
10122 Sched<[sched.Folded, sched.ReadAfterFold]>;
10126 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// SAE ({sae}, suppress-all-exceptions) register-only form of the unary
// packed-FP immediate op; EVEX_B here encodes the SAE semantic, not broadcast.
10127 multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10128 SDNode OpNode, X86FoldableSchedWrite sched,
10129 X86VectorVTInfo _> {
10130 let ExeDomain = _.ExeDomain in
10131 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10132 (ins _.RC:$src1, i32u8imm:$src2),
10133 OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
10134 "$src1, {sae}, $src2",
10135 (OpNode (_.VT _.RC:$src1),
10136 (i32 timm:$src2))>,
10137 EVEX_B, Sched<[sched]>;
// Combine the normal-rounding and SAE unary forms: 512-bit gets both
// (regular + SAE uses OpNodeSAE); 128/256-bit (VLX) only get the regular form,
// since SAE is a 512-bit/scalar-only encoding.
10140 multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
10141 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10142 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10143 let Predicates = [prd] in {
10144 defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
10146 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
10147 sched.ZMM, _.info512>, EVEX_V512;
10149 let Predicates = [prd, HasVLX] in {
10150 defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM,
10151 _.info128>, EVEX_V128;
10152 defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM,
10153 _.info256>, EVEX_V256;
10157 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10158 // op(reg_vec2,mem_vec,imm)
10159 // op(reg_vec2,broadcast(eltVt),imm)
10160 //all instruction created with FROUND_CURRENT
// Binary packed-FP op with an 8-bit immediate: reg-reg, reg-mem, and
// element-broadcast (EVEX_B) forms; immediate carried as (i32 timm:$src3).
10161 multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10162 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10163 let ExeDomain = _.ExeDomain in {
10164 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10165 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10166 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10167 (OpNode (_.VT _.RC:$src1),
10169 (i32 timm:$src3))>,
10171 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10172 (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
10173 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10174 (OpNode (_.VT _.RC:$src1),
10175 (_.VT (bitconvert (_.LdFrag addr:$src2))),
10176 (i32 timm:$src3))>,
10177 Sched<[sched.Folded, sched.ReadAfterFold]>;
10178 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10179 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10180 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10181 "$src1, ${src2}"##_.BroadcastStr##", $src3",
10182 (OpNode (_.VT _.RC:$src1),
10183 (_.VT (_.BroadcastLdFrag addr:$src2)),
10184 (i32 timm:$src3))>, EVEX_B,
10185 Sched<[sched.Folded, sched.ReadAfterFold]>;
10189 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10190 // op(reg_vec2,mem_vec,imm)
// Three-operand op with i8 immediate where source and destination may have
// different vector types (DestInfo vs SrcInfo); reg-reg and reg-mem forms.
10191 multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10192 X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
10193 X86VectorVTInfo SrcInfo>{
10194 let ExeDomain = DestInfo.ExeDomain in {
10195 defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10196 (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
10197 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10198 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10199 (SrcInfo.VT SrcInfo.RC:$src2),
10200 (i8 timm:$src3)))>,
10202 defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10203 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
10204 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10205 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10206 (SrcInfo.VT (bitconvert
10207 (SrcInfo.LdFrag addr:$src2))),
10208 (i8 timm:$src3)))>,
10209 Sched<[sched.Folded, sched.ReadAfterFold]>;
10213 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10214 // op(reg_vec2,mem_vec,imm)
10215 // op(reg_vec2,broadcast(eltVt),imm)
// Same-type specialization of avx512_3Op_rm_imm8 (inherits rri/rmi) that adds
// the element-broadcast (EVEX_B) memory form.
10216 multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10217 X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10218 avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
10220 let ExeDomain = _.ExeDomain in
10221 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10222 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10223 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10224 "$src1, ${src2}"##_.BroadcastStr##", $src3",
10225 (OpNode (_.VT _.RC:$src1),
10226 (_.VT (_.BroadcastLdFrag addr:$src2)),
10227 (i8 timm:$src3))>, EVEX_B,
10228 Sched<[sched.Folded, sched.ReadAfterFold]>;
10231 //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10232 // op(reg_vec2,mem_scalar,imm)
// Scalar FP op with i8 immediate: reg-reg and scalar-memory forms, using the
// scalar (_scalar) maskable wrapper and the intrinsic scalar-memory pattern.
10233 multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10234 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10235 let ExeDomain = _.ExeDomain in {
10236 defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10237 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10238 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10239 (OpNode (_.VT _.RC:$src1),
10241 (i32 timm:$src3))>,
10243 defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10244 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
10245 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10246 (OpNode (_.VT _.RC:$src1),
10247 (_.VT _.ScalarIntMemCPat:$src2),
10248 (i32 timm:$src3))>,
10249 Sched<[sched.Folded, sched.ReadAfterFold]>;
10253 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// SAE register-only form of the binary packed-FP immediate op; EVEX_B encodes
// the {sae} semantic here.
10254 multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10255 SDNode OpNode, X86FoldableSchedWrite sched,
10256 X86VectorVTInfo _> {
10257 let ExeDomain = _.ExeDomain in
10258 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10259 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10260 OpcodeStr, "$src3, {sae}, $src2, $src1",
10261 "$src1, $src2, {sae}, $src3",
10262 (OpNode (_.VT _.RC:$src1),
10264 (i32 timm:$src3))>,
10265 EVEX_B, Sched<[sched]>;
10268 //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Scalar SAE form; note the explicit NAME# prefix on the defm, unlike the
// packed variant above.
10269 multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10270 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10271 let ExeDomain = _.ExeDomain in
10272 defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10273 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10274 OpcodeStr, "$src3, {sae}, $src2, $src1",
10275 "$src1, $src2, {sae}, $src3",
10276 (OpNode (_.VT _.RC:$src1),
10278 (i32 timm:$src3))>,
10279 EVEX_B, Sched<[sched]>;
// Combine regular and SAE binary packed forms: 512-bit gets both; 128/256-bit
// (VLX) get only the regular-rounding form.
10282 multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
10283 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10284 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10285 let Predicates = [prd] in {
10286 defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10287 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
10291 let Predicates = [prd, HasVLX] in {
10292 defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10294 defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
// Per-width instantiation of avx512_3Op_rm_imm8 (mixed src/dst types);
// defaults to the BWI predicate since its users are byte/word instructions.
10299 multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
10300 X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
10301 AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
10302 let Predicates = [Pred] in {
10303 defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
10304 SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
10306 let Predicates = [Pred, HasVLX] in {
10307 defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
10308 SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
10309 defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
10310 SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
// Per-width instantiation of avx512_3Op_imm8 (same src/dst type, with
// broadcast form); defaults to HasAVX512.
10314 multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
10315 bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
10316 Predicate Pred = HasAVX512> {
10317 let Predicates = [Pred] in {
10318 defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10321 let Predicates = [Pred, HasVLX] in {
10322 defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10324 defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
// Scalar version: a single "Z" instantiation combining the regular and SAE
// scalar forms (scalar ops use the XMM scheduling class).
10329 multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
10330 X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
10331 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
10332 let Predicates = [prd] in {
10333 defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
10334 avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
// Instantiate both PS (f32, opcPs) and PD (f64, opcPd, VEX_W) flavors of the
// unary packed SAE immediate op.
10338 multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
10339 bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
10340 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10341 defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
10342 opcPs, OpNode, OpNodeSAE, sched, prd>,
10343 EVEX_CD8<32, CD8VF>;
10344 defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
10345 opcPd, OpNode, OpNodeSAE, sched, prd>,
10346 EVEX_CD8<64, CD8VF>, VEX_W;
// Packed unary immediate FP ops: VREDUCE (DQI), VRNDSCALE and VGETMANT
// (AVX512F). VRNDSCALE is the only one with distinct PS/PD opcodes (0x08/0x09).
10349 defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
10350 X86VReduce, X86VReduceSAE, SchedWriteFRnd, HasDQI>,
10351 AVX512AIi8Base, EVEX;
10352 defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
10353 X86VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>,
10354 AVX512AIi8Base, EVEX;
10355 defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
10356 X86VGetMant, X86VGetMantSAE, SchedWriteFRnd, HasAVX512>,
10357 AVX512AIi8Base, EVEX;
// Packed VRANGE (DQI) and the scalar immediate ops: VRANGES{D,S} (DQI),
// VREDUCES{D,S} (DQI), VGETMANTS{D,S} (AVX512F). Scalar forms are VEX_LIG with
// CD8VT1 tuple; all double-precision forms carry VEX_W.
10359 defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
10360 0x50, X86VRange, X86VRangeSAE,
10361 SchedWriteFAdd, HasDQI>,
10362 AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10363 defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
10364 0x50, X86VRange, X86VRangeSAE,
10365 SchedWriteFAdd, HasDQI>,
10366 AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10368 defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
10369 f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10370 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10371 defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
10372 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10373 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10375 defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
10376 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10377 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10378 defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
10379 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10380 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10382 defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
10383 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10384 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10385 defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
10386 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10387 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
// VSHUFF/VSHUFI 128-bit-lane shuffles. X86Shuf128 is matched in CastInfo's
// type and bitcast to the instruction's type. EVEX2VEXOverride names the
// VEX VPERM2F128/VPERM2I128 instruction used when compressing EVEX to VEX.
10389 multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
10390 X86FoldableSchedWrite sched,
10392 X86VectorVTInfo CastInfo,
10393 string EVEX2VEXOvrd> {
10394 let ExeDomain = _.ExeDomain in {
10395 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10396 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10397 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10399 (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
10400 (i8 timm:$src3)))))>,
10401 Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
10402 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10403 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10404 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10407 (CastInfo.VT (X86Shuf128 _.RC:$src1,
10408 (CastInfo.LdFrag addr:$src2),
10409 (i8 timm:$src3)))))>,
10410 Sched<[sched.Folded, sched.ReadAfterFold]>,
10411 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
10412 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10413 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10414 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10415 "$src1, ${src2}"##_.BroadcastStr##", $src3",
10419 (X86Shuf128 _.RC:$src1,
10420 (_.BroadcastLdFrag addr:$src2),
10421 (i8 timm:$src3)))))>, EVEX_B,
10422 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiate the 128-bit-lane shuffle at 512-bit (no EVEX2VEX override — no
// VEX equivalent at 512-bit) and 256-bit (VLX, with the override).
10426 multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
10427 AVX512VLVectorVTInfo _,
10428 AVX512VLVectorVTInfo CastInfo, bits<8> opc,
10429 string EVEX2VEXOvrd>{
10430 let Predicates = [HasAVX512] in
10431 defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10432 _.info512, CastInfo.info512, "">, EVEX_V512;
10434 let Predicates = [HasAVX512, HasVLX] in
10435 defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10436 _.info256, CastInfo.info256,
10437 EVEX2VEXOvrd>, EVEX_V256;
// FP (VSHUFF*, opcode 0x23) and integer (VSHUFI*, opcode 0x43) lane shuffles.
// The 32-bit-element forms cast through the 64-bit-element info types.
10440 defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
10441 avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10442 defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
10443 avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10444 defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
10445 avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10446 defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
10447 avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10449 let Predicates = [HasAVX512] in {
10450 // Provide fallback in case the load node that is used in the broadcast
10451 // patterns above is used by additional users, which prevents the pattern
// Lower a 128-bit subvector broadcast to 512-bit via widening (INSERT_SUBREG
// into IMPLICIT_DEF) followed by a VSHUFF/VSHUFI lane shuffle. 16-bit and
// 8-bit element types reuse the i32 shuffle since lane granularity is 128 bits.
// NOTE(review): the shuffle immediates appear elided in this listing.
10453 def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
10454 (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10455 (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10457 def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
10458 (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10459 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10462 def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
10463 (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10464 (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10466 def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
10467 (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10468 (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10471 def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
10472 (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10473 (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10476 def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
10477 (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10478 (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
// VALIGND/VALIGNQ: concatenate-and-shift-right by elements (X86VAlign), with
// reg-reg, reg-mem, and broadcast forms. EVEX2VEX compression maps the rr/rm
// forms onto VPALIGNR encodings.
10482 multiclass avx512_valign<bits<8> opc, string OpcodeStr,
10483 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10484 // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
10485 // instantiation of this class.
10486 let ExeDomain = _.ExeDomain in {
10487 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10488 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10489 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10490 (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
10491 Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
10492 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10493 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10494 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10495 (_.VT (X86VAlign _.RC:$src1,
10496 (bitconvert (_.LdFrag addr:$src2)),
10497 (i8 timm:$src3)))>,
10498 Sched<[sched.Folded, sched.ReadAfterFold]>,
10499 EVEX2VEXOverride<"VPALIGNRrmi">;
10501 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10502 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10503 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10504 "$src1, ${src2}"##_.BroadcastStr##", $src3",
10505 (X86VAlign _.RC:$src1,
10506 (_.VT (_.BroadcastLdFrag addr:$src2)),
10507 (i8 timm:$src3))>, EVEX_B,
10508 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Per-width VALIGN instantiation. The 256-bit form clears EVEX2VEXOverride
// (set to ?) because the override chosen inside avx512_valign is not valid
// there (see the comment below).
10512 multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
10513 AVX512VLVectorVTInfo _> {
10514 let Predicates = [HasAVX512] in {
10515 defm Z : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
10516 AVX512AIi8Base, EVEX_4V, EVEX_V512;
10518 let Predicates = [HasAVX512, HasVLX] in {
10519 defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
10520 AVX512AIi8Base, EVEX_4V, EVEX_V128;
10521 // We can't really override the 256-bit version so change it back to unset.
10522 let EVEX2VEXOverride = ? in
10523 defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
10524 AVX512AIi8Base, EVEX_4V, EVEX_V256;
// VALIGND/VALIGNQ (element-granularity align) and VPALIGNR (byte-granularity,
// BWI via avx512_common_3Op_rm_imm8's default predicate).
10528 defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
10529 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
10530 defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
10531 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
10534 defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
10535 SchedWriteShuffle, avx512vl_i8_info,
10536 avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
10538 // Fragments to help convert valignq into masked valignd. Or valignq/valignd
// Immediate rescaling transforms: a valignq shift count times 2 gives the
// equivalent valignd count (x2 elements); times 8 gives the vpalignr byte
// count; a valignd count times 4 gives the vpalignr byte count.
10540 def ValignqImm32XForm : SDNodeXForm<timm, [{
10541 return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
10543 def ValignqImm8XForm : SDNodeXForm<timm, [{
10544 return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
10546 def ValigndImm8XForm : SDNodeXForm<timm, [{
10547 return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
// Rewrite a masked (vselect) align done in the From type as a masked align in
// the To type, rescaling the immediate with ImmXForm. Covers merge-masked
// (rrik/rmik) and zero-masked (rrikz/rmikz) reg and load forms.
// NOTE(review): the bitconvert/pass-through operand lines of each vselect
// appear elided in this listing.
10550 multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
10551 X86VectorVTInfo From, X86VectorVTInfo To,
10552 SDNodeXForm ImmXForm> {
10553 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10555 (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10558 (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
10559 To.RC:$src1, To.RC:$src2,
10560 (ImmXForm timm:$src3))>;
10562 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10564 (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10567 (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
10568 To.RC:$src1, To.RC:$src2,
10569 (ImmXForm timm:$src3))>;
10571 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10573 (From.VT (OpNode From.RC:$src1,
10574 (From.LdFrag addr:$src2),
10577 (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
10578 To.RC:$src1, addr:$src2,
10579 (ImmXForm timm:$src3))>;
10581 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10583 (From.VT (OpNode From.RC:$src1,
10584 (From.LdFrag addr:$src2),
10587 (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
10588 To.RC:$src1, addr:$src2,
10589 (ImmXForm timm:$src3))>;
// Extends avx512_vpalign_mask_lowering with broadcast-load (rmbi/rmbik/rmbikz)
// forms: the From-type broadcast load is re-expressed as a To-type broadcast.
10592 multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
10593 X86VectorVTInfo From,
10594 X86VectorVTInfo To,
10595 SDNodeXForm ImmXForm> :
10596 avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
10597 def : Pat<(From.VT (OpNode From.RC:$src1,
10598 (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
10600 (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
10601 (ImmXForm timm:$src3))>;
10603 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10605 (From.VT (OpNode From.RC:$src1,
10607 (To.VT (To.BroadcastLdFrag addr:$src2))),
10610 (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
10611 To.RC:$src1, addr:$src2,
10612 (ImmXForm timm:$src3))>;
10614 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10616 (From.VT (OpNode From.RC:$src1,
10618 (To.VT (To.BroadcastLdFrag addr:$src2))),
10621 (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
10622 To.RC:$src1, addr:$src2,
10623 (ImmXForm timm:$src3))>;
10626 let Predicates = [HasAVX512] in {
10627 // For 512-bit we lower to the widest element type we can. So we only need
10628 // to handle converting valignq to valignd.
10629 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
10630 v16i32_info, ValignqImm32XForm>;
10633 let Predicates = [HasVLX] in {
10634 // For 128-bit we lower to the widest element type we can. So we only need
10635 // to handle converting valignq to valignd.
10636 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
10637 v4i32x_info, ValignqImm32XForm>;
10638 // For 256-bit we lower to the widest element type we can. So we only need
10639 // to handle converting valignq to valignd.
10640 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
10641 v8i32x_info, ValignqImm32XForm>;
10644 let Predicates = [HasVLX, HasBWI] in {
10645 // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
10646 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
10647 v16i8x_info, ValignqImm8XForm>;
10648 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
10649 v16i8x_info, ValigndImm8XForm>;
// VDBPSADBW (BWI): double-block packed SAD; i8 sources, i16 destination, so
// it uses the mixed src/dst avx512_3Op_rm_imm8 path.
10652 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
10653 SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
10654 EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
// Generic maskable unary op: reg-reg and reg-mem forms.
10656 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10657 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10658 let ExeDomain = _.ExeDomain in {
10659 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10660 (ins _.RC:$src1), OpcodeStr,
10662 (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
10665 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10666 (ins _.MemOp:$src1), OpcodeStr,
10668 (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
10669 EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
10670 Sched<[sched.Folded]>;
// Unary op with an additional element-broadcast (EVEX_B) memory form,
// inheriting rr/rm from avx512_unary_rm.
10674 multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
10675 X86FoldableSchedWrite sched, X86VectorVTInfo _> :
10676 avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
10677 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10678 (ins _.ScalarMemOp:$src1), OpcodeStr,
10679 "${src1}"##_.BroadcastStr,
10680 "${src1}"##_.BroadcastStr,
10681 (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
10682 EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
10683 Sched<[sched.Folded]>;
// Instantiates avx512_unary_rm at 512/256/128-bit widths. The ZMM version is
// gated only on prd; the YMM/XMM versions additionally require HasVLX.
10686 multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
10687 X86SchedWriteWidths sched,
10688 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10689 let Predicates = [prd] in
10690 defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
10693 let Predicates = [prd, HasVLX] in {
10694 defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
10696 defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
// Same as avx512_unary_rm_vl but with the broadcast (rmb) form included,
// for element types (d/q) that support embedded broadcast.
10701 multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
10702 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
10704 let Predicates = [prd] in
10705 defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
10708 let Predicates = [prd, HasVLX] in {
10709 defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
10711 defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
// Dword/qword pair of a unary op: the "q" variant carries VEX_W and both
// variants get the broadcast form (rmb) since d/q elements can broadcast.
10716 multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
10717 SDNode OpNode, X86SchedWriteWidths sched,
10719 defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
10720 avx512vl_i64_info, prd>, VEX_W;
10721 defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
10722 avx512vl_i32_info, prd>;
// Byte/word pair of a unary op. No broadcast form: b/w elements have no
// embedded-broadcast encoding. VEX_WIG because W is ignored for these.
10725 multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
10726 SDNode OpNode, X86SchedWriteWidths sched,
10728 defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
10729 avx512vl_i16_info, prd>, VEX_WIG;
10730 defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
10731 avx512vl_i8_info, prd>, VEX_WIG;
// All four element widths (b/w/d/q) of a unary op, e.g. VPABS below.
10734 multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
10735 bits<8> opc_d, bits<8> opc_q,
10736 string OpcodeStr, SDNode OpNode,
10737 X86SchedWriteWidths sched> {
10738 defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
10740 avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
// VPABS - packed absolute value, opcodes 0x1C-0x1F for b/w/d/q.
10744 defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
10747 // VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
10748 let Predicates = [HasAVX512, NoVLX] in {
10749 def : Pat<(v4i64 (abs VR256X:$src)),
10752 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
10754 def : Pat<(v2i64 (abs VR128X:$src)),
10757 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
10761 // Use 512bit version to implement 128/256 bit.
// Lowers 256/128-bit forms of a unary op via the 512-bit instruction when
// VLX is unavailable: widen the source into a ZMM register with
// INSERT_SUBREG/IMPLICIT_DEF, run the Zrr instruction, then extract the
// original-width subregister (upper lanes produced by the op are discarded).
10762 multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
10763 AVX512VLVectorVTInfo _, Predicate prd> {
10764 let Predicates = [prd, NoVLX] in {
10765 def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
10767 (!cast<Instruction>(InstrStr # "Zrr")
10768 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
10769 _.info256.RC:$src1,
10770 _.info256.SubRegIdx)),
10771 _.info256.SubRegIdx)>;
10773 def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
10775 (!cast<Instruction>(InstrStr # "Zrr")
10776 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
10777 _.info128.RC:$src1,
10778 _.info128.SubRegIdx)),
10779 _.info128.SubRegIdx)>;
// CDI instructions: leading-zero count and conflict detection (d/q only).
10783 defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
10784 SchedWriteVecIMul, HasCDI>;
10786 // FIXME: Is there a better scheduler class for VPCONFLICT?
10787 defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
10788 SchedWriteVecALU, HasCDI>;
10790 // VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
10791 defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
10792 defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
10794 //===---------------------------------------------------------------------===//
10795 // Counts number of ones - VPOPCNTD and VPOPCNTQ
10796 //===---------------------------------------------------------------------===//
10798 // FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
10799 defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
10800 SchedWriteVecALU, HasVPOPCNTDQ>;
// VPOPCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
10802 defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
10803 defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
10805 //===---------------------------------------------------------------------===//
10806 // Replicate Single FP - MOVSHDUP and MOVSLDUP
10807 //===---------------------------------------------------------------------===//
// MOVSHDUP/MOVSLDUP: duplicate odd/even single-precision elements. Both are
// f32-only (avx512vl_f32_info) with XS prefix; opcodes 0x16 / 0x12.
10809 multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
10810 X86SchedWriteWidths sched> {
10811 defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
10812 avx512vl_f32_info, HasAVX512>, XS;
10815 defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
10816 SchedWriteFShuffle>;
10817 defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
10818 SchedWriteFShuffle>;
10820 //===----------------------------------------------------------------------===//
10821 // AVX-512 - MOVDDUP
10822 //===----------------------------------------------------------------------===//
// 128-bit VMOVDDUP is special: it duplicates the single low f64 element, so
// the register form is modeled as X86VBroadcast and the memory form loads
// only 64 bits (ScalarMemOp + CD8VH half-width tuple) and broadcasts it.
10824 multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
10825 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10826 let ExeDomain = _.ExeDomain in {
10827 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10828 (ins _.RC:$src), OpcodeStr, "$src", "$src",
10829 (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
10831 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10832 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
10833 (_.VT (_.BroadcastLdFrag addr:$src))>,
10834 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
10835 Sched<[sched.Folded]>;
// MOVDDUP at all widths: 512/256-bit use the generic unary pattern with the
// X86Movddup node; 128-bit uses the dedicated avx512_movddup_128 above.
// NOTE(review): the OpNode parameter is unused here - the bodies hardcode
// X86Movddup; presumably kept for signature symmetry with sibling
// multiclasses. Confirm before relying on OpNode being honored.
10839 multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
10840 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
10841 defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
10842 VTInfo.info512>, EVEX_V512;
10844 let Predicates = [HasAVX512, HasVLX] in {
10845 defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
10846 VTInfo.info256>, EVEX_V256;
10847 defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
10848 VTInfo.info128>, EVEX_V128;
// Top-level MOVDDUP wrapper: f64 element type, XD prefix, VEX_W.
10852 multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
10853 X86SchedWriteWidths sched> {
10854 defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
10855 avx512vl_f64_info>, XD, VEX_W;
10858 defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;
// Select 128-bit VMOVDDUP for v2f64 broadcasts of a scalar register, a
// 128-bit load, or a zero-extending 64-bit load, including merge-masked
// (rrk/rmk) and zero-masked (rrkz/rmkz) vselect forms.
10860 let Predicates = [HasVLX] in {
10861 def : Pat<(v2f64 (X86VBroadcast f64:$src)),
10862 (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10863 def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
10864 (VMOVDDUPZ128rm addr:$src)>;
10865 def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
10866 (VMOVDDUPZ128rm addr:$src)>;
10868 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
10869 (v2f64 VR128X:$src0)),
10870 (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
10871 (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10872 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
10874 (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10876 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)),
10877 (v2f64 VR128X:$src0)),
10878 (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
10879 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)),
10881 (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
10883 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
10884 (v2f64 VR128X:$src0)),
10885 (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
10886 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
10888 (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
10891 //===----------------------------------------------------------------------===//
10892 // AVX-512 - Unpack Instructions
10893 //===----------------------------------------------------------------------===//
// FP unpacks reuse the generic FP binop multiclass; integer unpacks use the
// per-element-width integer binop multiclasses (b/w gated on BWI).
10895 defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
10896 SchedWriteFShuffleSizes, 0, 1>;
10897 defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
10898 SchedWriteFShuffleSizes>;
10900 defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
10901 SchedWriteShuffle, HasBWI>;
10902 defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
10903 SchedWriteShuffle, HasBWI>;
10904 defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
10905 SchedWriteShuffle, HasBWI>;
10906 defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
10907 SchedWriteShuffle, HasBWI>;
10909 defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
10910 SchedWriteShuffle, HasAVX512>;
10911 defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
10912 SchedWriteShuffle, HasAVX512>;
10913 defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
10914 SchedWriteShuffle, HasAVX512>;
10915 defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
10916 SchedWriteShuffle, HasAVX512>;
10918 //===----------------------------------------------------------------------===//
10919 // AVX-512 - Extract & Insert Integer Instructions
10920 //===----------------------------------------------------------------------===//
// Memory-destination form of byte/word element extract: the extracted
// element is truncated to _.EltVT and stored directly.
10922 multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
10923 X86VectorVTInfo _> {
10924 def mr : AVX512Ii8<opc, MRMDestMem, (outs),
10925 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
10926 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10927 [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
10929 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
// VPEXTRB: register form (opcode 0x14, TA prefix) zero-extends the byte
// into a GPR; memory form comes from avx512_extract_elt_bw_m.
10932 multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
10933 let Predicates = [HasBWI] in {
10934 def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
10935 (ins _.RC:$src1, u8imm:$src2),
10936 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10937 [(set GR32orGR64:$dst,
10938 (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
10939 EVEX, TAPD, Sched<[WriteVecExtract]>;
10941 defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
// VPEXTRW: the legacy reg form uses opcode 0xC5 (MRMSrcReg); 0x15 exists as
// a disassembly-only alias (rr_REV, MRMDestReg) and as the store form.
10945 multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
10946 let Predicates = [HasBWI] in {
10947 def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
10948 (ins _.RC:$src1, u8imm:$src2),
10949 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10950 [(set GR32orGR64:$dst,
10951 (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
10952 EVEX, PD, Sched<[WriteVecExtract]>;
// Disassembly-only encoding of the register form (no patterns); folds to
// the same logical instruction via FoldGenData.
10954 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
10955 def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
10956 (ins _.RC:$src1, u8imm:$src2),
10957 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
10958 EVEX, TAPD, FoldGenData<NAME#rr>,
10959 Sched<[WriteVecExtract]>;
10961 defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
// VPEXTRD/VPEXTRQ (DQI): plain extractelt to a GR32/GR64 register or to
// memory; no truncation needed since the element fills the destination.
10965 multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
10966 RegisterClass GRC> {
10967 let Predicates = [HasDQI] in {
10968 def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
10969 (ins _.RC:$src1, u8imm:$src2),
10970 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10972 (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
10973 EVEX, TAPD, Sched<[WriteVecExtract]>;
10975 def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
10976 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
10977 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10978 [(store (extractelt (_.VT _.RC:$src1),
10979 imm:$src2),addr:$dst)]>,
10980 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
10981 Sched<[WriteVecExtractSt]>;
// VPEXTRD and VPEXTRQ share opcode 0x16; VEX_W selects the qword form.
10985 defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
10986 defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
10987 defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
10988 defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
// Memory-source form of element insert: loads the scalar via LdFrag and
// inserts it at immediate position $src3.
10990 multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
10991 X86VectorVTInfo _, PatFrag LdFrag> {
10992 def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
10993 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10994 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10996 (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
10997 EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
// VPINSRB/VPINSRW (BWI): register form takes the scalar from a GPR; the
// memory form uses an extending load (LdFrag supplied by the instantiation).
11000 multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
11001 X86VectorVTInfo _, PatFrag LdFrag> {
11002 let Predicates = [HasBWI] in {
11003 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11004 (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
11005 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11007 (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
11008 Sched<[WriteVecInsert]>;
11010 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
// VPINSRD/VPINSRQ (DQI): generic insertelt from a full-width GPR; memory
// form uses the type's natural scalar load.
11014 multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
11015 X86VectorVTInfo _, RegisterClass GRC> {
11016 let Predicates = [HasDQI] in {
11017 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11018 (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
11019 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11021 (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
11022 EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
11024 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
11025 _.ScalarLdFrag>, TAPD;
11029 defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
11030 extloadi8>, TAPD, VEX_WIG;
11031 defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
11032 extloadi16>, PD, VEX_WIG;
// VPINSRD and VPINSRQ intentionally share opcode 0x22; VEX_W selects the
// qword form.
11033 defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
11034 defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
11036 //===----------------------------------------------------------------------===//
11037 // VSHUFPS - VSHUFPD Operations
11038 //===----------------------------------------------------------------------===//
// VSHUFPS/VSHUFPD: immediate-controlled FP shuffle; the integer VTInfo
// parameter supplies the matching integer lane types for the multiclass.
11040 multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
11041 AVX512VLVectorVTInfo VTInfo_FP>{
11042 defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
11043 SchedWriteFShuffle>,
11044 EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
11045 AVX512AIi8Base, EVEX_4V;
11048 defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
11049 defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
11051 //===----------------------------------------------------------------------===//
11052 // AVX-512 - Byte shift Left/Right
11053 //===----------------------------------------------------------------------===//
11055 // FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well?
// Whole-register byte shift (PSLLDQ/PSRLDQ style): immediate byte count,
// reg/reg and reg/mem forms. MRMr/MRMm carry the /7 or /3 opcode extension.
11056 multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
11057 Format MRMm, string OpcodeStr,
11058 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11059 def rr : AVX512<opc, MRMr,
11060 (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
11061 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11062 [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
11064 def rm : AVX512<opc, MRMm,
11065 (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
11066 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11067 [(set _.RC:$dst,(_.VT (OpNode
11068 (_.VT (bitconvert (_.LdFrag addr:$src1))),
11069 (i8 timm:$src2))))]>,
11070 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates the byte shift at 512/256/128-bit widths (VLX for the
// narrower forms), always over byte vectors.
11073 multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
11074 Format MRMm, string OpcodeStr,
11075 X86SchedWriteWidths sched, Predicate prd>{
11076 let Predicates = [prd] in
11077 defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11078 sched.ZMM, v64i8_info>, EVEX_V512;
11079 let Predicates = [prd, HasVLX] in {
11080 defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11081 sched.YMM, v32i8x_info>, EVEX_V256;
11082 defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11083 sched.XMM, v16i8x_info>, EVEX_V128;
// Both use opcode 0x73; the ModRM reg-field extension (/7 vs /3)
// distinguishes shift-left from shift-right.
11086 defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
11087 SchedWriteShuffle, HasBWI>,
11088 AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11089 defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
11090 SchedWriteShuffle, HasBWI>,
11091 AVX512PDIi8Base, EVEX_4V, VEX_WIG;
// PSADBW: sum of absolute byte differences. Note the destination and source
// use different VTInfos (i64 accumulator lanes from i8 sources). The rr form
// is commutable for register-allocation purposes.
11093 multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
11094 string OpcodeStr, X86FoldableSchedWrite sched,
11095 X86VectorVTInfo _dst, X86VectorVTInfo _src> {
11096 let isCommutable = 1 in
11097 def rr : AVX512BI<opc, MRMSrcReg,
11098 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
11099 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11100 [(set _dst.RC:$dst,(_dst.VT
11101 (OpNode (_src.VT _src.RC:$src1),
11102 (_src.VT _src.RC:$src2))))]>,
11104 def rm : AVX512BI<opc, MRMSrcMem,
11105 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
11106 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11107 [(set _dst.RC:$dst,(_dst.VT
11108 (OpNode (_src.VT _src.RC:$src1),
11109 (_src.VT (bitconvert
11110 (_src.LdFrag addr:$src2))))))]>,
11111 Sched<[sched.Folded, sched.ReadAfterFold]>;
// VPSADBW at all widths: i8 sources producing i64 lane sums, VLX-gated
// narrow forms.
11114 multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
11115 string OpcodeStr, X86SchedWriteWidths sched,
11117 let Predicates = [prd] in
11118 defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
11119 v8i64_info, v64i8_info>, EVEX_V512;
11120 let Predicates = [prd, HasVLX] in {
11121 defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
11122 v4i64x_info, v32i8x_info>, EVEX_V256;
11123 defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
11124 v2i64x_info, v16i8x_info>, EVEX_V128;
11128 defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
11129 SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
11131 // Transforms to swizzle an immediate to enable better matching when
11132 // memory operand isn't in the right place.
// The VPTERNLOG imm8 is a 3-input truth table indexed by
// (op0_bit << 2) | (op1_bit << 1) | op2_bit. Permuting the operands
// therefore permutes the bit positions of the immediate. Fixed points of
// swapping op0/op2 are indices 000,010,101,111 -> mask 0xa5.
11133 def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
11134 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
11135 uint8_t Imm = N->getZExtValue();
11136 // Swap bits 1/4 and 3/6.
11137 uint8_t NewImm = Imm & 0xa5;
11138 if (Imm & 0x02) NewImm |= 0x10;
11139 if (Imm & 0x10) NewImm |= 0x02;
11140 if (Imm & 0x08) NewImm |= 0x40;
11141 if (Imm & 0x40) NewImm |= 0x08;
11142 return getI8Imm(NewImm, SDLoc(N));
// Fixed points of swapping op0/op1 are indices 000,001,110,111 -> mask 0xc3.
11144 def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
11145 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
11146 uint8_t Imm = N->getZExtValue();
11147 // Swap bits 2/4 and 3/5.
11148 uint8_t NewImm = Imm & 0xc3;
11149 if (Imm & 0x04) NewImm |= 0x10;
11150 if (Imm & 0x10) NewImm |= 0x04;
11151 if (Imm & 0x08) NewImm |= 0x20;
11152 if (Imm & 0x20) NewImm |= 0x08;
11153 return getI8Imm(NewImm, SDLoc(N));
// Fixed points of swapping op1/op2 are indices 000,011,100,111 -> mask 0x99.
11155 def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
11156 // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
11157 uint8_t Imm = N->getZExtValue();
11158 // Swap bits 1/2 and 5/6.
11159 uint8_t NewImm = Imm & 0x99;
11160 if (Imm & 0x02) NewImm |= 0x04;
11161 if (Imm & 0x04) NewImm |= 0x02;
11162 if (Imm & 0x20) NewImm |= 0x40;
11163 if (Imm & 0x40) NewImm |= 0x20;
11164 return getI8Imm(NewImm, SDLoc(N));
// Cyclic rotation (op0,op1,op2) -> (op1,op2,op0): only indices 000 and 111
// are fixed -> mask 0x81.
11166 def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
11167 // Convert a VPTERNLOG immediate by moving operand 1 to the end.
11168 uint8_t Imm = N->getZExtValue();
11169 // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
11170 uint8_t NewImm = Imm & 0x81;
11171 if (Imm & 0x02) NewImm |= 0x04;
11172 if (Imm & 0x04) NewImm |= 0x10;
11173 if (Imm & 0x08) NewImm |= 0x40;
11174 if (Imm & 0x10) NewImm |= 0x02;
11175 if (Imm & 0x20) NewImm |= 0x08;
11176 if (Imm & 0x40) NewImm |= 0x20;
11177 return getI8Imm(NewImm, SDLoc(N));
// Inverse rotation of VPTERNLOG231 ((op0,op1,op2) -> (op2,op0,op1)); again
// only indices 000 and 111 are fixed -> mask 0x81.
11179 def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
11180 // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
11181 uint8_t Imm = N->getZExtValue();
11182 // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
11183 uint8_t NewImm = Imm & 0x81;
11184 if (Imm & 0x02) NewImm |= 0x10;
11185 if (Imm & 0x04) NewImm |= 0x02;
11186 if (Imm & 0x08) NewImm |= 0x20;
11187 if (Imm & 0x10) NewImm |= 0x04;
11188 if (Imm & 0x20) NewImm |= 0x40;
11189 if (Imm & 0x40) NewImm |= 0x08;
11190 return getI8Imm(NewImm, SDLoc(N));
11193 multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
11194 X86FoldableSchedWrite sched, X86VectorVTInfo _,
11196 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11197 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11198 (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
11199 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11200 (OpNode (_.VT _.RC:$src1),
11203 (i8 timm:$src4)), 1, 1>,
11204 AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
11205 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11206 (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
11207 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11208 (OpNode (_.VT _.RC:$src1),
11210 (_.VT (bitconvert (_.LdFrag addr:$src3))),
11211 (i8 timm:$src4)), 1, 0>,
11212 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11213 Sched<[sched.Folded, sched.ReadAfterFold]>;
11214 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11215 (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
11216 OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
11217 "$src2, ${src3}"##_.BroadcastStr##", $src4",
11218 (OpNode (_.VT _.RC:$src1),
11220 (_.VT (_.BroadcastLdFrag addr:$src3)),
11221 (i8 timm:$src4)), 1, 0>, EVEX_B,
11222 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11223 Sched<[sched.Folded, sched.ReadAfterFold]>;
11224 }// Constraints = "$src1 = $dst"
11226 // Additional patterns for matching passthru operand in other positions.
11227 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11228 (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11230 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11231 _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11232 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11233 (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
11235 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11236 _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11238 // Additional patterns for matching loads in other positions.
11239 def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
11240 _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
11241 (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
11242 addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11243 def : Pat<(_.VT (OpNode _.RC:$src1,
11244 (bitconvert (_.LdFrag addr:$src3)),
11245 _.RC:$src2, (i8 timm:$src4))),
11246 (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
11247 addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11249 // Additional patterns for matching zero masking with loads in other
11251 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11252 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11253 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11255 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11256 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11257 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11258 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11259 _.RC:$src2, (i8 timm:$src4)),
11261 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11262 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11264 // Additional patterns for matching masked loads with different
11266 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11267 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11268 _.RC:$src2, (i8 timm:$src4)),
11270 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11271 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11272 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11273 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11274 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11276 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11277 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11278 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11279 (OpNode _.RC:$src2, _.RC:$src1,
11280 (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
11282 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11283 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11284 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11285 (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
11286 _.RC:$src1, (i8 timm:$src4)),
11288 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11289 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11290 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11291 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11292 _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11294 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11295 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11297 // Additional patterns for matching broadcasts in other positions.
11298 def : Pat<(_.VT (OpNode (_.BroadcastLdFrag addr:$src3),
11299 _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
11300 (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
11301 addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11302 def : Pat<(_.VT (OpNode _.RC:$src1,
11303 (_.BroadcastLdFrag addr:$src3),
11304 _.RC:$src2, (i8 timm:$src4))),
11305 (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
11306 addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11308 // Additional patterns for matching zero masking with broadcasts in other
11310 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11311 (OpNode (_.BroadcastLdFrag addr:$src3),
11312 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11314 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11315 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11316 (VPTERNLOG321_imm8 timm:$src4))>;
11317 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11318 (OpNode _.RC:$src1,
11319 (_.BroadcastLdFrag addr:$src3),
11320 _.RC:$src2, (i8 timm:$src4)),
11322 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11323 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11324 (VPTERNLOG132_imm8 timm:$src4))>;
11326 // Additional patterns for matching masked broadcasts with different
11328 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11329 (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
11330 _.RC:$src2, (i8 timm:$src4)),
11332 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11333 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11334 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11335 (OpNode (_.BroadcastLdFrag addr:$src3),
11336 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11338 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11339 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11340 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11341 (OpNode _.RC:$src2, _.RC:$src1,
11342 (_.BroadcastLdFrag addr:$src3),
11343 (i8 timm:$src4)), _.RC:$src1)),
11344 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11345 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11346 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11347 (OpNode _.RC:$src2,
11348 (_.BroadcastLdFrag addr:$src3),
11349 _.RC:$src1, (i8 timm:$src4)),
11351 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11352 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11353 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11354 (OpNode (_.BroadcastLdFrag addr:$src3),
11355 _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11357 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11358 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
// VPTERNLOG at all widths, opcode 0x25. The d/q split below exists only for
// the masking granularity and VEX_W bit; the truth-table semantics are
// element-size independent.
11361 multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
11362 AVX512VLVectorVTInfo _> {
11363 let Predicates = [HasAVX512] in
11364 defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
11365 _.info512, NAME>, EVEX_V512;
11366 let Predicates = [HasAVX512, HasVLX] in {
11367 defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
11368 _.info128, NAME>, EVEX_V128;
11369 defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
11370 _.info256, NAME>, EVEX_V256;
11374 defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
11375 avx512vl_i32_info>;
11376 defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
11377 avx512vl_i64_info>, VEX_W;
11379 // Patterns to use VPTERNLOG for vXi16/vXi8 vectors.
11380 let Predicates = [HasVLX] in {
11381 def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
11383 (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
11385 def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
11386 (loadv16i8 addr:$src3), (i8 timm:$src4))),
11387 (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11389 def : Pat<(v16i8 (X86vpternlog (loadv16i8 addr:$src3), VR128X:$src2,
11390 VR128X:$src1, (i8 timm:$src4))),
11391 (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11392 (VPTERNLOG321_imm8 timm:$src4))>;
11393 def : Pat<(v16i8 (X86vpternlog VR128X:$src1, (loadv16i8 addr:$src3),
11394 VR128X:$src2, (i8 timm:$src4))),
11395 (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11396 (VPTERNLOG132_imm8 timm:$src4))>;
11398 def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
11400 (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
// Select the quadword-element VPTERNLOGQ for vpternlog nodes whose element
// type is i8/i16: there is no byte/word form of the instruction, and the
// operation is purely bitwise, so element width does not matter.
// When the loaded operand is not already in the $src3 position, the control
// immediate is remapped (VPTERNLOG321_imm8 swaps operands 3<->1,
// VPTERNLOG132_imm8 swaps 3<->2) so the memory operand can occupy the fixed
// $src3 slot of the "rmi" encoding.
// NOTE(review): this extract is elided — some continuation lines of these
// patterns (e.g. original lines 11405, 11414-11418, 11422) are missing here.
11402 def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
11403 (loadv8i16 addr:$src3), (i8 timm:$src4))),
11404 (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11406 def : Pat<(v8i16 (X86vpternlog (loadv8i16 addr:$src3), VR128X:$src2,
11407 VR128X:$src1, (i8 timm:$src4))),
11408 (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11409 (VPTERNLOG321_imm8 timm:$src4))>;
11410 def : Pat<(v8i16 (X86vpternlog VR128X:$src1, (loadv8i16 addr:$src3),
11411 VR128X:$src2, (i8 timm:$src4))),
11412 (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11413 (VPTERNLOG132_imm8 timm:$src4))>;
// v32i8 (256-bit) forms.
11415 def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
11417 (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
11419 def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
11420 (loadv32i8 addr:$src3), (i8 timm:$src4))),
11421 (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11423 def : Pat<(v32i8 (X86vpternlog (loadv32i8 addr:$src3), VR256X:$src2,
11424 VR256X:$src1, (i8 timm:$src4))),
11425 (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11426 (VPTERNLOG321_imm8 timm:$src4))>;
11427 def : Pat<(v32i8 (X86vpternlog VR256X:$src1, (loadv32i8 addr:$src3),
11428 VR256X:$src2, (i8 timm:$src4))),
11429 (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11430 (VPTERNLOG132_imm8 timm:$src4))>;
// v16i16 (256-bit) forms.
11432 def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
11434 (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
11436 def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
11437 (loadv16i16 addr:$src3), (i8 timm:$src4))),
11438 (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11440 def : Pat<(v16i16 (X86vpternlog (loadv16i16 addr:$src3), VR256X:$src2,
11441 VR256X:$src1, (i8 timm:$src4))),
11442 (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11443 (VPTERNLOG321_imm8 timm:$src4))>;
11444 def : Pat<(v16i16 (X86vpternlog VR256X:$src1, (loadv16i16 addr:$src3),
11445 VR256X:$src2, (i8 timm:$src4))),
11446 (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11447 (VPTERNLOG132_imm8 timm:$src4))>;
// 512-bit counterparts of the i8/i16 vpternlog patterns above: use the
// quadword-element VPTERNLOGQZ forms since the operation is bitwise.  The
// VPTERNLOG321_imm8 / VPTERNLOG132_imm8 transforms remap the control byte
// when the load was folded from operand 1 or 2 into the $src3 slot.
// NOTE(review): some continuation lines of this extract are elided (e.g.
// original lines 11452, 11454, 11458) including the block's closing brace.
11450 let Predicates = [HasAVX512] in {
11451 def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
11453 (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
11455 def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
11456 (loadv64i8 addr:$src3), (i8 timm:$src4))),
11457 (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11459 def : Pat<(v64i8 (X86vpternlog (loadv64i8 addr:$src3), VR512:$src2,
11460 VR512:$src1, (i8 timm:$src4))),
11461 (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11462 (VPTERNLOG321_imm8 timm:$src4))>;
11463 def : Pat<(v64i8 (X86vpternlog VR512:$src1, (loadv64i8 addr:$src3),
11464 VR512:$src2, (i8 timm:$src4))),
11465 (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11466 (VPTERNLOG132_imm8 timm:$src4))>;
// v32i16 (512-bit) forms.
11468 def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
11470 (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
11472 def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
11473 (loadv32i16 addr:$src3), (i8 timm:$src4))),
11474 (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11476 def : Pat<(v32i16 (X86vpternlog (loadv32i16 addr:$src3), VR512:$src2,
11477 VR512:$src1, (i8 timm:$src4))),
11478 (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11479 (VPTERNLOG321_imm8 timm:$src4))>;
11480 def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3),
11481 VR512:$src2, (i8 timm:$src4))),
11482 (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11483 (VPTERNLOG132_imm8 timm:$src4))>;
11486 // Patterns to implement vnot using vpternlog instead of creating all ones
11487 // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
11488 // so that the result is only dependent on src0. But we use the same source
11489 // for all operands to prevent a false dependency.
11490 // TODO: We should maybe have a more generalized algorithm for folding to
11491 // vpternlog.
// vnot (xor with all-ones) on 512-bit vectors: VPTERNLOGQ with control byte
// 15 (0x0F) produces the complement of the first source, so a single
// instruction replaces materializing an all-ones vector plus an XOR.  The
// same register is used for all three sources to avoid a false dependency
// (see the comment block above these patterns).
11492 let Predicates = [HasAVX512] in {
11493 def : Pat<(xor VR512:$src, (v64i8 immAllOnesV)),
11494 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11495 def : Pat<(xor VR512:$src, (v32i16 immAllOnesV)),
11496 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11497 def : Pat<(xor VR512:$src, (v16i32 immAllOnesV)),
11498 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11499 def : Pat<(xor VR512:$src, (v8i64 immAllOnesV)),
11500 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
// vnot on 128/256-bit vectors when VLX is unavailable: widen the source into
// a 512-bit register with INSERT_SUBREG, run the 512-bit VPTERNLOGQZrri with
// control byte 15 (NOT of the first source), then take the low sub_xmm /
// sub_ymm lanes back out.
// NOTE(review): the output-pattern lines wrapping each triple of
// INSERT_SUBREGs (EXTRACT_SUBREG of a VPTERNLOGQZrri, e.g. original lines
// 11505-11506) are elided from this extract.
11503 let Predicates = [HasAVX512, NoVLX] in {
11504 def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
11507 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11508 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11509 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11510 (i8 15)), sub_xmm)>;
11511 def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
11514 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11515 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11516 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11517 (i8 15)), sub_xmm)>;
11518 def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
11521 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11522 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11523 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11524 (i8 15)), sub_xmm)>;
11525 def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
11528 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11529 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11530 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11531 (i8 15)), sub_xmm)>;
// 256-bit sources go through sub_ymm instead of sub_xmm.
11533 def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
11536 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11537 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11538 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11539 (i8 15)), sub_ymm)>;
11540 def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
11543 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11544 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11545 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11546 (i8 15)), sub_ymm)>;
11547 def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
11550 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11551 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11552 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11553 (i8 15)), sub_ymm)>;
11554 def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
11557 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11558 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11559 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11560 (i8 15)), sub_ymm)>;
// With VLX available, vnot on 128/256-bit vectors selects the native
// VPTERNLOGQZ128/Z256 forms directly (control byte 15 = NOT of first source;
// same register repeated to avoid a false dependency).
11563 let Predicates = [HasVLX] in {
11564 def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
11565 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11566 def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
11567 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11568 def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
11569 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11570 def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
11571 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11573 def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
11574 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11575 def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
11576 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11577 def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
11578 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11579 def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
11580 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11583 //===----------------------------------------------------------------------===//
11584 // AVX-512 - FixupImm
11585 //===----------------------------------------------------------------------===//
// Packed VFIXUPIMM forms.  $src1 is tied to $dst (read-modify-write, see the
// Constraints below).  TblVT is the integer "table" vector type that pairs
// with the FP vector type _ (i32 tables for PS, i64 tables for PD).
// rri  = all-register form
// rmi  = full-width memory operand in $src3
// rmbi = broadcast memory operand in $src3 (EVEX_B)
// NOTE(review): the (_.VT _.RC:$src2) operand line of each dag pattern
// (original lines 11595, 11602, 11611) is elided from this extract.
11587 multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
11588 X86FoldableSchedWrite sched, X86VectorVTInfo _,
11589 X86VectorVTInfo TblVT>{
11590 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11591 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11592 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11593 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11594 (X86VFixupimm (_.VT _.RC:$src1),
11596 (TblVT.VT _.RC:$src3),
11597 (i32 timm:$src4))>, Sched<[sched]>;
11598 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11599 (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
11600 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11601 (X86VFixupimm (_.VT _.RC:$src1),
11603 (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
11604 (i32 timm:$src4))>,
11605 Sched<[sched.Folded, sched.ReadAfterFold]>;
11606 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11607 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11608 OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
11609 "$src2, ${src3}"##_.BroadcastStr##", $src4",
11610 (X86VFixupimm (_.VT _.RC:$src1),
11612 (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
11613 (i32 timm:$src4))>,
11614 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
11615 } // Constraints = "$src1 = $dst"
// Extends avx512_fixupimm_packed (which it inherits) with the register-only
// {sae} (suppress-all-exceptions) variant, used for the 512-bit forms.
// EVEX_B encodes the SAE bit in the register form.
11618 multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
11619 X86FoldableSchedWrite sched,
11620 X86VectorVTInfo _, X86VectorVTInfo TblVT>
11621 : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
11622 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11623 defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11624 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11625 OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
11626 "$src2, $src3, {sae}, $src4",
11627 (X86VFixupimmSAE (_.VT _.RC:$src1),
11629 (TblVT.VT _.RC:$src3),
11630 (i32 timm:$src4))>,
11631 EVEX_B, Sched<[sched]>;
// Scalar VFIXUPIMMSS/SD forms. _ is the scalar FP info (f32x/f64x) and
// _src3VT is the integer vector type of the table operand ($src3).
// rri  = register form (X86VFixupimms)
// rrib = register form with {sae} (X86VFixupimmSAEs, EVEX_B)
// rmi  = scalar-memory table operand, loaded via scalar_to_vector
// NOTE(review): the (_.VT _.RC:$src2) operand line of each dag pattern
// (original lines 11644, 11652, 11660) is elided from this extract.
11635 multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
11636 X86FoldableSchedWrite sched, X86VectorVTInfo _,
11637 X86VectorVTInfo _src3VT> {
11638 let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
11639 ExeDomain = _.ExeDomain in {
11640 defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11641 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11642 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11643 (X86VFixupimms (_.VT _.RC:$src1),
11645 (_src3VT.VT _src3VT.RC:$src3),
11646 (i32 timm:$src4))>, Sched<[sched]>;
11647 defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11648 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11649 OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
11650 "$src2, $src3, {sae}, $src4",
11651 (X86VFixupimmSAEs (_.VT _.RC:$src1),
11653 (_src3VT.VT _src3VT.RC:$src3),
11654 (i32 timm:$src4))>,
11655 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
11656 defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
11657 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11658 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11659 (X86VFixupimms (_.VT _.RC:$src1),
11661 (_src3VT.VT (scalar_to_vector
11662 (_src3VT.ScalarLdFrag addr:$src3))),
11663 (i32 timm:$src4))>,
11664 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates the packed fixupimm forms at all three vector widths:
// 512-bit (with SAE) under HasAVX512, 128/256-bit under HasAVX512+HasVLX.
11668 multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
11669 AVX512VLVectorVTInfo _Vec,
11670 AVX512VLVectorVTInfo _Tbl> {
11671 let Predicates = [HasAVX512] in
11672 defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
11673 _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
11674 EVEX_4V, EVEX_V512;
11675 let Predicates = [HasAVX512, HasVLX] in {
11676 defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
11677 _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
11678 EVEX_4V, EVEX_V128;
11679 defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
11680 _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
11681 EVEX_4V, EVEX_V256;
// Concrete VFIXUPIMM instantiations: scalar SS/SD (opcode 0x55) and packed
// PS/PD (opcode 0x54).  The PD/SD variants add VEX_W and 64-bit CD8 scaling.
11685 defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11686 SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
11687 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11688 defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11689 SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
11690 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
11691 defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
11692 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11693 defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
11694 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
11696 // Patterns used to select SSE scalar fp arithmetic instructions from
11699 // (1) a scalar fp operation followed by a blend
11701 // The effect is that the backend no longer emits unnecessary vector
11702 // insert instructions immediately after SSE scalar fp instructions
11703 // like addss or mulss.
11705 // For example, given the following code:
11706 // __m128 foo(__m128 A, __m128 B) {
11711 // Previously we generated:
11712 // addss %xmm0, %xmm1
11713 // movss %xmm1, %xmm0
11715 // We now generate:
11716 // addss %xmm1, %xmm0
11718 // (2) a vector packed single/double fp operation followed by a vector insert
11720 // The effect is that the backend converts the packed fp instruction
11721 // followed by a vector insert into a single SSE scalar fp instruction.
11723 // For example, given the following code:
11724 // __m128 foo(__m128 A, __m128 B) {
11725 // __m128 C = A + B;
11726 // return (__m128) {c[0], a[1], a[2], a[3]};
11729 // Previously we generated:
11730 // addps %xmm0, %xmm1
11731 // movss %xmm1, %xmm0
11733 // We now generate:
11734 // addss %xmm1, %xmm0
11736 // TODO: Some canonicalization in lowering would simplify the number of
11737 // patterns we have to try to match.
// Matches "extract element 0, scalar FP op, re-insert via movss/movsd" DAGs
// onto the EVEX scalar-intrinsic instruction forms (Zrr_Int/Zrm_Int), plus
// the masked (Zrr_Intk/Zrm_Intk) and zero-masked (Zrr_Intkz/Zrm_Intkz)
// variants selected from X86selects on a VK1WM mask.
// NOTE(review): several interior dag lines of the masked patterns (e.g.
// original lines 11758, 11760, 11762-11763) are elided from this extract.
11738 multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode,
11739 X86VectorVTInfo _, PatLeaf ZeroFP> {
11740 let Predicates = [HasAVX512] in {
11741 // extracted scalar math op with insert via movss
11742 def : Pat<(MoveNode
11743 (_.VT VR128X:$dst),
11744 (_.VT (scalar_to_vector
11745 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
11747 (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
11748 (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
// Memory-operand form: second operand comes from a scalar load.
11749 def : Pat<(MoveNode
11750 (_.VT VR128X:$dst),
11751 (_.VT (scalar_to_vector
11752 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
11753 (_.ScalarLdFrag addr:$src))))),
11754 (!cast<Instruction>("V"#OpcPrefix#Zrm_Int) _.VT:$dst, addr:$src)>;
11756 // extracted masked scalar math op with insert via movss
11757 def : Pat<(MoveNode (_.VT VR128X:$src1),
11759 (X86selects VK1WM:$mask,
11761 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11764 (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk)
11765 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
11766 VK1WM:$mask, _.VT:$src1,
11767 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
11768 def : Pat<(MoveNode (_.VT VR128X:$src1),
11770 (X86selects VK1WM:$mask,
11772 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11773 (_.ScalarLdFrag addr:$src2)),
11775 (!cast<Instruction>("V"#OpcPrefix#Zrm_Intk)
11776 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
11777 VK1WM:$mask, _.VT:$src1, addr:$src2)>;
11779 // extracted masked scalar math op with insert via movss
// Zero-masking variants: the pass-through is the FP zero PatLeaf (ZeroFP).
11780 def : Pat<(MoveNode (_.VT VR128X:$src1),
11782 (X86selects VK1WM:$mask,
11784 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11785 _.FRC:$src2), (_.EltVT ZeroFP)))),
11786 (!cast<I>("V"#OpcPrefix#Zrr_Intkz)
11787 VK1WM:$mask, _.VT:$src1,
11788 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
11789 def : Pat<(MoveNode (_.VT VR128X:$src1),
11791 (X86selects VK1WM:$mask,
11793 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11794 (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
11795 (!cast<I>("V"#OpcPrefix#Zrm_Intkz) VK1WM:$mask, _.VT:$src1, addr:$src2)>;
// Instantiations for the four basic scalar FP ops in both precisions.
11799 defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
11800 defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
11801 defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
11802 defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
11804 defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
11805 defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
11806 defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
11807 defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
// Same idea as AVX512_scalar_math_fp_patterns but for unary scalar ops:
// extract element 0, apply OpNode, re-insert via Move -> select the EVEX
// Zr_Int intrinsic form.  Instantiated for sqrtss/sqrtsd below.
11809 multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
11810 SDNode Move, X86VectorVTInfo _> {
11811 let Predicates = [HasAVX512] in {
11812 def : Pat<(_.VT (Move _.VT:$dst,
11813 (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
11814 (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src)>;
11818 defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
11819 defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
11821 //===----------------------------------------------------------------------===//
11822 // AES instructions
11823 //===----------------------------------------------------------------------===//
// EVEX-encoded AES instructions: reuse the legacy AESI_binop_rm_int
// multiclass with X/512-bit register classes.  128/256-bit forms require
// VLX+VAES; the 512-bit form requires AVX512+VAES.  IntPrefix names the base
// intrinsic; "_256"/"_512" suffixes select the wider intrinsic variants.
11825 multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
11826 let Predicates = [HasVLX, HasVAES] in {
11827 defm Z128 : AESI_binop_rm_int<Op, OpStr,
11828 !cast<Intrinsic>(IntPrefix),
11829 loadv2i64, 0, VR128X, i128mem>,
11830 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
11831 defm Z256 : AESI_binop_rm_int<Op, OpStr,
11832 !cast<Intrinsic>(IntPrefix##"_256"),
11833 loadv4i64, 0, VR256X, i256mem>,
11834 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
11836 let Predicates = [HasAVX512, HasVAES] in
11837 defm Z : AESI_binop_rm_int<Op, OpStr,
11838 !cast<Intrinsic>(IntPrefix##"_512"),
11839 loadv8i64, 0, VR512, i512mem>,
11840 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
// The four AES round instructions.
11843 defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
11844 defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
11845 defm VAESDEC : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
11846 defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
11848 //===----------------------------------------------------------------------===//
11849 // PCLMUL instructions - Carry less multiplication
11850 //===----------------------------------------------------------------------===//
// EVEX-encoded carry-less multiply: reuse the legacy vpclmulqdq multiclass.
// 512-bit needs AVX512+VPCLMULQDQ; 128/256-bit need VLX+VPCLMULQDQ.
// The *_aliases defms add the vpclmul{lqlqdq,hqlqdq,lqhqdq,hqhqdq} asm
// aliases for each width.
11852 let Predicates = [HasAVX512, HasVPCLMULQDQ] in
11853 defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
11854 EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;
11856 let Predicates = [HasVLX, HasVPCLMULQDQ] in {
11857 defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
11858 EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;
11860 defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
11861 int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
11862 EVEX_CD8<64, CD8VF>, VEX_WIG;
11866 defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
11867 defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
11868 defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
11870 //===----------------------------------------------------------------------===//
11872 //===----------------------------------------------------------------------===//
// VBMI2 variable funnel-shift (vpshldv/vpshrdv) building blocks.
// $src1 is tied to $dst.  "r" = register form, "m" = full-width memory form.
11874 multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
11875 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
11876 let Constraints = "$src1 = $dst",
11877 ExeDomain = VTI.ExeDomain in {
11878 defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
11879 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
11880 "$src3, $src2", "$src2, $src3",
11881 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
11882 AVX512FMA3Base, Sched<[sched]>;
11883 defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11884 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
11885 "$src3, $src2", "$src2, $src3",
11886 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
11887 (VTI.VT (VTI.LdFrag addr:$src3))))>,
11889 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Adds the broadcast-memory form "mb" (EVEX_B) on top of the r/m forms;
// only instantiated for dword/qword element types (see VBMI2_shift_var).
11893 multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
11894 X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
11895 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
11896 let Constraints = "$src1 = $dst",
11897 ExeDomain = VTI.ExeDomain in
11898 defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11899 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
11900 "${src3}"##VTI.BroadcastStr##", $src2",
11901 "$src2, ${src3}"##VTI.BroadcastStr,
11902 (OpNode VTI.RC:$src1, VTI.RC:$src2,
11903 (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
11904 AVX512FMA3Base, EVEX_B,
11905 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiate the variable funnel-shift forms at all widths: 512-bit under
// HasVBMI2, 128/256-bit under HasVBMI2+HasVLX.
11908 multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
11909 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
11910 let Predicates = [HasVBMI2] in
11911 defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
11913 let Predicates = [HasVBMI2, HasVLX] in {
11914 defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
11916 defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
// Same, but with the broadcast-capable _rmb building block.
11921 multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
11922 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
11923 let Predicates = [HasVBMI2] in
11924 defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
11926 let Predicates = [HasVBMI2, HasVLX] in {
11927 defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
11929 defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
// W (word) elements get no broadcast form; D/Q elements do.
11933 multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
11934 SDNode OpNode, X86SchedWriteWidths sched> {
11935 defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, sched,
11936 avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
11937 defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, sched,
11938 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11939 defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, sched,
11940 avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
// Immediate funnel-shift (vpshld/vpshrd) forms per element width, then the
// concrete VBMI2 instruction instantiations and the byte/word
// compress/expand instructions.
11943 multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
11944 SDNode OpNode, X86SchedWriteWidths sched> {
11945 defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", sched,
11946 avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
11947 VEX_W, EVEX_CD8<16, CD8VF>;
11948 defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
11949 OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11950 defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
11951 sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
11955 defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
11956 defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
11957 defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
11958 defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
// VBMI2 byte/word compress and expand.
11961 defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
11962 avx512vl_i8_info, HasVBMI2>, EVEX,
11964 defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
11965 avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
11968 defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
11969 avx512vl_i8_info, HasVBMI2>, EVEX;
11970 defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
11971 avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
11973 //===----------------------------------------------------------------------===//
11975 //===----------------------------------------------------------------------===//
// VNNI dot-product accumulate building block.  $src1 (the accumulator) is
// tied to $dst.  "r" = register, "m" = full memory, "mb" = dword broadcast.
// IsCommutable applies to $src2/$src3 of the register form only.
11977 let Constraints = "$src1 = $dst" in
11978 multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
11979 X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
11980 bit IsCommutable> {
11981 defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
11982 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
11983 "$src3, $src2", "$src2, $src3",
11984 (VTI.VT (OpNode VTI.RC:$src1,
11985 VTI.RC:$src2, VTI.RC:$src3)),
11986 IsCommutable, IsCommutable>,
11987 EVEX_4V, T8PD, Sched<[sched]>;
11988 defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11989 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
11990 "$src3, $src2", "$src2, $src3",
11991 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
11992 (VTI.VT (VTI.LdFrag addr:$src3))))>,
11993 EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
11994 Sched<[sched.Folded, sched.ReadAfterFold]>;
11995 defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11996 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
11997 OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
11998 "$src2, ${src3}"##VTI.BroadcastStr,
11999 (OpNode VTI.RC:$src1, VTI.RC:$src2,
12000 (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12001 EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
12002 T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiate at i32 element widths: 512-bit under HasVNNI, 128/256-bit
// under HasVNNI+HasVLX.
12005 multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
12006 X86SchedWriteWidths sched, bit IsCommutable> {
12007 let Predicates = [HasVNNI] in
12008 defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
12009 IsCommutable>, EVEX_V512;
12010 let Predicates = [HasVNNI, HasVLX] in {
12011 defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
12012 IsCommutable>, EVEX_V256;
12013 defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
12014 IsCommutable>, EVEX_V128;
12018 // FIXME: Is there a better scheduler class for VPDP?
// The byte variants (vpdpbusd/s) are not commutable; the word variants are.
12019 defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
12020 defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
12021 defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
12022 defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
// Single-use guard: only fold a vpmaddwd feeding an add into VPDPWSSD when
// the multiply result has no other users (folding would otherwise duplicate
// or lose the intermediate value).
12024 def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
12025 (X86vpmaddwd node:$lhs, node:$rhs), [{
12026 return N->hasOneUse();
12029 // Patterns to match VPDPWSSD from existing instructions/intrinsics.
12030 let Predicates = [HasVNNI] in {
12031 def : Pat<(v16i32 (add VR512:$src1,
12032 (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
12033 (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
12034 def : Pat<(v16i32 (add VR512:$src1,
12035 (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
12036 (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
12038 let Predicates = [HasVNNI,HasVLX] in {
12039 def : Pat<(v8i32 (add VR256X:$src1,
12040 (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
12041 (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
12042 def : Pat<(v8i32 (add VR256X:$src1,
12043 (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
12044 (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
12045 def : Pat<(v4i32 (add VR128X:$src1,
12046 (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
12047 (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
12048 def : Pat<(v4i32 (add VR128X:$src1,
12049 (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
12050 (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
12053 //===----------------------------------------------------------------------===//
12055 //===----------------------------------------------------------------------===//
12057 // FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
// BITALG byte/word population count; the avx512_unary_lowering defms add
// the non-VLX widening lowerings for the 128/256-bit types.
12058 defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
12059 avx512vl_i8_info, HasBITALG>;
12060 defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
12061 avx512vl_i16_info, HasBITALG>, VEX_W;
12063 defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
12064 defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
// Single-use guard for the masked vpshufbitqmb patterns below.
12066 def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
12067 (X86Vpshufbitqmb node:$src1, node:$src2), [{
12068 return N->hasOneUse();
// VPSHUFBITQMB: bit-gather into a mask register (KRC destination).  The
// AVX512_maskable_cmp helper takes both the unmasked pattern and the
// single-use-guarded (_su) pattern for the masked form.
12071 multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12072 defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
12073 (ins VTI.RC:$src1, VTI.RC:$src2),
12075 "$src2, $src1", "$src1, $src2",
12076 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12077 (VTI.VT VTI.RC:$src2)),
12078 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12079 (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
12081 defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
12082 (ins VTI.RC:$src1, VTI.MemOp:$src2),
12084 "$src2, $src1", "$src1, $src2",
12085 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12086 (VTI.VT (VTI.LdFrag addr:$src2))),
12087 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12088 (VTI.VT (VTI.LdFrag addr:$src2)))>,
12089 EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
12090 Sched<[sched.Folded, sched.ReadAfterFold]>;
// 512-bit under HasBITALG; 128/256-bit additionally need HasVLX.
12093 multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12094 let Predicates = [HasBITALG] in
12095 defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
12096 let Predicates = [HasBITALG, HasVLX] in {
12097 defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
12098 defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
12102 // FIXME: Is there a better scheduler class for VPSHUFBITQMB?
12103 defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
12105 //===----------------------------------------------------------------------===//
12107 //===----------------------------------------------------------------------===//
// EVEX-encoded GFNI byte multiply.  All widths require HasBWI in addition
// to HasGFNI (512-bit also needs AVX512; 128/256-bit need VLX).
12109 multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12110 X86SchedWriteWidths sched> {
12111 let Predicates = [HasGFNI, HasAVX512, HasBWI] in
12112 defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
12114 let Predicates = [HasGFNI, HasVLX, HasBWI] in {
12115 defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
12117 defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
12122 defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
12124 EVEX_CD8<8, CD8VF>, T8PD;
// GFNI affine transforms.  Inherits the reg/mem+imm8 forms from
// avx512_3Op_rm_imm8 and adds the qword-broadcast form "rmbi": the byte
// vector is reinterpreted from a broadcast of the 64-bit matrix operand
// (BcstVTI is the i64 vector type of matching width).
12126 multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
12127 X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12128 X86VectorVTInfo BcstVTI>
12129 : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
12130 let ExeDomain = VTI.ExeDomain in
12131 defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12132 (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
12133 OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
12134 "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
12135 (OpNode (VTI.VT VTI.RC:$src1),
12136 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
12137 (i8 timm:$src3))>, EVEX_B,
12138 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiate at all widths: 512-bit needs GFNI+AVX512+BWI, 128/256-bit
// need GFNI+VLX+BWI.
12141 multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12142 X86SchedWriteWidths sched> {
12143 let Predicates = [HasGFNI, HasAVX512, HasBWI] in
12144 defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
12145 v64i8_info, v8i64_info>, EVEX_V512;
12146 let Predicates = [HasGFNI, HasVLX, HasBWI] in {
12147 defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
12148 v32i8x_info, v4i64x_info>, EVEX_V256;
12149 defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
12150 v16i8x_info, v2i64x_info>, EVEX_V128;
12154 defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
12155 X86GF2P8affineinvqb, SchedWriteVecIMul>,
12156 EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12157 defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
12158 X86GF2P8affineqb, SchedWriteVecIMul>,
12159 EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12162 //===----------------------------------------------------------------------===//
12164 //===----------------------------------------------------------------------===//
// AVX512_4FMAPS (Knights Mill) quadruple FMA instructions.  Defined
// asm-only (empty pattern list, hasSideEffects = 0, mayLoad = 1) since they
// consume a sequence of four registers plus a 128-bit memory operand;
// selection is handled outside the pattern system.
12166 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
12167 Constraints = "$src1 = $dst" in {
12168 defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
12169 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12170 "v4fmaddps", "$src3, $src2", "$src2, $src3",
12171 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12172 Sched<[SchedWriteFMA.ZMM.Folded]>;
12174 defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
12175 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12176 "v4fnmaddps", "$src3, $src2", "$src2, $src3",
12177 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12178 Sched<[SchedWriteFMA.ZMM.Folded]>;
12180 defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
12181 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12182 "v4fmaddss", "$src3, $src2", "$src2, $src3",
12183 []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12184 Sched<[SchedWriteFMA.Scl.Folded]>;
12186 defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
12187 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12188 "v4fnmaddss", "$src3, $src2", "$src2, $src3",
12189 []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12190 Sched<[SchedWriteFMA.Scl.Folded]>;
12193 //===----------------------------------------------------------------------===//
12195 //===----------------------------------------------------------------------===//
// AVX5124VNNIW: 4-iteration word dot-product accumulate. Memory-source forms
// only (MRMSrcMem, mayLoad = 1); accumulator tied via "$src1 = $dst".
// Patterns are empty ([]) -- NOTE(review): presumably matched via intrinsics
// outside this excerpt; confirm before adding patterns here.
12197 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
12198 Constraints = "$src1 = $dst" in {
12199 defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
12200 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12201 "vp4dpwssd", "$src3, $src2", "$src2, $src3",
12202 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12203 Sched<[SchedWriteFMA.ZMM.Folded]>;
// Saturating variant: opcode 0x53, mnemonic gains a trailing "s".
12205 defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
12206 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12207 "vp4dpwssds", "$src3, $src2", "$src2, $src3",
12208 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12209 Sched<[SchedWriteFMA.ZMM.Folded]>;
// Pseudo instructions to spill/reload a VK16PAIR mask-register pair through
// memory (empty pattern lists; no side effects). NOTE(review): the expansion
// of these pseudos is not visible in this excerpt -- confirm where they are
// lowered before changing operands.
12212 let hasSideEffects = 0 in {
12213 let mayStore = 1 in
12214 def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
// Reload form. (A matching `let mayLoad = 1 in` is expected to precede this
// def; the line is not visible in this excerpt.)
12216 def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
12219 //===----------------------------------------------------------------------===//
12221 //===----------------------------------------------------------------------===//
// VP2INTERSECT addressing modes for one vector width. The destination is a
// mask-register PAIR operand (_.KRPC), since vp2intersect produces two mask
// registers. Three forms: register-register (rr), register-memory (rm), and
// register-broadcast-memory (rmb, EVEX_B).
12223 multiclass avx512_vp2intersect_modes<X86VectorVTInfo _> {
12224 def rr : I<0x68, MRMSrcReg,
12225 (outs _.KRPC:$dst),
12226 (ins _.RC:$src1, _.RC:$src2),
12227 !strconcat("vp2intersect", _.Suffix,
12228 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12229 [(set _.KRPC:$dst, (X86vp2intersect
12230 _.RC:$src1, (_.VT _.RC:$src2)))]>,
// Full-width memory source; bitconvert of the element-typed load.
12233 def rm : I<0x68, MRMSrcMem,
12234 (outs _.KRPC:$dst),
12235 (ins _.RC:$src1, _.MemOp:$src2),
12236 !strconcat("vp2intersect", _.Suffix,
12237 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12238 [(set _.KRPC:$dst, (X86vp2intersect
12239 _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12240 EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>;
// Broadcast-from-scalar-memory source; EVEX_B selects the broadcast form and
// _.BroadcastStr supplies the "{1toN}" assembly decoration.
12242 def rmb : I<0x68, MRMSrcMem,
12243 (outs _.KRPC:$dst),
12244 (ins _.RC:$src1, _.ScalarMemOp:$src2),
12245 !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12246 ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12247 [(set _.KRPC:$dst, (X86vp2intersect
12248 _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
12249 EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
// Instantiate vp2intersect at all three vector widths: the 512-bit form
// requires HasVP2INTERSECT; the 256/128-bit forms additionally require
// HasVLX.
12252 multiclass avx512_vp2intersect<AVX512VLVectorVTInfo _> {
12253 let Predicates = [HasAVX512, HasVP2INTERSECT] in
12254 defm Z : avx512_vp2intersect_modes<_.info512>, EVEX_V512;
12256 let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
12257 defm Z256 : avx512_vp2intersect_modes<_.info256>, EVEX_V256;
12258 defm Z128 : avx512_vp2intersect_modes<_.info128>, EVEX_V128;
// Dword- and qword-element variants; the qword form sets VEX_W.
12262 defm VP2INTERSECTD : avx512_vp2intersect<avx512vl_i32_info>;
12263 defm VP2INTERSECTQ : avx512_vp2intersect<avx512vl_i64_info>, VEX_W;
// Binary op where source and destination vector types differ (separate
// _SrcVTInfo/_DstVTInfo), instantiated at 512/256/128 bits. The 512-bit form
// is gated only on `prd`; the VL forms also require HasVLX. Note the CD8
// scaling is fixed at 32-bit granularity for every width, independent of the
// element type carried by the VTInfo arguments.
12265 multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
12266 X86SchedWriteWidths sched,
12267 AVX512VLVectorVTInfo _SrcVTInfo,
12268 AVX512VLVectorVTInfo _DstVTInfo,
12269 SDNode OpNode, Predicate prd,
12270 bit IsCommutable = 0> {
12271 let Predicates = [prd] in
12272 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
12273 _SrcVTInfo.info512, _DstVTInfo.info512,
12274 _SrcVTInfo.info512, IsCommutable>,
12275 EVEX_V512, EVEX_CD8<32, CD8VF>;
12276 let Predicates = [HasVLX, prd] in {
12277 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
12278 _SrcVTInfo.info256, _DstVTInfo.info256,
12279 _SrcVTInfo.info256, IsCommutable>,
12280 EVEX_V256, EVEX_CD8<32, CD8VF>;
12281 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
12282 _SrcVTInfo.info128, _DstVTInfo.info128,
12283 _SrcVTInfo.info128, IsCommutable>,
12284 EVEX_V128, EVEX_CD8<32, CD8VF>;
// AVX512_BF16: convert two packed-f32 sources into one packed-bf16 result
// (f32 src info, i16 dst info). Marked non-commutable (final arg 0).
12288 defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
12289 SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
12290 avx512vl_f32_info, avx512vl_i16_info,
12291 X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
12293 // Truncate Float to BFloat16
// Z uses the real X86cvtneps2bf16 node; Z128 is defined with null_frag so
// its selection patterns can be supplied separately (the 128-bit form needs
// special masking patterns). InstAliases provide the explicit "x"/"y"
// suffixed mnemonics; the rr/rm register aliases emit only for Intel syntax
// (trailing 0, "intel").
12294 multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
12295 X86SchedWriteWidths sched> {
12296 let Predicates = [HasBF16] in {
12297 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
12298 X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
12300 let Predicates = [HasBF16, HasVLX] in {
12301 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
12302 null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
12304 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
12306 sched.YMM, "{1to8}", "{y}">, EVEX_V256;
12308 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
12309 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
12311 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
12312 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
12313 f128mem:$src), 0, "intel">;
// The 256-bit source still yields a 128-bit (v8i16) result register.
12314 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
12315 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
12317 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
12318 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
12319 f256mem:$src), 0, "intel">;
// Single-source f32 -> bf16 truncating convert (opcode 0x72, XS prefix).
12323 defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
12324 SchedWriteCvtPD2PS>, T8XS,
12325 EVEX_CD8<32, CD8VF>;
// Hand-written selection patterns for the 128-bit VCVTNEPS2BF16 forms, which
// were declared with null_frag above so masking can be expressed with
// X86mcvtneps2bf16. Covers register (rr), load (rm), and broadcast (rmb)
// sources, each with unmasked, merge-masked (k), and zero-masked (kz)
// variants.
12327 let Predicates = [HasBF16, HasVLX] in {
12328 // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
12329 // patterns have been disabled with null_frag.
12330 def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
12331 (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12332 def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
12334 (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
12335 def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
12337 (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
// Folded-load variants.
12339 def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
12340 (VCVTNEPS2BF16Z128rm addr:$src)>;
12341 def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
12343 (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12344 def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
12346 (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
// Broadcast-load variants.
12348 def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
12349 (X86VBroadcastld32 addr:$src)))),
12350 (VCVTNEPS2BF16Z128rmb addr:$src)>;
12351 def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12352 (v8i16 VR128X:$src0), VK4WM:$mask),
12353 (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12354 def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12355 v8i16x_info.ImmAllZerosV, VK4WM:$mask),
12356 (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
// BF16 dot-product accumulate: register (r), memory (m), and broadcast (mb)
// forms for one vector width. `_` describes the f32 accumulator/result type;
// `src_v` describes the i32-typed bf16 source operands, so memory loads go
// through src_v's load fragments. The accumulator is tied ($src1 = $dst).
12359 let Constraints = "$src1 = $dst" in {
12360 multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
12361 X86VectorVTInfo _, X86VectorVTInfo src_v> {
12362 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12363 (ins _.RC:$src2, _.RC:$src3),
12364 OpcodeStr, "$src3, $src2", "$src2, $src3",
12365 (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
// Memory form: bitconvert the src_v-typed load to the accumulator type.
12368 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12369 (ins _.RC:$src2, _.MemOp:$src3),
12370 OpcodeStr, "$src3, $src2", "$src2, $src3",
12371 (_.VT (OpNode _.RC:$src1, _.RC:$src2,
12372 (src_v.VT (bitconvert
12373 (src_v.LdFrag addr:$src3)))))>, EVEX_4V;
// Broadcast form: _.BroadcastStr decorates the asm operand.
12375 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12376 (ins _.RC:$src2, _.ScalarMemOp:$src3),
12378 !strconcat("${src3}", _.BroadcastStr,", $src2"),
12379 !strconcat("$src2, ${src3}", _.BroadcastStr),
12380 (_.VT (OpNode _.RC:$src1, _.RC:$src2,
12381 (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
12385 } // Constraints = "$src1 = $dst"
// Instantiate the dpbf16ps forms at 512/256/128 bits; the 512-bit form is
// gated on `prd` alone, the VL forms also require HasVLX.
12387 multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
12388 AVX512VLVectorVTInfo _,
12389 AVX512VLVectorVTInfo src_v, Predicate prd> {
12390 let Predicates = [prd] in {
12391 defm Z : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info512,
12392 src_v.info512>, EVEX_V512;
12394 let Predicates = [HasVLX, prd] in {
12395 defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info256,
12396 src_v.info256>, EVEX_V256;
12397 defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info128,
12398 src_v.info128>, EVEX_V128;
// AVX512_BF16 dot-product: f32 accumulator info, i32 source info (each i32
// lane holds a pair of bf16 values).
12402 defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps,
12403 avx512vl_f32_info, avx512vl_i32_info,
12404 HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;