1 //===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file describes the X86 AVX512 instruction set, defining the
10 // instructions, and properties of the instructions which are needed for code
11 // generation, machine code emission, and analysis.
13 //===----------------------------------------------------------------------===//
15 // Group template arguments that can be derived from the vector type (EltNum x
16 // EltVT). These are things like the register class for the writemask, etc.
17 // The idea is to pass one of these as the template argument rather than the
18 // individual arguments.
19 // The template is also used for scalar types, in this case numelts is 1.
20 class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
22 RegisterClass RC = rc;
23 ValueType EltVT = eltvt;
24 int NumElts = numelts;
26 // Corresponding mask register class.
27 RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
29 // Corresponding mask register pair class.
30 RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
31 !cast<RegisterOperand>("VK" # NumElts # "Pair"));
33 // Corresponding write-mask register class.
34 RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
37 ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
39 // Suffix used in the instruction mnemonic.
40 string Suffix = suffix;
42 // VTName is a string name for vector VT. For vector types it will be
43 // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
44 // It is a little bit complex for scalar types, where NumElts = 1.
45 // In this case we build v4f32 or v2f64
46 string VTName = "v" # !if (!eq (NumElts, 1),
47 !if (!eq (EltVT.Size, 16), 8,
48 !if (!eq (EltVT.Size, 32), 4,
49 !if (!eq (EltVT.Size, 64), 2, NumElts))), NumElts) # EltVT;
52 ValueType VT = !cast<ValueType>(VTName);
54 string EltTypeName = !cast<string>(EltVT);
55 // Size of the element type in bits, e.g. 32 for v16i32.
56 string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
57 int EltSize = EltVT.Size;
59 // "i" for integer types and "f" for floating-point types
60 string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
62 // Size of RC in bits, e.g. 512 for VR512.
65 // The corresponding memory operand, e.g. i512mem for VR512.
66 X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
67 X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
68 // FP scalar memory operand for intrinsics - ssmem/sdmem.
69 Operand IntScalarMemOp = !if (!eq (EltTypeName, "f16"), !cast<Operand>("shmem"),
70 !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
71 !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?)));
74 PatFrag LdFrag = !cast<PatFrag>("load" # VTName);
76 PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);
78 PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
79 PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);
81 PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f16"), !cast<PatFrags>("sse_load_f16"),
82 !if (!eq (EltTypeName, "f32"), !cast<PatFrags>("sse_load_f32"),
83 !if (!eq (EltTypeName, "f64"), !cast<PatFrags>("sse_load_f64"), ?)));
85 // The string to specify embedded broadcast in assembly.
86 string BroadcastStr = "{1to" # NumElts # "}";
88 // 8-bit compressed displacement tuple/subvector format. This is only
89 // defined for NumElts <= 8.
90 CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
91 !cast<CD8VForm>("CD8VT" # NumElts), ?);
93 SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
94 !if (!eq (Size, 256), sub_ymm, ?));
96 Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
97 !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
98 !if (!eq (EltTypeName, "f16"), SSEPackedSingle, // FIXME?
101 RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X,
102 !if (!eq (EltTypeName, "f16"), FR16X,
105 dag ImmAllZerosV = (VT immAllZerosV);
107 string ZSuffix = !if (!eq (Size, 128), "Z128",
108 !if (!eq (Size, 256), "Z256", "Z"));
// 512-bit vector type infos (RC = VR512).
111 def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
112 def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
113 def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
114 def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
115 def v32f16_info : X86VectorVTInfo<32, f16, VR512, "ph">;
116 def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
117 def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;
119 // "x" in v32i8x_info means RC = VR256X
120 def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
121 def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
122 def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
123 def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
124 def v16f16x_info : X86VectorVTInfo<16, f16, VR256X, "ph">;
125 def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
126 def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;
// 128-bit vector type infos (RC = VR128X).
128 def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
129 def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
130 def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
131 def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
132 def v8f16x_info : X86VectorVTInfo<8, f16, VR128X, "ph">;
133 def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
134 def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
136 // We map scalar types to the smallest (128-bit) vector type
137 // with the appropriate element type. This allows to use the same masking logic.
// Integer scalars use GR32/GR64; FP scalars use VR128X per the comment above.
138 def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
139 def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
140 def f16x_info : X86VectorVTInfo<1, f16, VR128X, "sh">;
141 def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
142 def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
// Bundles the 512/256/128-bit X86VectorVTInfo for one element type, so a
// multiclass can generate all three VL (vector-length) variants from one
// template argument.
144 class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
145 X86VectorVTInfo i128> {
146 X86VectorVTInfo info512 = i512;
147 X86VectorVTInfo info256 = i256;
148 X86VectorVTInfo info128 = i128;
// Per-element-type VL triples (512/256/128-bit infos).
// NOTE(review): each def's third argument (the 128-bit info) is on a
// continuation line that is not visible in this view; confirm upstream.
151 def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
153 def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
155 def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
157 def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
159 def avx512vl_f16_info : AVX512VLVectorVTInfo<v32f16_info, v16f16x_info,
161 def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
163 def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
// Groups the mask register class, write-mask register class and (per the
// defs below) the mask value type for a k-register vector-of-i1 type.
166 class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
168 RegisterClass KRC = _krc;
169 RegisterClass KRCWM = _krcwm;
// Mask-vector infos for every supported mask width (1..64 bits).
173 def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
174 def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
175 def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
176 def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
177 def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
178 def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
179 def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
181 // Used for matching masked operations. Ensures the operation part only has a
// single use, via the isProfitableToFormMaskedOp predicate below.
183 def vselect_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
184 (vselect node:$mask, node:$src1, node:$src2), [{
185 return isProfitableToFormMaskedOp(N);
// Scalar analogue of vselect_mask, built on X86selects.
188 def X86selects_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
189 (X86selects node:$mask, node:$src1, node:$src2), [{
190 return isProfitableToFormMaskedOp(N);
193 // This multiclass generates the masking variants from the non-masking
194 // variant. It only provides the assembly pieces for the masking variants.
195 // It assumes custom ISel patterns for masking which can be provided as
196 // template arguments.
// Emits three records: NAME (unmasked), NAME#k (merge-masking, EVEX.k) and
// NAME#kz (zero-masking, {z}); the three pattern lists and commutability
// flags are supplied by the caller.
197 multiclass AVX512_maskable_custom<bits<8> O, Format F,
199 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
201 string AttSrcAsm, string IntelSrcAsm,
203 list<dag> MaskingPattern,
204 list<dag> ZeroMaskingPattern,
205 string MaskingConstraint = "",
206 bit IsCommutable = 0,
207 bit IsKCommutable = 0,
208 bit IsKZCommutable = IsCommutable> {
209 let isCommutable = IsCommutable in
// Unmasked variant: plain "$dst" destination in both AT&T and Intel syntax.
210 def NAME: AVX512<O, F, Outs, Ins,
211 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
212 "$dst, "#IntelSrcAsm#"}",
215 // Prefer over VMOV*rrk Pat<>
216 let isCommutable = IsKCommutable in
// Merge-masking variant: destination annotated with "{${mask}}".
217 def NAME#k: AVX512<O, F, Outs, MaskingIns,
218 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
219 "$dst {${mask}}, "#IntelSrcAsm#"}",
222 // In case of the 3src subclass this is overridden with a let.
223 string Constraints = MaskingConstraint;
226 // Zero mask does not add any restrictions to commute operands transformation.
227 // So, it is Ok to use IsCommutable instead of IsKCommutable.
228 let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
// Zero-masking variant: destination annotated with "{${mask}} {z}".
229 def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
230 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
231 "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
237 // Common base class of AVX512_maskable and AVX512_maskable_3src.
// Builds the three pattern lists from RHS/MaskingRHS and forwards them to
// AVX512_maskable_custom; the zero-masking pattern selects between RHS and
// an all-zeros vector via the Select operator (vselect_mask by default).
238 multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
240 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
242 string AttSrcAsm, string IntelSrcAsm,
243 dag RHS, dag MaskingRHS,
244 SDPatternOperator Select = vselect_mask,
245 string MaskingConstraint = "",
246 bit IsCommutable = 0,
247 bit IsKCommutable = 0,
248 bit IsKZCommutable = IsCommutable> :
249 AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
250 AttSrcAsm, IntelSrcAsm,
251 [(set _.RC:$dst, RHS)],
252 [(set _.RC:$dst, MaskingRHS)],
254 (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
255 MaskingConstraint, IsCommutable,
256 IsKCommutable, IsKZCommutable>;
258 // This multiclass generates the unconditional/non-masking, the masking and
259 // the zero-masking variant of the vector instruction. In the masking case, the
260 // preserved vector elements come from a new dummy input operand tied to $dst.
261 // This version uses a separate dag for non-masking and masking.
262 multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
263 dag Outs, dag Ins, string OpcodeStr,
264 string AttSrcAsm, string IntelSrcAsm,
265 dag RHS, dag MaskRHS,
266 bit IsCommutable = 0, bit IsKCommutable = 0,
267 bit IsKZCommutable = IsCommutable> :
// Prepend $src0 (merge source) + $mask for the masking ins, and just $mask
// for zero-masking; "$src0 = $dst" ties the merge source to the result.
268 AVX512_maskable_custom<O, F, Outs, Ins,
269 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
270 !con((ins _.KRCWM:$mask), Ins),
271 OpcodeStr, AttSrcAsm, IntelSrcAsm,
272 [(set _.RC:$dst, RHS)],
274 (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
276 (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
277 "$src0 = $dst", IsCommutable, IsKCommutable,
280 // This multiclass generates the unconditional/non-masking, the masking and
281 // the zero-masking variant of the vector instruction. In the masking case, the
282 // preserved vector elements come from a new dummy input operand tied to $dst.
283 multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
284 dag Outs, dag Ins, string OpcodeStr,
285 string AttSrcAsm, string IntelSrcAsm,
287 bit IsCommutable = 0, bit IsKCommutable = 0,
288 bit IsKZCommutable = IsCommutable,
289 SDPatternOperator Select = vselect_mask> :
// Same RHS for masked and unmasked (contrast with AVX512_maskable_split).
290 AVX512_maskable_common<O, F, _, Outs, Ins,
291 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
292 !con((ins _.KRCWM:$mask), Ins),
293 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
294 (Select _.KRCWM:$mask, RHS, _.RC:$src0),
295 Select, "$src0 = $dst", IsCommutable, IsKCommutable,
298 // This multiclass generates the unconditional/non-masking, the masking and
299 // the zero-masking variant of the scalar instruction.
// Same as AVX512_maskable but selects with X86selects_mask (scalar select)
// and disables all commutability flags.
300 multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
301 dag Outs, dag Ins, string OpcodeStr,
302 string AttSrcAsm, string IntelSrcAsm,
304 AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
305 RHS, 0, 0, 0, X86selects_mask>;
307 // Similar to AVX512_maskable but in this case one of the source operands
308 // ($src1) is already tied to $dst so we just use that for the preserved
309 // vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
// $src1 (it is prepended here for all three variants).
311 multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
312 dag Outs, dag NonTiedIns, string OpcodeStr,
313 string AttSrcAsm, string IntelSrcAsm,
315 bit IsCommutable = 0,
316 bit IsKCommutable = 0,
317 SDPatternOperator Select = vselect_mask,
// When MaskOnly is set the unmasked pattern is suppressed via null_frag.
319 AVX512_maskable_common<O, F, _, Outs,
320 !con((ins _.RC:$src1), NonTiedIns),
321 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
322 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
323 OpcodeStr, AttSrcAsm, IntelSrcAsm,
324 !if(MaskOnly, (null_frag), RHS),
325 (Select _.KRCWM:$mask, RHS, _.RC:$src1),
326 Select, "", IsCommutable, IsKCommutable>;
328 // Similar to AVX512_maskable_3src but in this case the input VT for the tied
329 // operand differs from the output VT. This requires a bitconvert on
330 // the preserved vector going into the vselect.
331 // NOTE: The unmasked pattern is disabled.
332 multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
333 X86VectorVTInfo InVT,
334 dag Outs, dag NonTiedIns, string OpcodeStr,
335 string AttSrcAsm, string IntelSrcAsm,
336 dag RHS, bit IsCommutable = 0> :
// Operands come from InVT; the instruction's result type is OutVT.
337 AVX512_maskable_common<O, F, OutVT, Outs,
338 !con((ins InVT.RC:$src1), NonTiedIns),
339 !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
340 !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
341 OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
342 (vselect_mask InVT.KRCWM:$mask, RHS,
343 (bitconvert InVT.RC:$src1)),
344 vselect_mask, "", IsCommutable>;
// Scalar flavor of AVX512_maskable_3src: identical wiring, but selects with
// X86selects_mask.
346 multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
347 dag Outs, dag NonTiedIns, string OpcodeStr,
348 string AttSrcAsm, string IntelSrcAsm,
350 bit IsCommutable = 0,
351 bit IsKCommutable = 0,
353 AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
354 IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
355 X86selects_mask, MaskOnly>;
// Assembly-only masking variants: only the unmasked variant gets a pattern;
// the k/kz variants are pattern-less (empty lists).
357 multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
360 string AttSrcAsm, string IntelSrcAsm,
362 AVX512_maskable_custom<O, F, Outs, Ins,
363 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
364 !con((ins _.KRCWM:$mask), Ins),
365 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
// 3-src assembly-only variant: $src1 is the tied/merge operand, so no
// separate $src0 is added.
368 multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
369 dag Outs, dag NonTiedIns,
371 string AttSrcAsm, string IntelSrcAsm,
373 AVX512_maskable_custom<O, F, Outs,
374 !con((ins _.RC:$src1), NonTiedIns),
375 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
376 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
377 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
380 // Instruction with mask that puts result in mask register,
381 // like "compare" and "vptest"
// Only two variants here (unmasked and merge-masked); there is no
// zero-masking form for mask-register destinations.
382 multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
384 dag Ins, dag MaskingIns,
386 string AttSrcAsm, string IntelSrcAsm,
388 list<dag> MaskingPattern,
389 bit IsCommutable = 0> {
390 let isCommutable = IsCommutable in {
391 def NAME: AVX512<O, F, Outs, Ins,
392 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
393 "$dst, "#IntelSrcAsm#"}",
396 def NAME#k: AVX512<O, F, Outs, MaskingIns,
397 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
398 "$dst {${mask}}, "#IntelSrcAsm#"}",
399 MaskingPattern>, EVEX_K;
// Builds the (set KRC:$dst, ...) pattern lists for the cmp variants.
403 multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
405 dag Ins, dag MaskingIns,
407 string AttSrcAsm, string IntelSrcAsm,
408 dag RHS, dag MaskingRHS,
409 bit IsCommutable = 0> :
410 AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
411 AttSrcAsm, IntelSrcAsm,
412 [(set _.KRC:$dst, RHS)],
413 [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
// Masked compare: the masked result is (and $mask, RHS_su), i.e. the mask
// input ANDs with the single-use compare result RHS_su.
415 multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
416 dag Outs, dag Ins, string OpcodeStr,
417 string AttSrcAsm, string IntelSrcAsm,
418 dag RHS, dag RHS_su, bit IsCommutable = 0> :
419 AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
420 !con((ins _.KRCWM:$mask), Ins),
421 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
422 (and _.KRCWM:$mask, RHS_su), IsCommutable>;
424 // Used by conversion instructions.
// The caller supplies all three RHS dags explicitly (no shared Select),
// since conversions often need distinct masked/zero-masked forms.
425 multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
427 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
429 string AttSrcAsm, string IntelSrcAsm,
430 dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
431 AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
432 AttSrcAsm, IntelSrcAsm,
433 [(set _.RC:$dst, RHS)],
434 [(set _.RC:$dst, MaskingRHS)],
435 [(set _.RC:$dst, ZeroMaskingRHS)],
// FMA flavor: $src1 is tied, and the masked variants select MaskingRHS
// against the tied source ($src1) or all-zeros respectively.
438 multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
439 dag Outs, dag NonTiedIns, string OpcodeStr,
440 string AttSrcAsm, string IntelSrcAsm,
441 dag RHS, dag MaskingRHS, bit IsCommutable,
443 AVX512_maskable_custom<O, F, Outs,
444 !con((ins _.RC:$src1), NonTiedIns),
445 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
446 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
447 OpcodeStr, AttSrcAsm, IntelSrcAsm,
448 [(set _.RC:$dst, RHS)],
450 (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
452 (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
453 "", IsCommutable, IsKCommutable>;
455 // Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
456 // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
457 // swizzled by ExecutionDomainFix to pxor.
458 // We set canFoldAsLoad because this can be converted to a constant-pool
459 // load of an all-zeros value if folding it would be beneficial.
460 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
461 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
462 def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
463 [(set VR512:$dst, (v16i32 immAllZerosV))]>;
464 def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
465 [(set VR512:$dst, (v16i32 immAllOnesV))]>;
// Map the remaining 512-bit all-zero vector types onto the same pseudo.
468 let Predicates = [HasAVX512] in {
469 def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
470 def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
471 def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
472 def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
473 def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
476 // Alias instructions that allow VPTERNLOG to be used with a mask to create
477 // a mix of all ones and all zeros elements. This is done this way to force
478 // the same register to be used as input for all three sources.
479 let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
// 32-bit-element form: mask-selected all-ones vs all-zeros (v16i32).
480 def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
481 (ins VK16WM:$mask), "",
482 [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
483 (v16i32 immAllOnesV),
484 (v16i32 immAllZerosV)))]>;
// 64-bit-element form (v8i64); the all-ones operand line is not visible
// in this view.
485 def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
486 (ins VK8WM:$mask), "",
487 [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
489 (v8i64 immAllZerosV)))]>;
// 128/256-bit zero pseudos, mirroring AVX512_512_SET0 above.
492 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
493 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
494 def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
495 [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
496 def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
497 [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
// Map the remaining 128/256-bit all-zero types onto the two pseudos.
500 let Predicates = [HasAVX512] in {
501 def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
502 def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
503 def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
504 def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
505 def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
506 def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
507 def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
508 def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
509 def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
510 def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
// f16 vector zeros need FP16 support but reuse the same zero pseudos.
513 let Predicates = [HasFP16] in {
514 def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
515 def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
516 def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
519 // Alias instructions that map fld0 to xorps for sse or vxorps for avx.
520 // This is expanded by ExpandPostRAPseudos.
521 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
522 isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
523 def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
524 [(set FR32X:$dst, fp32imm0)]>;
525 def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
526 [(set FR64X:$dst, fp64imm0)]>;
527 def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
528 [(set VR128X:$dst, fp128imm0)]>;
// Half-precision zero pseudo, gated on FP16 support.
531 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
532 isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasFP16] in {
533 def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
534 [(set FR16X:$dst, fp16imm0)]>;
537 //===----------------------------------------------------------------------===//
538 // AVX-512 - VECTOR INSERT
541 // Supports two different pattern operators for mask and unmasked ops. Allows
542 // null_frag to be passed for one.
// Generates register (rr) and memory (rm) forms of a VINSERTxXxN
// instruction inserting a From-sized vector into a To-sized vector at an
// immediate lane index ($src3).
543 multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
545 SDPatternOperator vinsert_insert,
546 SDPatternOperator vinsert_for_mask,
547 X86FoldableSchedWrite sched> {
548 let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
549 defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
550 (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
// Mnemonic built from element type and count, e.g. "vinsertf32x4".
551 "vinsert" # From.EltTypeName # "x" # From.NumElts,
552 "$src3, $src2, $src1", "$src1, $src2, $src3",
553 (vinsert_insert:$src3 (To.VT To.RC:$src1),
554 (From.VT From.RC:$src2),
556 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
557 (From.VT From.RC:$src2),
559 AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
// Memory form: the inserted subvector is loaded via From.LdFrag.
561 defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
562 (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
563 "vinsert" # From.EltTypeName # "x" # From.NumElts,
564 "$src3, $src2, $src1", "$src1, $src2, $src3",
565 (vinsert_insert:$src3 (To.VT To.RC:$src1),
566 (From.VT (From.LdFrag addr:$src2)),
568 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
569 (From.VT (From.LdFrag addr:$src2)),
570 (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
571 EVEX_CD8<From.EltSize, From.CD8TupleForm>,
572 Sched<[sched.Folded, sched.ReadAfterFold]>;
576 // Passes the same pattern operator for masked and unmasked ops.
577 multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
579 SDPatternOperator vinsert_insert,
580 X86FoldableSchedWrite sched> :
581 vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;
// Lowers a vinsert node of alternative (non-canonical) types onto an
// already-defined VINSERT instruction named InstrStr, computing the lane
// immediate with INSERT_get_vinsert_imm.
583 multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
584 X86VectorVTInfo To, PatFrag vinsert_insert,
585 SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
586 let Predicates = p in {
// Register form.
587 def : Pat<(vinsert_insert:$ins
588 (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
589 (To.VT (!cast<Instruction>(InstrStr#"rr")
590 To.RC:$src1, From.RC:$src2,
591 (INSERT_get_vinsert_imm To.RC:$ins)))>;
// Memory (folded-load) form.
593 def : Pat<(vinsert_insert:$ins
595 (From.VT (From.LdFrag addr:$src2)),
597 (To.VT (!cast<Instruction>(InstrStr#"rm")
598 To.RC:$src1, addr:$src2,
599 (INSERT_get_vinsert_imm To.RC:$ins)))>;
// Instantiates the full family of VINSERT instructions for one FP/int type
// pair: 32x4 (Z256/Z), 64x4 (Z), and the DQI-only 64x2/32x8 variants.
603 multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
604 ValueType EltVT64, int Opcode256,
605 X86FoldableSchedWrite sched> {
607 let Predicates = [HasVLX] in
608 defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
609 X86VectorVTInfo< 4, EltVT32, VR128X>,
610 X86VectorVTInfo< 8, EltVT32, VR256X>,
611 vinsert128_insert, sched>, EVEX_V256;
613 defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
614 X86VectorVTInfo< 4, EltVT32, VR128X>,
615 X86VectorVTInfo<16, EltVT32, VR512>,
616 vinsert128_insert, sched>, EVEX_V512;
618 defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
619 X86VectorVTInfo< 4, EltVT64, VR256X>,
620 X86VectorVTInfo< 8, EltVT64, VR512>,
621 vinsert256_insert, sched>, VEX_W, EVEX_V512;
623 // Even with DQI we'd like to only use these instructions for masking.
// null_frag as the unmasked operator suppresses the unmasked pattern.
624 let Predicates = [HasVLX, HasDQI] in
625 defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
626 X86VectorVTInfo< 2, EltVT64, VR128X>,
627 X86VectorVTInfo< 4, EltVT64, VR256X>,
628 null_frag, vinsert128_insert, sched>,
631 // Even with DQI we'd like to only use these instructions for masking.
632 let Predicates = [HasDQI] in {
633 defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
634 X86VectorVTInfo< 2, EltVT64, VR128X>,
635 X86VectorVTInfo< 8, EltVT64, VR512>,
636 null_frag, vinsert128_insert, sched>,
639 defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
640 X86VectorVTInfo< 8, EltVT32, VR256X>,
641 X86VectorVTInfo<16, EltVT32, VR512>,
642 null_frag, vinsert256_insert, sched>,
647 // FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
648 defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
649 defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
651 // Codegen pattern with the alternative types,
652 // Even with AVX512DQ we'll still use these for unmasked operations.
653 defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
654 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
655 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
656 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
658 defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
659 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
660 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
661 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
663 defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
664 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
665 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
666 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
668 // Codegen pattern with the alternative types insert VEC128 into VEC256
669 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
670 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
671 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
672 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
673 defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info,
674 vinsert128_insert, INSERT_get_vinsert128_imm, [HasFP16, HasVLX]>;
675 // Codegen pattern with the alternative types insert VEC128 into VEC512
676 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
677 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
678 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
679 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
680 defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info,
681 vinsert128_insert, INSERT_get_vinsert128_imm, [HasFP16]>;
682 // Codegen pattern with the alternative types insert VEC256 into VEC512
683 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
684 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
685 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
686 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
687 defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info,
688 vinsert256_insert, INSERT_get_vinsert256_imm, [HasFP16]>;
// Matches a masked insert whose select happens in a different (Cast) type
// than the insert itself, lowering onto the rrk/rmk/rrkz/rmkz forms of
// InstrStr. Four patterns: reg/mem x merge/zero masking.
691 multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
692 X86VectorVTInfo To, X86VectorVTInfo Cast,
693 PatFrag vinsert_insert,
694 SDNodeXForm INSERT_get_vinsert_imm,
696 let Predicates = p in {
// Merge-masking, register source.
698 (vselect_mask Cast.KRCWM:$mask,
700 (vinsert_insert:$ins (To.VT To.RC:$src1),
701 (From.VT From.RC:$src2),
704 (!cast<Instruction>(InstrStr#"rrk")
705 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
706 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Merge-masking, memory source.
708 (vselect_mask Cast.KRCWM:$mask,
710 (vinsert_insert:$ins (To.VT To.RC:$src1),
713 (From.LdFrag addr:$src2))),
716 (!cast<Instruction>(InstrStr#"rmk")
717 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
718 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Zero-masking, register source.
721 (vselect_mask Cast.KRCWM:$mask,
723 (vinsert_insert:$ins (To.VT To.RC:$src1),
724 (From.VT From.RC:$src2),
727 (!cast<Instruction>(InstrStr#"rrkz")
728 Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
729 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Zero-masking, memory source.
731 (vselect_mask Cast.KRCWM:$mask,
733 (vinsert_insert:$ins (To.VT To.RC:$src1),
734 (From.VT (From.LdFrag addr:$src2)),
737 (!cast<Instruction>(InstrStr#"rmkz")
738 Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
739 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Mask-cast lowerings: insert VEC128 into VEC256 (VLX forms).
743 defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
744 v8f32x_info, vinsert128_insert,
745 INSERT_get_vinsert128_imm, [HasVLX]>;
746 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
747 v4f64x_info, vinsert128_insert,
748 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
750 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
751 v8i32x_info, vinsert128_insert,
752 INSERT_get_vinsert128_imm, [HasVLX]>;
753 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
754 v8i32x_info, vinsert128_insert,
755 INSERT_get_vinsert128_imm, [HasVLX]>;
756 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
757 v8i32x_info, vinsert128_insert,
758 INSERT_get_vinsert128_imm, [HasVLX]>;
759 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
760 v4i64x_info, vinsert128_insert,
761 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
762 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
763 v4i64x_info, vinsert128_insert,
764 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
765 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
766 v4i64x_info, vinsert128_insert,
767 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
// Mask-cast lowerings: insert VEC128 into VEC512.
769 defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
770 v16f32_info, vinsert128_insert,
771 INSERT_get_vinsert128_imm, [HasAVX512]>;
772 defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
773 v8f64_info, vinsert128_insert,
774 INSERT_get_vinsert128_imm, [HasDQI]>;
776 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
777 v16i32_info, vinsert128_insert,
778 INSERT_get_vinsert128_imm, [HasAVX512]>;
779 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
780 v16i32_info, vinsert128_insert,
781 INSERT_get_vinsert128_imm, [HasAVX512]>;
782 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
783 v16i32_info, vinsert128_insert,
784 INSERT_get_vinsert128_imm, [HasAVX512]>;
785 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
786 v8i64_info, vinsert128_insert,
787 INSERT_get_vinsert128_imm, [HasDQI]>;
788 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
789 v8i64_info, vinsert128_insert,
790 INSERT_get_vinsert128_imm, [HasDQI]>;
791 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
792 v8i64_info, vinsert128_insert,
793 INSERT_get_vinsert128_imm, [HasDQI]>;
// Mask-cast lowerings: insert VEC256 into VEC512.
795 defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
796 v16f32_info, vinsert256_insert,
797 INSERT_get_vinsert256_imm, [HasDQI]>;
798 defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
799 v8f64_info, vinsert256_insert,
800 INSERT_get_vinsert256_imm, [HasAVX512]>;
802 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
803 v16i32_info, vinsert256_insert,
804 INSERT_get_vinsert256_imm, [HasDQI]>;
805 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
806 v16i32_info, vinsert256_insert,
807 INSERT_get_vinsert256_imm, [HasDQI]>;
808 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
809 v16i32_info, vinsert256_insert,
810 INSERT_get_vinsert256_imm, [HasDQI]>;
811 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
812 v8i64_info, vinsert256_insert,
813 INSERT_get_vinsert256_imm, [HasAVX512]>;
814 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
815 v8i64_info, vinsert256_insert,
816 INSERT_get_vinsert256_imm, [HasAVX512]>;
817 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
818 v8i64_info, vinsert256_insert,
819 INSERT_get_vinsert256_imm, [HasAVX512]>;
821 // vinsertps - insert f32 to XMM
822 let ExeDomain = SSEPackedSingle in {
// Register-register form; marked commutable for 2-address conversion.
823 let isCommutable = 1 in
824 def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
825 (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
826 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
827 [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
828 EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
// Load-folding form: the scalar f32 load is widened via scalar_to_vector
// so it can feed the X86insertps node directly.
829 def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
830 (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
831 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
832 [(set VR128X:$dst, (X86insertps VR128X:$src1,
833 (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
835 EVEX_4V, EVEX_CD8<32, CD8VT1>,
836 Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
839 //===----------------------------------------------------------------------===//
840 // AVX-512 VECTOR EXTRACT
843 // Supports two different pattern operators for mask and unmasked ops. Allows
844 // null_frag to be passed for one.
845 multiclass vextract_for_size_split<int Opcode,
846 X86VectorVTInfo From, X86VectorVTInfo To,
847 SDPatternOperator vextract_extract,
848 SDPatternOperator vextract_for_mask,
849 SchedWrite SchedRR, SchedWrite SchedMR> {
851 let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
// rr: register extract; AVX512_maskable_split emits the unmasked, merge-
// masked and zero-masked variants, with separate pattern operators for the
// unmasked (vextract_extract) and masked (vextract_for_mask) forms.
852 defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
853 (ins From.RC:$src1, u8imm:$idx),
854 "vextract" # To.EltTypeName # "x" # To.NumElts,
855 "$idx, $src1", "$src1, $idx",
856 (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
857 (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
858 AVX512AIi8Base, EVEX, Sched<[SchedRR]>;
// mr: extract-to-memory form, selected via the unmasked store pattern.
860 def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
861 (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
862 "vextract" # To.EltTypeName # "x" # To.NumElts #
863 "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
864 [(store (To.VT (vextract_extract:$idx
865 (From.VT From.RC:$src1), (iPTR imm))),
// mrk: masked extract-to-memory form; no selection pattern (empty list),
// so it is only produced by manual lowering.
869 let mayStore = 1, hasSideEffects = 0 in
870 def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
871 (ins To.MemOp:$dst, To.KRCWM:$mask,
872 From.RC:$src1, u8imm:$idx),
873 "vextract" # To.EltTypeName # "x" # To.NumElts #
874 "\t{$idx, $src1, $dst {${mask}}|"
875 "$dst {${mask}}, $src1, $idx}", []>,
876 EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
880 // Passes the same pattern operator for masked and unmasked ops.
// Thin wrapper over vextract_for_size_split with both pattern-operator
// slots bound to the same fragment.
881 multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
883 SDPatternOperator vextract_extract,
884 SchedWrite SchedRR, SchedWrite SchedMR> :
885 vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;
887 // Codegen pattern for the alternative types
// Maps an extract PatFrag of an alternative element type onto an
// already-defined VEXTRACT instruction, for both the register ("rr")
// and store ("mr") forms; the immediate is rewritten by the supplied
// SDNodeXForm.
888 multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
889 X86VectorVTInfo To, PatFrag vextract_extract,
890 SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
891 let Predicates = p in {
// Register form.
892 def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
893 (To.VT (!cast<Instruction>(InstrStr#"rr")
895 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
// Extract-then-store form.
896 def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
897 (iPTR imm))), addr:$dst),
898 (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
899 (EXTRACT_get_vextract_imm To.RC:$ext))>;
// Instantiates the full VEXTRACT family for one float/int element-type pair:
// 32x4/64x4 extracts under AVX512, 32x4 from 256-bit under VLX, and the
// DQI-only 64x2/32x8 forms (the latter with null_frag so the unmasked
// operation is never selected through them).
903 multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
904 ValueType EltVT64, int Opcode256,
905 SchedWrite SchedRR, SchedWrite SchedMR> {
906 let Predicates = [HasAVX512] in {
907 defm NAME # "32x4Z" : vextract_for_size<Opcode128,
908 X86VectorVTInfo<16, EltVT32, VR512>,
909 X86VectorVTInfo< 4, EltVT32, VR128X>,
910 vextract128_extract, SchedRR, SchedMR>,
911 EVEX_V512, EVEX_CD8<32, CD8VT4>;
912 defm NAME # "64x4Z" : vextract_for_size<Opcode256,
913 X86VectorVTInfo< 8, EltVT64, VR512>,
914 X86VectorVTInfo< 4, EltVT64, VR256X>,
915 vextract256_extract, SchedRR, SchedMR>,
916 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
918 let Predicates = [HasVLX] in
919 defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
920 X86VectorVTInfo< 8, EltVT32, VR256X>,
921 X86VectorVTInfo< 4, EltVT32, VR128X>,
922 vextract128_extract, SchedRR, SchedMR>,
923 EVEX_V256, EVEX_CD8<32, CD8VT4>;
925 // Even with DQI we'd like to only use these instructions for masking.
926 let Predicates = [HasVLX, HasDQI] in
927 defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
928 X86VectorVTInfo< 4, EltVT64, VR256X>,
929 X86VectorVTInfo< 2, EltVT64, VR128X>,
930 null_frag, vextract128_extract, SchedRR, SchedMR>,
931 VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;
933 // Even with DQI we'd like to only use these instructions for masking.
934 let Predicates = [HasDQI] in {
935 defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
936 X86VectorVTInfo< 8, EltVT64, VR512>,
937 X86VectorVTInfo< 2, EltVT64, VR128X>,
938 null_frag, vextract128_extract, SchedRR, SchedMR>,
939 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
940 defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
941 X86VectorVTInfo<16, EltVT32, VR512>,
942 X86VectorVTInfo< 8, EltVT32, VR256X>,
943 null_frag, vextract256_extract, SchedRR, SchedMR>,
944 EVEX_V512, EVEX_CD8<32, CD8VT8>;
948 // TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
// Float (VEXTRACTF*, opcodes 0x19/0x1b) and integer (VEXTRACTI*, 0x39/0x3b)
// extract families.
949 defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
950 defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
952 // extract_subvector codegen patterns with the alternative types.
953 // Even with AVX512DQ we'll still use these for unmasked operations.
954 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
955 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
956 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
957 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
959 defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
960 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
961 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
962 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
964 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
965 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
966 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
967 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
969 // Codegen pattern with the alternative types extract VEC128 from VEC256
970 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
971 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
972 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
973 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
974 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16f16x_info, v8f16x_info,
975 vextract128_extract, EXTRACT_get_vextract128_imm, [HasFP16, HasVLX]>;
977 // Codegen pattern with the alternative types extract VEC128 from VEC512
978 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
979 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
980 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
981 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
982 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32f16_info, v8f16x_info,
983 vextract128_extract, EXTRACT_get_vextract128_imm, [HasFP16]>;
984 // Codegen pattern with the alternative types extract VEC256 from VEC512
985 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
986 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
987 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
988 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
989 defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info,
990 vextract256_extract, EXTRACT_get_vextract256_imm, [HasFP16]>;
993 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
994 // smaller extract to enable EVEX->VEX.
// Without VLX, first drop to the low YMM via EXTRACT_SUBREG, then use the
// VEX-encoded 128-bit extract instructions.
995 let Predicates = [NoVLX] in {
996 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
997 (v2i64 (VEXTRACTI128rr
998 (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
1000 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
1001 (v2f64 (VEXTRACTF128rr
1002 (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
1004 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
1005 (v4i32 (VEXTRACTI128rr
1006 (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
1008 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
1009 (v4f32 (VEXTRACTF128rr
1010 (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
1012 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
1013 (v8i16 (VEXTRACTI128rr
1014 (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
1016 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
1017 (v16i8 (VEXTRACTI128rr
1018 (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
1022 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
1023 // smaller extract to enable EVEX->VEX.
// With VLX, the same trick using the EVEX 256-bit-source extracts.
1024 let Predicates = [HasVLX] in {
1025 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
1026 (v2i64 (VEXTRACTI32x4Z256rr
1027 (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
1029 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
1030 (v2f64 (VEXTRACTF32x4Z256rr
1031 (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
1033 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
1034 (v4i32 (VEXTRACTI32x4Z256rr
1035 (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
1037 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
1038 (v4f32 (VEXTRACTF32x4Z256rr
1039 (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
1041 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
1042 (v8i16 (VEXTRACTI32x4Z256rr
1043 (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
1045 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
1046 (v16i8 (VEXTRACTI32x4Z256rr
1047 (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
// f16 variant of the same high-128 extract, gated on FP16 + VLX.
1051 let Predicates = [HasFP16, HasVLX] in
1052 def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
1053 (v8f16 (VEXTRACTF32x4Z256rr
1054 (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
1058 // Additional patterns for handling a bitcast between the vselect and the
1059 // extract_subvector.
// Selects the masked ("rrk") / zero-masked ("rrkz") forms of an existing
// VEXTRACT instruction when the writemask type (Cast) differs from the
// extracted vector type (To) because of an intervening bitcast.
1060 multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
1061 X86VectorVTInfo To, X86VectorVTInfo Cast,
1062 PatFrag vextract_extract,
1063 SDNodeXForm EXTRACT_get_vextract_imm,
1064 list<Predicate> p> {
1065 let Predicates = p in {
// Merge-masking: result blended with the $src0 passthrough.
1066 def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
1068 (To.VT (vextract_extract:$ext
1069 (From.VT From.RC:$src), (iPTR imm)))),
1071 (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
1072 Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
1073 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
// Zero-masking: unselected elements are zeroed.
1075 def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
1077 (To.VT (vextract_extract:$ext
1078 (From.VT From.RC:$src), (iPTR imm)))),
1079 Cast.ImmAllZerosV)),
1080 (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
1081 Cast.KRCWM:$mask, From.RC:$src,
1082 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
// vextract_for_mask_cast instantiations, mirroring the insert ones above:
// 128-bit extracts from 256-bit vectors first.
1086 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
1087 v4f32x_info, vextract128_extract,
1088 EXTRACT_get_vextract128_imm, [HasVLX]>;
1089 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
1090 v2f64x_info, vextract128_extract,
1091 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1093 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
1094 v4i32x_info, vextract128_extract,
1095 EXTRACT_get_vextract128_imm, [HasVLX]>;
1096 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
1097 v4i32x_info, vextract128_extract,
1098 EXTRACT_get_vextract128_imm, [HasVLX]>;
1099 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
1100 v4i32x_info, vextract128_extract,
1101 EXTRACT_get_vextract128_imm, [HasVLX]>;
1102 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
1103 v2i64x_info, vextract128_extract,
1104 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1105 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
1106 v2i64x_info, vextract128_extract,
1107 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1108 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
1109 v2i64x_info, vextract128_extract,
1110 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
// 128-bit extracts from 512-bit vectors.
1112 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
1113 v4f32x_info, vextract128_extract,
1114 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1115 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
1116 v2f64x_info, vextract128_extract,
1117 EXTRACT_get_vextract128_imm, [HasDQI]>;
1119 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
1120 v4i32x_info, vextract128_extract,
1121 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1122 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
1123 v4i32x_info, vextract128_extract,
1124 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1125 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
1126 v4i32x_info, vextract128_extract,
1127 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1128 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
1129 v2i64x_info, vextract128_extract,
1130 EXTRACT_get_vextract128_imm, [HasDQI]>;
1131 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
1132 v2i64x_info, vextract128_extract,
1133 EXTRACT_get_vextract128_imm, [HasDQI]>;
1134 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
1135 v2i64x_info, vextract128_extract,
1136 EXTRACT_get_vextract128_imm, [HasDQI]>;
// 256-bit extracts from 512-bit vectors.
1138 defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
1139 v8f32x_info, vextract256_extract,
1140 EXTRACT_get_vextract256_imm, [HasDQI]>;
1141 defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
1142 v4f64x_info, vextract256_extract,
1143 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1145 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
1146 v8i32x_info, vextract256_extract,
1147 EXTRACT_get_vextract256_imm, [HasDQI]>;
1148 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
1149 v8i32x_info, vextract256_extract,
1150 EXTRACT_get_vextract256_imm, [HasDQI]>;
1151 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
1152 v8i32x_info, vextract256_extract,
1153 EXTRACT_get_vextract256_imm, [HasDQI]>;
1154 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
1155 v4i64x_info, vextract256_extract,
1156 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1157 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
1158 v4i64x_info, vextract256_extract,
1159 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1160 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
1161 v4i64x_info, vextract256_extract,
1162 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1164 // vextractps - extract 32 bits from XMM
// Register form: extracts one 32-bit element (selected by the immediate)
// of the f32 vector, viewed as v4i32, into a GPR.
1165 def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
1166 (ins VR128X:$src1, u8imm:$src2),
1167 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1168 [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
1169 EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
// Memory form: stores the extracted 32-bit element directly.
1171 def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
1172 (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1173 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1174 [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1176 EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1178 //===---------------------------------------------------------------------===//
1179 // AVX-512 BROADCAST
1181 // broadcast with a scalar argument.
// Selection patterns for X86VBroadcast of a scalar FP register: the scalar
// is first moved into the vector register class via COPY_TO_REGCLASS, then
// the existing vector-source broadcast instruction (Name + ZSuffix) is used.
// Unmasked, merge-masked (rrk) and zero-masked (rrkz) variants.
1182 multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
1184 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
1185 def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1186 (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
1187 (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1188 def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1189 (X86VBroadcast SrcInfo.FRC:$src),
1190 DestInfo.RC:$src0)),
1191 (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
1192 DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1193 (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1194 def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1195 (X86VBroadcast SrcInfo.FRC:$src),
1196 DestInfo.ImmAllZerosV)),
1197 (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
1198 DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1201 // Split version to allow mask and broadcast node to be different types. This
1202 // helps support the 32x2 broadcasts.
// Defines the six broadcast variants: rr/rrk/rrkz (register source) and
// rm/rmk/rmkz (scalar-memory source). MaskInfo governs the writemask and
// result register class, DestInfo the execution domain, SrcInfo the source.
1203 multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1205 SchedWrite SchedRR, SchedWrite SchedRM,
1206 X86VectorVTInfo MaskInfo,
1207 X86VectorVTInfo DestInfo,
1208 X86VectorVTInfo SrcInfo,
1209 bit IsConvertibleToThreeAddress,
1210 SDPatternOperator UnmaskedOp = X86VBroadcast,
1211 SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
// Unmasked register-source broadcast; UnmaskedOp is overridable so DQ-only
// forms can pass null_frag.
1212 let hasSideEffects = 0 in
1213 def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
1214 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1215 [(set MaskInfo.RC:$dst,
1219 (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
1220 DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
// Zero-masked register-source broadcast.
1221 def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1222 (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
1223 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1224 "${dst} {${mask}} {z}, $src}"),
1225 [(set MaskInfo.RC:$dst,
1226 (vselect_mask MaskInfo.KRCWM:$mask,
1230 (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1231 MaskInfo.ImmAllZerosV))],
1232 DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
// Merge-masked register-source broadcast; $src0 is tied to $dst.
1233 let Constraints = "$src0 = $dst" in
1234 def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1235 (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1237 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1238 "${dst} {${mask}}, $src}"),
1239 [(set MaskInfo.RC:$dst,
1240 (vselect_mask MaskInfo.KRCWM:$mask,
1244 (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1245 MaskInfo.RC:$src0))],
1246 DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;
// Unmasked broadcast from a scalar memory operand.
1248 let hasSideEffects = 0, mayLoad = 1 in
1249 def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1250 (ins SrcInfo.ScalarMemOp:$src),
1251 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1252 [(set MaskInfo.RC:$dst,
1256 (UnmaskedBcastOp addr:$src)))))],
1257 DestInfo.ExeDomain>, T8PD, EVEX,
1258 EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
// Zero-masked memory-source broadcast.
1260 def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1261 (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
1262 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1263 "${dst} {${mask}} {z}, $src}"),
1264 [(set MaskInfo.RC:$dst,
1265 (vselect_mask MaskInfo.KRCWM:$mask,
1269 (SrcInfo.BroadcastLdFrag addr:$src)))),
1270 MaskInfo.ImmAllZerosV))],
1271 DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
1272 EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
// Merge-masked memory-source broadcast.
1274 let Constraints = "$src0 = $dst",
1275 isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
1276 def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1277 (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1278 SrcInfo.ScalarMemOp:$src),
1279 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1280 "${dst} {${mask}}, $src}"),
1281 [(set MaskInfo.RC:$dst,
1282 (vselect_mask MaskInfo.KRCWM:$mask,
1286 (SrcInfo.BroadcastLdFrag addr:$src)))),
1287 MaskInfo.RC:$src0))],
1288 DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
1289 EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1292 // Helper class to force mask and broadcast result to same type.
// Wrapper over avx512_broadcast_rm_split with MaskInfo == DestInfo.
1293 multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
1294 SchedWrite SchedRR, SchedWrite SchedRM,
1295 X86VectorVTInfo DestInfo,
1296 X86VectorVTInfo SrcInfo,
1297 bit IsConvertibleToThreeAddress> :
1298 avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
1299 DestInfo, DestInfo, SrcInfo,
1300 IsConvertibleToThreeAddress>;
// FP broadcast of a 64-bit scalar: 512-bit form under AVX512, 256-bit under
// VLX. Each width combines the rm (vector/memory source) instructions with
// the scalar-register selection patterns.
1302 multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1303 AVX512VLVectorVTInfo _> {
1304 let Predicates = [HasAVX512] in {
1305 defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1306 WriteFShuffle256Ld, _.info512, _.info128, 1>,
1307 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
1312 let Predicates = [HasVLX] in {
1313 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1314 WriteFShuffle256Ld, _.info256, _.info128, 1>,
1315 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
// FP broadcast of a 32-bit scalar: as above, plus a 128-bit form under VLX.
1321 multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1322 AVX512VLVectorVTInfo _> {
1323 let Predicates = [HasAVX512] in {
1324 defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1325 WriteFShuffle256Ld, _.info512, _.info128, 1>,
1326 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
1331 let Predicates = [HasVLX] in {
1332 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1333 WriteFShuffle256Ld, _.info256, _.info128, 1>,
1334 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
1337 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1338 WriteFShuffle256Ld, _.info128, _.info128, 1>,
1339 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
// VBROADCASTSS (0x18) / VBROADCASTSD (0x19, W1X) instantiations.
1344 defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1346 defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1347 avx512vl_f64_info>, VEX_W1X;
// Integer broadcast from a general-purpose register (VPBROADCAST{B,W,D,Q}r).
1349 multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1350 X86VectorVTInfo _, SDPatternOperator OpNode,
1351 RegisterClass SrcRC> {
1352 // Fold with a mask even if it has multiple uses since it is cheap.
1353 let ExeDomain = _.ExeDomain in
1354 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1356 "vpbroadcast"#_.Suffix, "$src", "$src",
1357 (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
1358 /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
1359 T8PD, EVEX, Sched<[SchedRR]>;
// Byte/word variant: the instruction consumes a GR32, so the patterns
// below widen the GR8/GR16 source with INSERT_SUBREG into an undef i32.
// The maskable defs themselves carry no patterns (empty lists).
1362 multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1363 X86VectorVTInfo _, SDPatternOperator OpNode,
1364 RegisterClass SrcRC, SubRegIndex Subreg> {
1365 let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1366 defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
1367 (outs _.RC:$dst), (ins GR32:$src),
1368 !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1369 !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1370 "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
1371 "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
// Unmasked selection pattern.
1373 def : Pat <(_.VT (OpNode SrcRC:$src)),
1374 (!cast<Instruction>(Name#rr)
1375 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1377 // Fold with a mask even if it has multiple uses since it is cheap.
1378 def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1379 (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
1380 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1382 def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1383 (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
1384 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
// Instantiates the byte/word GPR broadcast at 512/256/128-bit widths,
// gating the narrower widths on VLX in addition to the base predicate.
1387 multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1388 AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1389 RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1390 let Predicates = [prd] in
1391 defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1392 OpNode, SrcRC, Subreg>, EVEX_V512;
1393 let Predicates = [prd, HasVLX] in {
1394 defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1395 _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1396 defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1397 _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
// Same width expansion for the dword/qword GPR broadcasts.
1401 multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1402 SDPatternOperator OpNode,
1403 RegisterClass SrcRC, Predicate prd> {
1404 let Predicates = [prd] in
1405 defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1407 let Predicates = [prd, HasVLX] in {
1408 defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1410 defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
// GPR-source broadcast instantiations: B/W require BWI; D/Q require AVX512.
1415 defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1416 avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1417 defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1418 avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1420 defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1421 X86VBroadcast, GR32, HasAVX512>;
1422 defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1423 X86VBroadcast, GR64, HasAVX512>, VEX_W;
// Integer broadcast with vector-register / memory source, expanded to the
// 512-bit width under `prd` and 256/128-bit widths under `prd` + VLX.
1425 multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1426 AVX512VLVectorVTInfo _, Predicate prd,
1427 bit IsConvertibleToThreeAddress> {
1428 let Predicates = [prd] in {
1429 defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
1430 WriteShuffle256Ld, _.info512, _.info128,
1431 IsConvertibleToThreeAddress>,
1434 let Predicates = [prd, HasVLX] in {
1435 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
1436 WriteShuffle256Ld, _.info256, _.info128,
1437 IsConvertibleToThreeAddress>,
1439 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
1440 WriteShuffleXLd, _.info128, _.info128,
1441 IsConvertibleToThreeAddress>,
// VPBROADCAST{B,W} need BWI; {D,Q} need AVX512 (D/Q also allowed to be
// converted to three-address form, flag = 1).
1446 defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1447 avx512vl_i8_info, HasBWI, 0>;
1448 defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1449 avx512vl_i16_info, HasBWI, 0>;
1450 defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1451 avx512vl_i32_info, HasAVX512, 1>;
1452 defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1453 avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;
// Subvector broadcast from memory (e.g. vbroadcasti32x4): maskable
// memory-only form selected through OpNode.
1455 multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1456 SDPatternOperator OpNode,
1457 X86VectorVTInfo _Dst,
1458 X86VectorVTInfo _Src> {
1459 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1460 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1461 (_Dst.VT (OpNode addr:$src))>,
1462 Sched<[SchedWriteShuffle.YMM.Folded]>,
1466 // This should be used for the AVX512DQ broadcast instructions. It disables
1467 // the unmasked patterns so that we only use the DQ instructions when masking
1469 multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1470 SDPatternOperator OpNode,
1471 X86VectorVTInfo _Dst,
1472 X86VectorVTInfo _Src> {
1473 let hasSideEffects = 0, mayLoad = 1 in
1474 defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1475 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1477 (_Dst.VT (OpNode addr:$src))>,
1478 Sched<[SchedWriteShuffle.YMM.Folded]>,
// f16 broadcasts reuse the VPBROADCASTW instructions (same 16-bit element
// size); scalar FR16X sources are first copied into VR128X.
1481 let Predicates = [HasFP16] in {
1482 def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
1483 (VPBROADCASTWZrm addr:$src)>;
1485 def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
1486 (VPBROADCASTWZrr VR128X:$src)>;
1487 def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
1488 (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
// 128/256-bit variants require VLX in addition to FP16.
1490 let Predicates = [HasVLX, HasFP16] in {
1491 def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
1492 (VPBROADCASTWZ128rm addr:$src)>;
1493 def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
1494 (VPBROADCASTWZ256rm addr:$src)>;
1496 def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
1497 (VPBROADCASTWZ128rr VR128X:$src)>;
1498 def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
1499 (VPBROADCASTWZ256rr VR128X:$src)>;
1501 def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
1502 (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1503 def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
1504 (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1507 //===----------------------------------------------------------------------===//
1508 // AVX-512 BROADCAST SUBVECTORS
// 512-bit destination subvector broadcasts: 128-bit source uses the 32x4
// forms, 256-bit source the 64x4 forms.
1511 defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1512 X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
1513 EVEX_V512, EVEX_CD8<32, CD8VT4>;
1514 defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1515 X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
1516 EVEX_V512, EVEX_CD8<32, CD8VT4>;
1517 defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1518 X86SubVBroadcastld256, v8i64_info, v4i64x_info>, VEX_W,
1519 EVEX_V512, EVEX_CD8<64, CD8VT4>;
1520 defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1521 X86SubVBroadcastld256, v8f64_info, v4f64x_info>, VEX_W,
1522 EVEX_V512, EVEX_CD8<64, CD8VT4>;
// Reuse the 64x4 / 32x4 instructions for every element type: an unmasked
// subvector broadcast is element-size agnostic.
1524 let Predicates = [HasAVX512] in {
1525 def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
1526 (VBROADCASTF64X4rm addr:$src)>;
1527 def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
1528 (VBROADCASTF64X4rm addr:$src)>;
1529 def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
1530 (VBROADCASTF64X4rm addr:$src)>;
1531 def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
1532 (VBROADCASTI64X4rm addr:$src)>;
1533 def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
1534 (VBROADCASTI64X4rm addr:$src)>;
1535 def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
1536 (VBROADCASTI64X4rm addr:$src)>;
1537 def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
1538 (VBROADCASTI64X4rm addr:$src)>;
1540 def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
1541 (VBROADCASTF32X4rm addr:$src)>;
1542 def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
1543 (VBROADCASTF32X4rm addr:$src)>;
1544 def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
1545 (VBROADCASTF32X4rm addr:$src)>;
1546 def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
1547 (VBROADCASTI32X4rm addr:$src)>;
1548 def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
1549 (VBROADCASTI32X4rm addr:$src)>;
1550 def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
1551 (VBROADCASTI32X4rm addr:$src)>;
1552 def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
1553 (VBROADCASTI32X4rm addr:$src)>;
1555 // Patterns for selects of bitcasted operations.
// Masked forms where the broadcast result is bitcast before the select;
// pairs of zero-masked (rmkz) and merge-masked (rmk) patterns.
1556 def : Pat<(vselect_mask VK16WM:$mask,
1557 (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1558 (v16f32 immAllZerosV)),
1559 (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1560 def : Pat<(vselect_mask VK16WM:$mask,
1561 (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1563 (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1564 def : Pat<(vselect_mask VK16WM:$mask,
1565 (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1566 (v16i32 immAllZerosV)),
1567 (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1568 def : Pat<(vselect_mask VK16WM:$mask,
1569 (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1571 (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1573 def : Pat<(vselect_mask VK8WM:$mask,
1574 (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1575 (v8f64 immAllZerosV)),
1576 (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1577 def : Pat<(vselect_mask VK8WM:$mask,
1578 (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1580 (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1581 def : Pat<(vselect_mask VK8WM:$mask,
1582 (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1583 (v8i64 immAllZerosV)),
1584 (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1585 def : Pat<(vselect_mask VK8WM:$mask,
1586 (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1588 (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1591 let Predicates = [HasVLX] in {
1592 defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1593 X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
1594 EVEX_V256, EVEX_CD8<32, CD8VT4>;
1595 defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1596 X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
1597 EVEX_V256, EVEX_CD8<32, CD8VT4>;
1599 def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
1600 (VBROADCASTF32X4Z256rm addr:$src)>;
1601 def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
1602 (VBROADCASTF32X4Z256rm addr:$src)>;
1603 def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
1604 (VBROADCASTF32X4Z256rm addr:$src)>;
1605 def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
1606 (VBROADCASTI32X4Z256rm addr:$src)>;
1607 def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
1608 (VBROADCASTI32X4Z256rm addr:$src)>;
1609 def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
1610 (VBROADCASTI32X4Z256rm addr:$src)>;
1611 def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
1612 (VBROADCASTI32X4Z256rm addr:$src)>;
1614 // Patterns for selects of bitcasted operations.
1615 def : Pat<(vselect_mask VK8WM:$mask,
1616 (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1617 (v8f32 immAllZerosV)),
1618 (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1619 def : Pat<(vselect_mask VK8WM:$mask,
1620 (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1622 (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1623 def : Pat<(vselect_mask VK8WM:$mask,
1624 (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1625 (v8i32 immAllZerosV)),
1626 (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1627 def : Pat<(vselect_mask VK8WM:$mask,
1628 (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1630 (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1633 let Predicates = [HasVLX, HasDQI] in {
1634 defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1635 X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X,
1636 EVEX_V256, EVEX_CD8<64, CD8VT2>;
1637 defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1638 X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, VEX_W1X,
1639 EVEX_V256, EVEX_CD8<64, CD8VT2>;
1641 // Patterns for selects of bitcasted operations.
1642 def : Pat<(vselect_mask VK4WM:$mask,
1643 (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1644 (v4f64 immAllZerosV)),
1645 (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1646 def : Pat<(vselect_mask VK4WM:$mask,
1647 (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1649 (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1650 def : Pat<(vselect_mask VK4WM:$mask,
1651 (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1652 (v4i64 immAllZerosV)),
1653 (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1654 def : Pat<(vselect_mask VK4WM:$mask,
1655 (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1657 (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1660 let Predicates = [HasDQI] in {
1661 defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1662 X86SubVBroadcastld128, v8i64_info, v2i64x_info>, VEX_W,
1663 EVEX_V512, EVEX_CD8<64, CD8VT2>;
1664 defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1665 X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
1666 EVEX_V512, EVEX_CD8<32, CD8VT8>;
1667 defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1668 X86SubVBroadcastld128, v8f64_info, v2f64x_info>, VEX_W,
1669 EVEX_V512, EVEX_CD8<64, CD8VT2>;
1670 defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1671 X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
1672 EVEX_V512, EVEX_CD8<32, CD8VT8>;
1674 // Patterns for selects of bitcasted operations.
1675 def : Pat<(vselect_mask VK16WM:$mask,
1676 (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1677 (v16f32 immAllZerosV)),
1678 (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
1679 def : Pat<(vselect_mask VK16WM:$mask,
1680 (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1682 (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1683 def : Pat<(vselect_mask VK16WM:$mask,
1684 (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1685 (v16i32 immAllZerosV)),
1686 (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
1687 def : Pat<(vselect_mask VK16WM:$mask,
1688 (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1690 (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1692 def : Pat<(vselect_mask VK8WM:$mask,
1693 (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1694 (v8f64 immAllZerosV)),
1695 (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
1696 def : Pat<(vselect_mask VK8WM:$mask,
1697 (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1699 (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1700 def : Pat<(vselect_mask VK8WM:$mask,
1701 (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1702 (v8i64 immAllZerosV)),
1703 (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
1704 def : Pat<(vselect_mask VK8WM:$mask,
1705 (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1707 (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1710 multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1711 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
1712 let Predicates = [HasDQI] in
1713 defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
1714 WriteShuffle256Ld, _Dst.info512,
1715 _Src.info512, _Src.info128, 0, null_frag, null_frag>,
1717 let Predicates = [HasDQI, HasVLX] in
1718 defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
1719 WriteShuffle256Ld, _Dst.info256,
1720 _Src.info256, _Src.info128, 0, null_frag, null_frag>,
1724 multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1725 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
1726 avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1728 let Predicates = [HasDQI, HasVLX] in
1729 defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
1730 WriteShuffleXLd, _Dst.info128,
1731 _Src.info128, _Src.info128, 0, null_frag, null_frag>,
1735 defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1736 avx512vl_i32_info, avx512vl_i64_info>;
1737 defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1738 avx512vl_f32_info, avx512vl_f64_info>;
1740 //===----------------------------------------------------------------------===//
1741 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
1743 multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1744 X86VectorVTInfo _, RegisterClass KRC> {
1745 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1746 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1747 [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1748 EVEX, Sched<[WriteShuffle]>;
1751 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1752 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1753 let Predicates = [HasCDI] in
1754 defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1755 let Predicates = [HasCDI, HasVLX] in {
1756 defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1757 defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1761 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1762 avx512vl_i32_info, VK16>;
1763 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1764 avx512vl_i64_info, VK8>, VEX_W;
1766 //===----------------------------------------------------------------------===//
1767 // -- VPERMI2 - 3 source operands form --
1768 multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1769 X86FoldableSchedWrite sched,
1770 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1771 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1772 hasSideEffects = 0 in {
1773 defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1774 (ins _.RC:$src2, _.RC:$src3),
1775 OpcodeStr, "$src3, $src2", "$src2, $src3",
1776 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1777 EVEX_4V, AVX5128IBase, Sched<[sched]>;
1780 defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1781 (ins _.RC:$src2, _.MemOp:$src3),
1782 OpcodeStr, "$src3, $src2", "$src2, $src3",
1783 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1784 (_.VT (_.LdFrag addr:$src3)))), 1>,
1785 EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1789 multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1790 X86FoldableSchedWrite sched,
1791 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1792 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1793 hasSideEffects = 0, mayLoad = 1 in
1794 defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1795 (ins _.RC:$src2, _.ScalarMemOp:$src3),
1796 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1797 !strconcat("$src2, ${src3}", _.BroadcastStr ),
1798 (_.VT (X86VPermt2 _.RC:$src2,
1799 IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1800 AVX5128IBase, EVEX_4V, EVEX_B,
1801 Sched<[sched.Folded, sched.ReadAfterFold]>;
1804 multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1805 X86FoldableSchedWrite sched,
1806 AVX512VLVectorVTInfo VTInfo,
1807 AVX512VLVectorVTInfo ShuffleMask> {
1808 defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1809 ShuffleMask.info512>,
1810 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1811 ShuffleMask.info512>, EVEX_V512;
1812 let Predicates = [HasVLX] in {
1813 defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1814 ShuffleMask.info128>,
1815 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1816 ShuffleMask.info128>, EVEX_V128;
1817 defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1818 ShuffleMask.info256>,
1819 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1820 ShuffleMask.info256>, EVEX_V256;
1824 multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1825 X86FoldableSchedWrite sched,
1826 AVX512VLVectorVTInfo VTInfo,
1827 AVX512VLVectorVTInfo Idx,
1829 let Predicates = [Prd] in
1830 defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1831 Idx.info512>, EVEX_V512;
1832 let Predicates = [Prd, HasVLX] in {
1833 defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1834 Idx.info128>, EVEX_V128;
1835 defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1836 Idx.info256>, EVEX_V256;
1840 defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1841 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1842 defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1843 avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1844 defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1845 avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1846 VEX_W, EVEX_CD8<16, CD8VF>;
1847 defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1848 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1850 defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1851 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1852 defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1853 avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1855 // Extra patterns to deal with extra bitcasts due to passthru and index being
1856 // different types on the fp versions.
1857 multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1858 X86VectorVTInfo IdxVT,
1859 X86VectorVTInfo CastVT> {
1860 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1861 (X86VPermt2 (_.VT _.RC:$src2),
1862 (IdxVT.VT (bitconvert
1863 (CastVT.VT _.RC:$src1))),
1865 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1866 (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1867 _.RC:$src2, _.RC:$src3)>;
1868 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1869 (X86VPermt2 _.RC:$src2,
1870 (IdxVT.VT (bitconvert
1871 (CastVT.VT _.RC:$src1))),
1872 (_.LdFrag addr:$src3)),
1873 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1874 (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1875 _.RC:$src2, addr:$src3)>;
1876 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1877 (X86VPermt2 _.RC:$src2,
1878 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
1879 (_.BroadcastLdFrag addr:$src3)),
1880 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1881 (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1882 _.RC:$src2, addr:$src3)>;
1885 // TODO: Should we add more casts? The vXi64 case is common due to ABI.
1886 defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
1887 defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
1888 defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
1891 multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1892 X86FoldableSchedWrite sched,
1893 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1894 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1895 defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1896 (ins IdxVT.RC:$src2, _.RC:$src3),
1897 OpcodeStr, "$src3, $src2", "$src2, $src3",
1898 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1899 EVEX_4V, AVX5128IBase, Sched<[sched]>;
1901 defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1902 (ins IdxVT.RC:$src2, _.MemOp:$src3),
1903 OpcodeStr, "$src3, $src2", "$src2, $src3",
1904 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1905 (_.LdFrag addr:$src3))), 1>,
1906 EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1909 multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1910 X86FoldableSchedWrite sched,
1911 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1912 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1913 defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1914 (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1915 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1916 !strconcat("$src2, ${src3}", _.BroadcastStr ),
1917 (_.VT (X86VPermt2 _.RC:$src1,
1918 IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1919 AVX5128IBase, EVEX_4V, EVEX_B,
1920 Sched<[sched.Folded, sched.ReadAfterFold]>;
1923 multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1924 X86FoldableSchedWrite sched,
1925 AVX512VLVectorVTInfo VTInfo,
1926 AVX512VLVectorVTInfo ShuffleMask> {
1927 defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1928 ShuffleMask.info512>,
1929 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1930 ShuffleMask.info512>, EVEX_V512;
1931 let Predicates = [HasVLX] in {
1932 defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1933 ShuffleMask.info128>,
1934 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1935 ShuffleMask.info128>, EVEX_V128;
1936 defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1937 ShuffleMask.info256>,
1938 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1939 ShuffleMask.info256>, EVEX_V256;
1943 multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1944 X86FoldableSchedWrite sched,
1945 AVX512VLVectorVTInfo VTInfo,
1946 AVX512VLVectorVTInfo Idx, Predicate Prd> {
1947 let Predicates = [Prd] in
1948 defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1949 Idx.info512>, EVEX_V512;
1950 let Predicates = [Prd, HasVLX] in {
1951 defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1952 Idx.info128>, EVEX_V128;
1953 defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1954 Idx.info256>, EVEX_V256;
1958 defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1959 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1960 defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1961 avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1962 defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1963 avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1964 VEX_W, EVEX_CD8<16, CD8VF>;
1965 defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1966 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1968 defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1969 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1970 defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1971 avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1973 //===----------------------------------------------------------------------===//
1974 // AVX-512 - BLEND using mask
1977 multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1978 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1979 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1980 def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1981 (ins _.RC:$src1, _.RC:$src2),
1982 !strconcat(OpcodeStr,
1983 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1984 EVEX_4V, Sched<[sched]>;
1985 def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1986 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1987 !strconcat(OpcodeStr,
1988 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1989 []>, EVEX_4V, EVEX_K, Sched<[sched]>;
1990 def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1991 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1992 !strconcat(OpcodeStr,
1993 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1994 []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
1995 let mayLoad = 1 in {
1996 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1997 (ins _.RC:$src1, _.MemOp:$src2),
1998 !strconcat(OpcodeStr,
1999 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
2000 []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
2001 Sched<[sched.Folded, sched.ReadAfterFold]>;
2002 def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2003 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2004 !strconcat(OpcodeStr,
2005 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
2006 []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
2007 Sched<[sched.Folded, sched.ReadAfterFold]>;
2008 def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2009 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2010 !strconcat(OpcodeStr,
2011 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
2012 []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
2013 Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
2017 multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
2018 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2019 let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
2020 def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2021 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
2022 !strconcat(OpcodeStr,
2023 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2024 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2025 EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2026 Sched<[sched.Folded, sched.ReadAfterFold]>;
2028 def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2029 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
2030 !strconcat(OpcodeStr,
2031 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
2032 "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2033 EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2034 Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
2036 def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2037 (ins _.RC:$src1, _.ScalarMemOp:$src2),
2038 !strconcat(OpcodeStr,
2039 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
2040 "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2041 EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2042 Sched<[sched.Folded, sched.ReadAfterFold]>;
2046 multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2047 AVX512VLVectorVTInfo VTInfo> {
2048 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2049 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2052 let Predicates = [HasVLX] in {
2053 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2054 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2056 defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2057 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2062 multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2063 AVX512VLVectorVTInfo VTInfo> {
2064 let Predicates = [HasBWI] in
2065 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2068 let Predicates = [HasBWI, HasVLX] in {
2069 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2071 defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2076 defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
2078 defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
2079 avx512vl_f64_info>, VEX_W;
2080 defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
2082 defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
2083 avx512vl_i64_info>, VEX_W;
2084 defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
2086 defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
2087 avx512vl_i16_info>, VEX_W;
2089 //===----------------------------------------------------------------------===//
2090 // Compare Instructions
2091 //===----------------------------------------------------------------------===//
2093 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
2095 multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
2096 PatFrag OpNode_su, PatFrag OpNodeSAE_su,
2097 X86FoldableSchedWrite sched> {
2098 defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2100 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2102 "$cc, $src2, $src1", "$src1, $src2, $cc",
2103 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2104 (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2105 timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
2107 defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2109 (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
2111 "$cc, $src2, $src1", "$src1, $src2, $cc",
2112 (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
2114 (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
2115 timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2116 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
2118 let Uses = [MXCSR] in
2119 defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2121 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2123 "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
2124 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2126 (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2128 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
2130 let isCodeGenOnly = 1 in {
2131 let isCommutable = 1 in
2132 def rr : AVX512Ii8<0xC2, MRMSrcReg,
2133 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
2134 !strconcat("vcmp", _.Suffix,
2135 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2136 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2139 EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
2140 def rm : AVX512Ii8<0xC2, MRMSrcMem,
2142 (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2143 !strconcat("vcmp", _.Suffix,
2144 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2145 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2146 (_.ScalarLdFrag addr:$src2),
2148 EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2149 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
2153 def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2154 (X86cmpms node:$src1, node:$src2, node:$cc), [{
2155 return N->hasOneUse();
2157 def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2158 (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
2159 return N->hasOneUse();
2162 let Predicates = [HasAVX512] in {
2163 let ExeDomain = SSEPackedSingle in
2164 defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
2165 X86cmpms_su, X86cmpmsSAE_su,
2166 SchedWriteFCmp.Scl>, AVX512XSIi8Base;
2167 let ExeDomain = SSEPackedDouble in
2168 defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
2169 X86cmpms_su, X86cmpmsSAE_su,
2170 SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
2172 let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in
2173 defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE,
2174 X86cmpms_su, X86cmpmsSAE_su,
2175 SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA;
2177 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
2178 X86FoldableSchedWrite sched,
2179 X86VectorVTInfo _, bit IsCommutable> {
2180 let isCommutable = IsCommutable, hasSideEffects = 0 in
2181 def rr : AVX512BI<opc, MRMSrcReg,
2182 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2183 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2184 []>, EVEX_4V, Sched<[sched]>;
2185 let mayLoad = 1, hasSideEffects = 0 in
2186 def rm : AVX512BI<opc, MRMSrcMem,
2187 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2188 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2189 []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2190 let isCommutable = IsCommutable, hasSideEffects = 0 in
2191 def rrk : AVX512BI<opc, MRMSrcReg,
2192 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2193 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2194 "$dst {${mask}}, $src1, $src2}"),
2195 []>, EVEX_4V, EVEX_K, Sched<[sched]>;
2196 let mayLoad = 1, hasSideEffects = 0 in
2197 def rmk : AVX512BI<opc, MRMSrcMem,
2198 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2199 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2200 "$dst {${mask}}, $src1, $src2}"),
2201 []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2204 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
2205 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2207 avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
2208 let mayLoad = 1, hasSideEffects = 0 in {
2209 def rmb : AVX512BI<opc, MRMSrcMem,
2210 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2211 !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2212 "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2213 []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2214 def rmbk : AVX512BI<opc, MRMSrcMem,
2215 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2216 _.ScalarMemOp:$src2),
2217 !strconcat(OpcodeStr,
2218 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2219 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2220 []>, EVEX_4V, EVEX_K, EVEX_B,
2221 Sched<[sched.Folded, sched.ReadAfterFold]>;
2225 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
2226 X86SchedWriteWidths sched,
2227 AVX512VLVectorVTInfo VTInfo, Predicate prd,
2228 bit IsCommutable = 0> {
2229 let Predicates = [prd] in
2230 defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
2231 VTInfo.info512, IsCommutable>, EVEX_V512;
2233 let Predicates = [prd, HasVLX] in {
2234 defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
2235 VTInfo.info256, IsCommutable>, EVEX_V256;
2236 defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
2237 VTInfo.info128, IsCommutable>, EVEX_V128;
2241 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2242 X86SchedWriteWidths sched,
2243 AVX512VLVectorVTInfo VTInfo,
2244 Predicate prd, bit IsCommutable = 0> {
2245 let Predicates = [prd] in
2246 defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
2247 VTInfo.info512, IsCommutable>, EVEX_V512;
2249 let Predicates = [prd, HasVLX] in {
2250 defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
2251 VTInfo.info256, IsCommutable>, EVEX_V256;
2252 defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
2253 VTInfo.info128, IsCommutable>, EVEX_V128;
2257 // This fragment treats X86cmpm as commutable to help match loads in both
2258 // operands for PCMPEQ.
2259 def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
2260 def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
2261 (setcc node:$src1, node:$src2, SETGT)>;
2263 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2264 // increase the pattern complexity the way an immediate would.
2265 let AddedComplexity = 2 in {
2266 // FIXME: Is there a better scheduler class for VPCMP?
2267 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
2268 SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2269 EVEX_CD8<8, CD8VF>, VEX_WIG;
2271 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
2272 SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2273 EVEX_CD8<16, CD8VF>, VEX_WIG;
2275 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
2276 SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2277 EVEX_CD8<32, CD8VF>;
2279 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
2280 SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2281 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2283 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
2284 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2285 EVEX_CD8<8, CD8VF>, VEX_WIG;
2287 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
2288 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2289 EVEX_CD8<16, CD8VF>, VEX_WIG;
2291 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
2292 SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2293 EVEX_CD8<32, CD8VF>;
2295 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
2296 SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2297 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2300 def X86pcmpm_imm : SDNodeXForm<setcc, [{
2301 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2302 uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2303 return getI8Imm(SSECC, SDLoc(N));
2306 // Swapped operand version of the above.
2307 def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
2308 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2309 uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2310 SSECC = X86::getSwappedVPCMPImm(SSECC);
2311 return getI8Imm(SSECC, SDLoc(N));
// VPCMP with an explicit condition-code immediate ($cc), writing a mask
// register. Forms: rri (reg,reg), rmi (reg,mem), and their writemasked
// variants rrik/rmik which AND the compare result into $mask.
// Frag matches the unmasked compare; Frag_su is the one-use variant used
// under a writemask so the compare is not duplicated.
2314 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2316 X86FoldableSchedWrite sched,
2317 X86VectorVTInfo _, string Name> {
2318 let isCommutable = 1 in
2319 def rri : AVX512AIi8<opc, MRMSrcReg,
2320 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2321 !strconcat("vpcmp", Suffix,
2322 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2323 [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2326 EVEX_4V, Sched<[sched]>;
// Memory-operand form: second source is loaded.
2327 def rmi : AVX512AIi8<opc, MRMSrcMem,
2328 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2329 !strconcat("vpcmp", Suffix,
2330 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2331 [(set _.KRC:$dst, (_.KVT
2334 (_.VT (_.LdFrag addr:$src2)),
2336 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2337 let isCommutable = 1 in
// Writemasked register form: result is ANDed with $mask.
2338 def rrik : AVX512AIi8<opc, MRMSrcReg,
2339 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2341 !strconcat("vpcmp", Suffix,
2342 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2343 "$dst {${mask}}, $src1, $src2, $cc}"),
2344 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2345 (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
2348 EVEX_4V, EVEX_K, Sched<[sched]>;
// Writemasked memory form.
2349 def rmik : AVX512AIi8<opc, MRMSrcMem,
2350 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2352 !strconcat("vpcmp", Suffix,
2353 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2354 "$dst {${mask}}, $src1, $src2, $cc}"),
2355 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2359 (_.VT (_.LdFrag addr:$src2)),
2361 EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Commuted-load patterns: when the load appears as the first operand,
// reuse rmi/rmik with the swapped immediate (X86pcmpm_imm_commute).
2363 def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2),
2364 (_.VT _.RC:$src1), cond)),
2365 (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2366 _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2368 def : Pat<(and _.KRCWM:$mask,
2369 (_.KVT (Frag_su:$cc (_.LdFrag addr:$src2),
2370 (_.VT _.RC:$src1), cond))),
2371 (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2372 _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2373 (X86pcmpm_imm_commute $cc))>;
// Extends avx512_icmp_cc with embedded-broadcast forms (rmib/rmibk) where
// the scalar memory operand is broadcast to all elements (EVEX_B).
2376 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2377 PatFrag Frag_su, X86FoldableSchedWrite sched,
2378 X86VectorVTInfo _, string Name> :
2379 avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> {
2380 def rmib : AVX512AIi8<opc, MRMSrcMem,
2381 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2383 !strconcat("vpcmp", Suffix,
2384 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2385 "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2386 [(set _.KRC:$dst, (_.KVT (Frag:$cc
2388 (_.BroadcastLdFrag addr:$src2),
2390 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Writemasked broadcast form.
2391 def rmibk : AVX512AIi8<opc, MRMSrcMem,
2392 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2393 _.ScalarMemOp:$src2, u8imm:$cc),
2394 !strconcat("vpcmp", Suffix,
2395 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2396 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2397 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2400 (_.BroadcastLdFrag addr:$src2),
2402 EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Commuted patterns for a broadcast load in the first operand slot; the
// immediate is swapped via X86pcmpm_imm_commute.
2404 def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2),
2405 (_.VT _.RC:$src1), cond)),
2406 (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2407 _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2409 def : Pat<(and _.KRCWM:$mask,
2410 (_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2),
2411 (_.VT _.RC:$src1), cond))),
2412 (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2413 _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2414 (X86pcmpm_imm_commute $cc))>;
// Instantiates avx512_icmp_cc at all three vector lengths: 512-bit under
// `prd`, 256/128-bit additionally gated on HasVLX.
2417 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2418 PatFrag Frag_su, X86SchedWriteWidths sched,
2419 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2420 let Predicates = [prd] in
2421 defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2422 sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2424 let Predicates = [prd, HasVLX] in {
2425 defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2426 sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2427 defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2428 sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
// Same VL expansion as avx512_icmp_cc_vl but for the broadcast-capable
// avx512_icmp_cc_rmb (D/Q element sizes).
2432 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2433 PatFrag Frag_su, X86SchedWriteWidths sched,
2434 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2435 let Predicates = [prd] in
2436 defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2437 sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2439 let Predicates = [prd, HasVLX] in {
2440 defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2441 sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2442 defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2443 sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
// setcc PatFrags split by signedness so VPCMP (signed) and VPCMPU (unsigned)
// each match only their own condition codes. The _su ("single use") variants
// additionally require one use, for safe folding under a writemask.
2447 def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2448 (setcc node:$src1, node:$src2, node:$cc), [{
2449 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2450 return !ISD::isUnsignedIntSetCC(CC);
2453 def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2454 (setcc node:$src1, node:$src2, node:$cc), [{
2455 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2456 return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2459 def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2460 (setcc node:$src1, node:$src2, node:$cc), [{
2461 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2462 return ISD::isUnsignedIntSetCC(CC);
2465 def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2466 (setcc node:$src1, node:$src2, node:$cc), [{
2467 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2468 return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2471 // FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
// Immediate-condition compares: opcode 0x3F/0x1F = signed, 0x3E/0x1E =
// unsigned. B/W need HasBWI; D/Q (broadcast-capable _rmb_vl) need HasAVX512.
2472 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
2473 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2475 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
2476 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2479 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
2480 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2481 VEX_W, EVEX_CD8<16, CD8VF>;
2482 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
2483 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2484 VEX_W, EVEX_CD8<16, CD8VF>;
2486 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
2487 SchedWriteVecALU, avx512vl_i32_info,
2488 HasAVX512>, EVEX_CD8<32, CD8VF>;
2489 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
2490 SchedWriteVecALU, avx512vl_i32_info,
2491 HasAVX512>, EVEX_CD8<32, CD8VF>;
2493 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
2494 SchedWriteVecALU, avx512vl_i64_info,
2495 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2496 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
2497 SchedWriteVecALU, avx512vl_i64_info,
2498 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
// One-use variant of the FP compare node, for folding under a writemask.
2500 def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2501 (X86cmpm node:$src1, node:$src2, node:$cc), [{
2502 return N->hasOneUse();
// Swaps a VCMP immediate for commuted-operand patterns. Only the low 5 bits
// are meaningful, hence the & 0x1f.
2505 def X86cmpm_imm_commute : SDNodeXForm<timm, [{
2506 uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
2507 return getI8Imm(Imm, SDLoc(N));
// VCMPP{S,D,H} core forms: rri (reg,reg), rmi (reg,mem), rmbi (reg,
// broadcast-mem), each with a $cc immediate, plus selection patterns for
// commuted loads and for the X86cmpmm mask intrinsics. FP compares can
// raise exceptions, hence Uses = [MXCSR], mayRaiseFPException = 1.
2510 multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2512 let Uses = [MXCSR], mayRaiseFPException = 1 in {
2513 defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2514 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
2516 "$cc, $src2, $src1", "$src1, $src2, $cc",
2517 (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2518 (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2521 defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2522 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2524 "$cc, $src2, $src1", "$src1, $src2, $cc",
2525 (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2527 (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2529 Sched<[sched.Folded, sched.ReadAfterFold]>;
2531 defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2533 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2535 "$cc, ${src2}"#_.BroadcastStr#", $src1",
2536 "$src1, ${src2}"#_.BroadcastStr#", $cc",
2537 (X86any_cmpm (_.VT _.RC:$src1),
2538 (_.VT (_.BroadcastLdFrag addr:$src2)),
2540 (X86cmpm_su (_.VT _.RC:$src1),
2541 (_.VT (_.BroadcastLdFrag addr:$src2)),
2543 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2546 // Patterns for selecting with loads in other operand.
// Commuted immediate (X86cmpm_imm_commute) because only $src2 can be memory.
2547 def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2549 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2550 (X86cmpm_imm_commute timm:$cc))>;
2552 def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
2555 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2556 _.RC:$src1, addr:$src2,
2557 (X86cmpm_imm_commute timm:$cc))>;
2559 def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
2560 (_.VT _.RC:$src1), timm:$cc),
2561 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2562 (X86cmpm_imm_commute timm:$cc))>;
2564 def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
2567 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2568 _.RC:$src1, addr:$src2,
2569 (X86cmpm_imm_commute timm:$cc))>;
2571 // Patterns for mask intrinsics.
// X86cmpmm carries the mask as a fourth operand: all-ones selects the
// unmasked instruction, a register mask selects the *k form.
2572 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
2573 (_.KVT immAllOnesV)),
2574 (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>;
2576 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask),
2577 (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1,
2578 _.RC:$src2, timm:$cc)>;
2580 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2581 (_.KVT immAllOnesV)),
2582 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>;
2584 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2586 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1,
2587 addr:$src2, timm:$cc)>;
2589 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2590 (_.KVT immAllOnesV)),
2591 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>;
2593 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2595 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1,
2596 addr:$src2, timm:$cc)>;
2598 // Patterns for mask intrinsics with loads in other operand.
2599 def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2600 (_.KVT immAllOnesV)),
2601 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2602 (X86cmpm_imm_commute timm:$cc))>;
2604 def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2606 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2607 _.RC:$src1, addr:$src2,
2608 (X86cmpm_imm_commute timm:$cc))>;
2610 def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2611 (_.KVT immAllOnesV)),
2612 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2613 (X86cmpm_imm_commute timm:$cc))>;
2615 def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2617 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2618 _.RC:$src1, addr:$src2,
2619 (X86cmpm_imm_commute timm:$cc))>;
// {sae} (suppress-all-exceptions) compare form, 512-bit only. EVEX_B here
// encodes SAE rather than broadcast; reads MXCSR but cannot raise.
2622 multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2623 // comparison code form (VCMP[EQ/LT/LE/...]
2624 let Uses = [MXCSR] in
2625 defm rrib : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst),
2626 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2627 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
2629 "$cc, {sae}, $src2, $src1",
2630 "$src1, $src2, {sae}, $cc",
2631 [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2632 (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))],
2633 [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2634 (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>,
2635 EVEX_B, Sched<[sched]>;
// VL driver for VCMP: ZMM gets both the normal and the SAE form; the SAE
// form only exists at 512 bits, so XMM/YMM get avx512_vcmp_common alone.
2638 multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
2639 Predicate Pred = HasAVX512> {
2640 let Predicates = [Pred] in {
2641 defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2642 avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2645 let Predicates = [Pred,HasVLX] in {
2646 defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2647 defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
// Packed FP compares; VCMPPH (fp16) is additionally gated on HasFP16.
2651 defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2652 AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
2653 defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2654 AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2655 defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
2656 AVX512PSIi8Base, EVEX_4V, EVEX_CD8<16, CD8VF>, TA;
2658 // Patterns to select fp compares with load as first operand.
// Scalar compares with the load commuted into the foldable slot; the
// immediate is swapped accordingly.
2659 let Predicates = [HasAVX512] in {
2660 def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
2661 (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2663 def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
2664 (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2667 let Predicates = [HasFP16] in {
2668 def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
2669 (VCMPSHZrm FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2672 // ----------------------------------------------------------------
// One-use wrappers around the fpclass nodes (scalar and packed) so masked
// forms only fold a compare that has no other users.
2675 def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
2676 (X86Vfpclasss node:$src1, node:$src2), [{
2677 return N->hasOneUse();
2680 def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
2681 (X86Vfpclass node:$src1, node:$src2), [{
2682 return N->hasOneUse();
2685 //handle fpclass instruction mask = op(reg_scalar,imm)
2686 // op(mem_scalar,imm)
// Scalar VFPCLASS: classifies one element against the i32u8imm category
// mask, producing a 1-bit mask result. rr/rm plus writemasked rrk/rmk.
2687 multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
2688 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2690 let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2691 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2692 (ins _.RC:$src1, i32u8imm:$src2),
2693 OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2694 [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
2695 (i32 timm:$src2)))]>,
2697 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2698 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2700 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2701 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2702 (X86Vfpclasss_su (_.VT _.RC:$src1),
2703 (i32 timm:$src2))))]>,
2704 EVEX_K, Sched<[sched]>;
2705 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2706 (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2708 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2710 (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
2711 (i32 timm:$src2)))]>,
2712 Sched<[sched.Folded, sched.ReadAfterFold]>;
2713 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2714 (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2716 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2717 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2718 (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
2719 (i32 timm:$src2))))]>,
2720 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2724 //handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm)
2725 // fpclass(reg_vec, mem_vec, imm)
2726 // fpclass(reg_vec, broadcast(eltVt), imm)
// Packed VFPCLASS: rr/rm/rmb (broadcast) plus writemasked rrk/rmk/rmbk,
// and AT&T InstAliases with the x/y/z width suffix (the `mem` parameter).
2727 multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
2728 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2730 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2731 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2732 (ins _.RC:$src1, i32u8imm:$src2),
2733 OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2734 [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
2735 (i32 timm:$src2)))]>,
2737 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2738 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2740 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2741 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2742 (X86Vfpclass_su (_.VT _.RC:$src1),
2743 (i32 timm:$src2))))]>,
2744 EVEX_K, Sched<[sched]>;
2745 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2746 (ins _.MemOp:$src1, i32u8imm:$src2),
2747 OpcodeStr#_.Suffix#"{"#mem#"}"#
2748 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2749 [(set _.KRC:$dst,(X86Vfpclass
2750 (_.VT (_.LdFrag addr:$src1)),
2751 (i32 timm:$src2)))]>,
2752 Sched<[sched.Folded, sched.ReadAfterFold]>;
2753 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2754 (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2755 OpcodeStr#_.Suffix#"{"#mem#"}"#
2756 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2757 [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
2758 (_.VT (_.LdFrag addr:$src1)),
2759 (i32 timm:$src2))))]>,
2760 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast form: one scalar element replicated, marked EVEX_B.
2761 def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2762 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2763 OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2764 _.BroadcastStr#", $dst|$dst, ${src1}"
2765 #_.BroadcastStr#", $src2}",
2766 [(set _.KRC:$dst,(X86Vfpclass
2767 (_.VT (_.BroadcastLdFrag addr:$src1)),
2768 (i32 timm:$src2)))]>,
2769 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2770 def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2771 (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2772 OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2773 _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
2774 _.BroadcastStr#", $src2}",
2775 [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
2776 (_.VT (_.BroadcastLdFrag addr:$src1)),
2777 (i32 timm:$src2))))]>,
2778 EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2781 // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
// AT&T-syntax aliases accepting the explicit width-suffixed mnemonic.
2783 def : InstAlias<OpcodeStr#_.Suffix#mem#
2784 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2785 (!cast<Instruction>(NAME#"rr")
2786 _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2787 def : InstAlias<OpcodeStr#_.Suffix#mem#
2788 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2789 (!cast<Instruction>(NAME#"rrk")
2790 _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2791 def : InstAlias<OpcodeStr#_.Suffix#mem#
2792 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
2793 _.BroadcastStr#", $src2}",
2794 (!cast<Instruction>(NAME#"rmb")
2795 _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2796 def : InstAlias<OpcodeStr#_.Suffix#mem#
2797 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
2798 "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
2799 (!cast<Instruction>(NAME#"rmbk")
2800 _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
// VL driver for packed VFPCLASS; passes the "x"/"y"/"z" disambiguation
// suffix to each width's InstAliases.
2803 multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2804 bits<8> opc, X86SchedWriteWidths sched,
2806 let Predicates = [prd] in {
2807 defm Z : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
2808 _.info512, "z">, EVEX_V512;
2810 let Predicates = [prd, HasVLX] in {
2811 defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
2812 _.info128, "x">, EVEX_V128;
2813 defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
2814 _.info256, "y">, EVEX_V256;
// Instantiates packed (opcVec) and scalar (opcScalar) VFPCLASS for all three
// FP element types. fp16 forms use the TA map; fp32/fp64 require HasDQI for
// the scalar forms.
2818 multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2819 bits<8> opcScalar, X86SchedWriteWidths sched> {
2820 defm PH : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f16_info, opcVec,
2822 EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA;
2823 defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2824 sched.Scl, f16x_info, HasFP16>,
2825 EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA;
2826 defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
2828 EVEX_CD8<32, CD8VF>, AVX512AIi8Base;
2829 defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
2831 EVEX_CD8<64, CD8VF>, AVX512AIi8Base, VEX_W;
2832 defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2833 sched.Scl, f32x_info, HasDQI>, VEX_LIG,
2834 EVEX_CD8<32, CD8VT1>, AVX512AIi8Base;
2835 defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2836 sched.Scl, f64x_info, HasDQI>, VEX_LIG,
2837 EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, VEX_W;
// Opcode 0x66 = packed, 0x67 = scalar forms.
2840 defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX;
2842 //-----------------------------------------------------------------
2843 // Mask register copy, including
2844 // - copy between mask registers
2845 // - load/store mask registers
2846 // - copy from GPR to mask register and vice versa
// KMOV k<->k, k<-mem, k->mem forms. The kk copy is marked isMoveReg and
// has no pattern; load/store forms match plain load/store of the mask VT.
2848 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2849 string OpcodeStr, RegisterClass KRC,
2850 ValueType vvt, X86MemOperand x86memop> {
2851 let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
2852 def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2853 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2855 def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2856 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2857 [(set KRC:$dst, (vvt (load addr:$src)))]>,
2859 def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2860 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2861 [(store KRC:$src, addr:$dst)]>,
2862 Sched<[WriteStore]>;
// KMOV between mask registers and GPRs; no patterns (selection handles
// these via COPY_TO_REGCLASS), hence hasSideEffects = 0 and empty lists.
2865 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2867 RegisterClass KRC, RegisterClass GRC> {
2868 let hasSideEffects = 0 in {
2869 def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2870 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2872 def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2873 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
// KMOVB needs DQI; KMOVW is baseline AVX512; KMOVD/KMOVQ need BWI.
// GPR forms for B/W/D use GR32 (no 8/16-bit GPR moves); KMOVQ uses GR64.
2878 let Predicates = [HasDQI] in
2879 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2880 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2883 let Predicates = [HasAVX512] in
2884 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2885 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2888 let Predicates = [HasBWI] in {
2889 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2891 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2893 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2895 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2899 // GR from/to mask register
// i16/i8 <-> v16i1/v8i1 go through GR32 with sub-register insert/extract
// because mask<->GPR moves only exist at 32/64-bit width for these sizes.
2900 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2901 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2902 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2903 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2904 def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))),
2905 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>;
2907 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2908 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2909 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2910 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
// zext uses KMOVWrk/KMOVBrk (which zero the upper bits); anyext can use a
// plain register-class copy since the upper bits are don't-care.
2912 def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2913 (KMOVWrk VK16:$src)>;
2914 def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2915 (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
2916 def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2917 (COPY_TO_REGCLASS VK16:$src, GR32)>;
2918 def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2919 (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
2921 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2922 (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2923 def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2924 (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
2925 def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2926 (COPY_TO_REGCLASS VK8:$src, GR32)>;
2927 def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2928 (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
// 32/64-bit masks match GPR width exactly, so a plain class copy suffices.
2930 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2931 (COPY_TO_REGCLASS GR32:$src, VK32)>;
2932 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2933 (COPY_TO_REGCLASS VK32:$src, GR32)>;
2934 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2935 (COPY_TO_REGCLASS GR64:$src, VK64)>;
2936 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2937 (COPY_TO_REGCLASS VK64:$src, GR64)>;
// Loads of sub-byte masks: with DQI use KMOVB and retag the class; without
// DQI, v8i1 loads via a zero-extending GPR byte load and v16i1 via KMOVW.
2940 let Predicates = [HasDQI] in {
2941 def : Pat<(v1i1 (load addr:$src)),
2942 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2943 def : Pat<(v2i1 (load addr:$src)),
2944 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2945 def : Pat<(v4i1 (load addr:$src)),
2946 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2949 let Predicates = [HasAVX512] in {
2950 def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2951 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2952 def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2953 (KMOVWkm addr:$src)>;
// extract_vector_elt from an i1 vector yielding an i8 scalar.
2956 def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2957 SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2958 SDTCVecEltisVT<1, i1>,
2961 let Predicates = [HasAVX512] in {
// Lowers scalar<->mask moves for every mask width via register-class
// copies; GR8 sources are first widened into a 32-bit register.
2962 multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2963 def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2964 (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2966 def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2967 (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2969 def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2970 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2972 def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2973 (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
2976 defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
2977 defm : operation_gpr_mask_copy_lowering<VK2, v2i1>;
2978 defm : operation_gpr_mask_copy_lowering<VK4, v4i1>;
2979 defm : operation_gpr_mask_copy_lowering<VK8, v8i1>;
2980 defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
2981 defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
2982 defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;
2984 def : Pat<(insert_subvector (v16i1 immAllZerosV),
2985 (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2987 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2991 // Mask unary operation
// Single register-register form applying OpNode (e.g. vnot for KNOT).
2993 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2994 RegisterClass KRC, SDPatternOperator OpNode,
2995 X86FoldableSchedWrite sched, Predicate prd> {
2996 let Predicates = [prd] in
2997 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2998 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2999 [(set KRC:$dst, (OpNode KRC:$src))]>,
// Expands a mask unop to B/W/D/Q widths with the matching predicates and
// prefix bytes (B: DQI/PD, W: AVX512/PS, D/Q: BWI with VEX_W).
3003 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
3004 SDPatternOperator OpNode,
3005 X86FoldableSchedWrite sched> {
3006 defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3007 sched, HasDQI>, VEX, PD;
3008 defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3009 sched, HasAVX512>, VEX, PS;
3010 defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3011 sched, HasBWI>, VEX, PD, VEX_W;
3012 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3013 sched, HasBWI>, VEX, PS, VEX_W;
3016 // TODO - do we need a X86SchedWriteWidths::KMASK type?
3017 defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
3019 // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
// Sub-word knot: promote the operand to VK16, use KNOTW, copy the result
// back to the original narrow class.
3020 let Predicates = [HasAVX512, NoDQI] in
3021 def : Pat<(vnot VK8:$src),
3022 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
3024 def : Pat<(vnot VK4:$src),
3025 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
3026 def : Pat<(vnot VK2:$src),
3027 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
// v1i1 knot via VK16 promotion. The final COPY_TO_REGCLASS must target the
// pattern's own class VK1 (the sibling VK8/VK4/VK2 patterns above each copy
// back to their own class); it previously said VK2, mismatching the v1i1
// result type.
3028 def : Pat<(vnot VK1:$src),
3029 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;
3031 // Mask binary operation
3032 // - KAND, KANDN, KOR, KXNOR, KXOR
// Single rr form; IsCommutable propagates to isCommutable so the register
// allocator/ISel may swap operands.
3033 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
3034 RegisterClass KRC, SDPatternOperator OpNode,
3035 X86FoldableSchedWrite sched, Predicate prd,
3037 let Predicates = [prd], isCommutable = IsCommutable in
3038 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
3039 !strconcat(OpcodeStr,
3040 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3041 [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
// Expands a mask binop to B/W/D/Q. The W predicate is parameterized
// (prdW) so KADD can require HasDQI while the others use HasAVX512.
3045 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
3046 SDPatternOperator OpNode,
3047 X86FoldableSchedWrite sched, bit IsCommutable,
3048 Predicate prdW = HasAVX512> {
3049 defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3050 sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
3051 defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3052 sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
3053 defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3054 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
3055 defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3056 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
3059 // These nodes use 'vnot' instead of 'not' to support vectors.
3060 def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
3061 def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
3063 // TODO - do we need a X86SchedWriteWidths::KMASK type?
// KANDN is the only non-commutable one (it negates its first operand);
// KADD is DQI-only even for the word form.
3064 defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>;
3065 defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>;
3066 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>;
3067 defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>;
3068 defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>;
3069 defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
// Lowers narrow-mask binops by promoting both operands to VK16, running
// the word-width instruction (Inst), and copying back. VK8 only needs this
// without DQI (KxxxB covers it otherwise); VK1/VK2/VK4 always need it.
3071 multiclass avx512_binop_pat<SDPatternOperator VOpNode,
3073 // With AVX512F, 8-bit mask is promoted to 16-bit mask,
3074 // for the DQI set, this type is legal and KxxxB instruction is used
3075 let Predicates = [NoDQI] in
3076 def : Pat<(VOpNode VK8:$src1, VK8:$src2),
3078 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
3079 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
3081 // All types smaller than 8 bits require conversion anyway
3082 def : Pat<(VOpNode VK1:$src1, VK1:$src2),
3083 (COPY_TO_REGCLASS (Inst
3084 (COPY_TO_REGCLASS VK1:$src1, VK16),
3085 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
3086 def : Pat<(VOpNode VK2:$src1, VK2:$src2),
3087 (COPY_TO_REGCLASS (Inst
3088 (COPY_TO_REGCLASS VK2:$src1, VK16),
3089 (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
3090 def : Pat<(VOpNode VK4:$src1, VK4:$src2),
3091 (COPY_TO_REGCLASS (Inst
3092 (COPY_TO_REGCLASS VK4:$src1, VK16),
3093 (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
// Promote-to-VK16 lowering for each logical op via its word-width KxxxW.
3096 defm : avx512_binop_pat<and, KANDWrr>;
3097 defm : avx512_binop_pat<vandn, KANDNWrr>;
3098 defm : avx512_binop_pat<or, KORWrr>;
3099 defm : avx512_binop_pat<vxnor, KXNORWrr>;
3100 defm : avx512_binop_pat<xor, KXORWrr>;
// KUNPCK: interleaves two source masks into one double-width mask. The
// concat_vectors pattern swaps $src1/$src2 because KUNPCK puts its second
// operand in the low half.
3103 multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
3104 X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
3106 let Predicates = [prd] in {
3107 let hasSideEffects = 0 in
3108 def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
3109 (ins Src.KRC:$src1, Src.KRC:$src2),
3110 "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
3111 VEX_4V, VEX_L, Sched<[sched]>;
3113 def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
3114 (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
3118 defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info, WriteShuffle, HasAVX512>, PD;
3119 defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
3120 defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;
// Mask-register test op (KORTEST/KTEST): reads two mask registers and sets
// EFLAGS; no register result.
// NOTE(review): the `Predicate prd` declaration line (orig. 3125) and the
// trailing format/Sched line (orig. ~3130) appear lost in this extraction.
3123 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3124 SDNode OpNode, X86FoldableSchedWrite sched,
3126 let Predicates = [prd], Defs = [EFLAGS] in
3127 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
3128 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
3129 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
// Defines the B/W/D/Q width variants of a mask test op. The byte form needs
// DQI, the dword/qword forms need BWI; the word form's predicate defaults to
// plain AVX512F but can be overridden via prdW.
// NOTE(review): the encoding-suffix lines after each defm (orig. 3137/3139/
// 3141/...) appear lost in this extraction.
3133 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
3134 X86FoldableSchedWrite sched,
3135 Predicate prdW = HasAVX512> {
3136 defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
3138 defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
3140 defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
3142 defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
3146 // TODO - do we need a X86SchedWriteWidths::KMASK type?
// KORTEST (0x98) ors the sources; KTEST (0x99, DQI-only for the W form)
// tests them. Both only update EFLAGS.
3147 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
3148 defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
// Mask-register shift by 8-bit immediate (KSHIFTL/KSHIFTR family).
// NOTE(review): the trailing Sched/format line of the def (orig. ~3158)
// appears lost in this extraction.
3151 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3152 SDNode OpNode, X86FoldableSchedWrite sched> {
3153 let Predicates = [HasAVX512] in
3154 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
3155 !strconcat(OpcodeStr,
3156 "\t{$imm, $src, $dst|$dst, $src, $imm}"),
3157 [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
// Width variants of a mask shift. Note two opcodes: opc1 encodes the B/W
// forms, opc2 the D/Q forms. B needs DQI; D/Q need BWI.
// NOTE(review): the continuation lines after the B and D defms (orig. 3167,
// 3172...) appear lost in this extraction.
3161 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
3162 SDNode OpNode, X86FoldableSchedWrite sched> {
3163 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3164 sched>, VEX, TAPD, VEX_W;
3165 let Predicates = [HasDQI] in
3166 defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3168 let Predicates = [HasBWI] in {
3169 defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3170 sched>, VEX, TAPD, VEX_W;
3171 defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
// kshiftl: 0x32 (b/w) / 0x33 (d/q); kshiftr: 0x30 (b/w) / 0x31 (d/q).
3176 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
3177 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
3179 // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Widens a narrow (128/256-bit) integer compare-with-cc to the 512-bit VPCMP
// form by inserting the operands into an undef wide register, then narrows
// the resulting mask back with COPY_TO_REGCLASS. Used when VLX is absent.
// NOTE(review): "axv512" is a (pre-existing upstream) typo for "avx512";
// renaming would touch every defm user, so it is only flagged here.
// NOTE(review): the `string InstStr` parameter line (orig. 3181) appears
// lost in this extraction, as do some intermediate result-wrapping lines
// (e.g. orig. 3186) — `InstStr` is referenced below but not declared here.
3180 multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3182 X86VectorVTInfo Narrow,
3183 X86VectorVTInfo Wide> {
// Unmasked compare: widen both register operands, translate the condition
// code via X86pcmpm_imm.
3184 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3185 (Narrow.VT Narrow.RC:$src2), cond)),
3187 (!cast<Instruction>(InstStr#"Zrri")
3188 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3189 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3190 (X86pcmpm_imm $cc)), Narrow.KRC)>;
// Masked compare (mask ANDed with result): use the zero-masked "k" variant,
// widening the mask register to the wide mask class first.
3192 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3193 (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3194 (Narrow.VT Narrow.RC:$src2),
3196 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3197 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3198 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3199 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3200 (X86pcmpm_imm $cc)), Narrow.KRC)>;
// Broadcast-memory ("rmb") variant of the no-VLX integer compare lowering:
// one operand is a broadcast load folded into the instruction's memory form.
// Covers plain + masked patterns, and both operand orders of the broadcast
// (the commuted forms use X86pcmpm_imm_commute to fix up the condition code).
// NOTE(review): as above, "axv512" is a pre-existing upstream typo, and the
// `string InstStr` parameter line plus several wrapper lines appear lost in
// this extraction.
3203 multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3205 X86VectorVTInfo Narrow,
3206 X86VectorVTInfo Wide> {
// Unmasked: reg OP broadcast-load.
3208 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3209 (Narrow.BroadcastLdFrag addr:$src2), cond)),
3211 (!cast<Instruction>(InstStr#"Zrmib")
3212 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3213 addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
// Masked: reg OP broadcast-load under a mask.
3215 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3217 (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3218 (Narrow.BroadcastLdFrag addr:$src2),
3220 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3221 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3222 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3223 addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
3225 // Commuted with broadcast load.
3226 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2),
3227 (Narrow.VT Narrow.RC:$src1),
3230 (!cast<Instruction>(InstStr#"Zrmib")
3231 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3232 addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
// Masked commuted form.
3234 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3236 (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
3237 (Narrow.VT Narrow.RC:$src1),
3239 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3240 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3241 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3242 addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3245 // Same as above, but for fp types which don't use PatFrags.
// FP analogue of the integer no-VLX lowering: matches X86cmpm/X86cmpm_su
// directly with a timm condition code instead of PatFrag + cond translation.
// Covers reg-reg, broadcast-memory, masked, and commuted-broadcast forms
// (commuted forms adjust the cc via X86cmpm_imm_commute).
// NOTE(review): "axv512" typo as in the sibling multiclasses; wrapper lines
// (e.g. orig. 3251) appear lost in this extraction.
3246 multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
3247 X86VectorVTInfo Narrow,
3248 X86VectorVTInfo Wide> {
// Unmasked reg-reg compare.
3249 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3250 (Narrow.VT Narrow.RC:$src2), timm:$cc)),
3252 (!cast<Instruction>(InstStr#"Zrri")
3253 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3254 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3255 timm:$cc), Narrow.KRC)>;
// Masked reg-reg compare.
3257 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3258 (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3259 (Narrow.VT Narrow.RC:$src2), timm:$cc))),
3260 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3261 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3262 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3263 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3264 timm:$cc), Narrow.KRC)>;
// Unmasked reg vs. broadcast-load compare.
3267 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3268 (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
3270 (!cast<Instruction>(InstStr#"Zrmbi")
3271 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3272 addr:$src2, timm:$cc), Narrow.KRC)>;
// Masked reg vs. broadcast-load compare.
3274 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3275 (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3276 (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
3277 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3278 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3279 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3280 addr:$src2, timm:$cc), Narrow.KRC)>;
3282 // Commuted with broadcast load.
3283 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3284 (Narrow.VT Narrow.RC:$src1), timm:$cc)),
3286 (!cast<Instruction>(InstStr#"Zrmbi")
3287 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3288 addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
// Masked commuted form.
3290 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3291 (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3292 (Narrow.VT Narrow.RC:$src1), timm:$cc))),
3293 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3294 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3295 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3296 addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
// Instantiate the no-VLX compare lowerings for 32/64-bit element types
// (signed + unsigned integer, then FP), widening 256- and 128-bit vectors
// to the 512-bit instruction.
3299 let Predicates = [HasAVX512, NoVLX] in {
3300 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3301 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3303 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3304 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3306 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3307 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3309 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3310 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
// Broadcast-memory forms of the same compares.
3312 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3313 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3315 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3316 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3318 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3319 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3321 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3322 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
// FP compares (VCMPPS/VCMPPD).
3324 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
3325 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
3326 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
3327 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
// Byte/word element compares need BWI; same no-VLX widening as above.
3330 let Predicates = [HasBWI, NoVLX] in {
3331 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3332 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3334 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3335 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3337 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3338 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3340 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3341 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
// No broadcast ("rmb") forms here: byte/word compares have no embedded
// broadcast encoding.
3344 // Mask setting all 0s or 1s
// Rematerializable pseudo that sets a whole mask register to the constant
// Val (all-zeros or all-ones); expanded post-RA.
3345 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> {
3346 let Predicates = [HasAVX512] in
3347 let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3348 SchedRW = [WriteZero] in
3349 def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3350 [(set KRC:$dst, (VT Val))]>;
// W/D/Q width variants of the mask set-constant pseudo. The 8-bit case is
// handled by patterns below via the 16-bit form.
3353 multiclass avx512_mask_setop_w<SDPatternOperator Val> {
3354 defm W : avx512_mask_setop<VK16, v16i1, Val>;
3355 defm D : avx512_mask_setop<VK32, v32i1, Val>;
3356 defm Q : avx512_mask_setop<VK64, v64i1, Val>;
// KSET0* = all-zeros mask, KSET1* = all-ones mask.
3359 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3360 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3362 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// Sub-16-bit mask constants reuse the 16-bit KSET pseudos and narrow the
// result with COPY_TO_REGCLASS.
3363 let Predicates = [HasAVX512] in {
3364 def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3365 def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3366 def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3367 def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3368 def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
3369 def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
3370 def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
3371 def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
3374 // Patterns for kmask insert_subvector/extract_subvector to/from index=0
// Both directions are pure register-class copies: the low bits of the wider
// mask register are the narrow mask, so no instruction is emitted.
3375 multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3376 RegisterClass RC, ValueType VT> {
3377 def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3378 (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3380 def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3381 (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
// Every (narrow, wide) mask-class pair, narrow < wide.
3383 defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>;
3384 defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>;
3385 defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>;
3386 defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
3387 defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
3388 defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;
3390 defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>;
3391 defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>;
3392 defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>;
3393 defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>;
3394 defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>;
3396 defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>;
3397 defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>;
3398 defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>;
3399 defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>;
3401 defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>;
3402 defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>;
3403 defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>;
3405 defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3406 defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3408 defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3410 //===----------------------------------------------------------------------===//
3411 // AVX-512 - Aligned and unaligned load and store
// Core AVX-512 vector load multiclass: defines the rr (reg move), rrk/rrkz
// (masked reg move), rm (load), rmk/rmkz (masked load) forms plus
// masked-load DAG patterns. NoRMPattern suppresses the plain-load pattern
// (used when another width's instruction covers it); SelectOprr lets callers
// disable (null_frag) or override the select used for reg-reg masking.
// NOTE(review): several continuation lines (e.g. orig. 3430, 3438, 3449)
// appear lost in this extraction.
3414 multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3415 X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3416 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3417 bit NoRMPattern = 0,
3418 SDPatternOperator SelectOprr = vselect> {
3419 let hasSideEffects = 0 in {
3420 let isMoveReg = 1 in
3421 def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3422 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3423 _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
3424 EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
// Zero-masked reg-reg move: dst = select(mask, src, 0).
3425 def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3426 (ins _.KRCWM:$mask, _.RC:$src),
3427 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3428 "${dst} {${mask}} {z}, $src}"),
3429 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3431 _.ImmAllZerosV)))], _.ExeDomain>,
3432 EVEX, EVEX_KZ, Sched<[Sched.RR]>;
3434 let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3435 def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3436 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3437 !if(NoRMPattern, [],
3439 (_.VT (ld_frag addr:$src)))]),
3440 _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
3441 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
// Merge-masked forms tie $src0 to $dst (pass-through of unselected lanes).
3443 let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3444 def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3445 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3446 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3447 "${dst} {${mask}}, $src1}"),
3448 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3450 (_.VT _.RC:$src0))))], _.ExeDomain>,
3451 EVEX, EVEX_K, Sched<[Sched.RR]>;
3452 def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3453 (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3454 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3455 "${dst} {${mask}}, $src1}"),
3456 [(set _.RC:$dst, (_.VT
3457 (vselect_mask _.KRCWM:$mask,
3458 (_.VT (ld_frag addr:$src1)),
3459 (_.VT _.RC:$src0))))], _.ExeDomain>,
3460 EVEX, EVEX_K, Sched<[Sched.RM]>;
3462 def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3463 (ins _.KRCWM:$mask, _.MemOp:$src),
3464 OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3465 "${dst} {${mask}} {z}, $src}",
3466 [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
3467 (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3468 _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
// masked_load with undef or zero pass-through maps to the zero-masking
// load; with a register pass-through it maps to the merge-masking load.
3470 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3471 (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3473 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3474 (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3476 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3477 (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
3478 _.KRCWM:$mask, addr:$ptr)>;
// Aligned-load variants at all three vector lengths. The 512-bit (Z) form
// needs only `prd`; the 256/128-bit (Z256/Z128) forms additionally need VLX.
3481 multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3482 AVX512VLVectorVTInfo _, Predicate prd,
3483 X86SchedWriteMoveLSWidths Sched,
3484 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
3485 let Predicates = [prd] in
3486 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3487 _.info512.AlignedLdFrag, masked_load_aligned,
3488 Sched.ZMM, "", NoRMPattern>, EVEX_V512;
3490 let Predicates = [prd, HasVLX] in {
3491 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3492 _.info256.AlignedLdFrag, masked_load_aligned,
3493 Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
3494 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3495 _.info128.AlignedLdFrag, masked_load_aligned,
3496 Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
// Unaligned-load variants at all three vector lengths; mirrors
// avx512_alignedload_vl but uses the plain LdFrag/masked_load and forwards
// SelectOprr to the per-width loads.
3500 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3501 AVX512VLVectorVTInfo _, Predicate prd,
3502 X86SchedWriteMoveLSWidths Sched,
3503 string EVEX2VEXOvrd, bit NoRMPattern = 0,
3504 SDPatternOperator SelectOprr = vselect> {
3505 let Predicates = [prd] in
3506 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3507 masked_load, Sched.ZMM, "",
3508 NoRMPattern, SelectOprr>, EVEX_V512;
3510 let Predicates = [prd, HasVLX] in {
3511 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3512 masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
3513 NoRMPattern, SelectOprr>, EVEX_V256;
3514 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3515 masked_load, Sched.XMM, EVEX2VEXOvrd,
3516 NoRMPattern, SelectOprr>, EVEX_V128;
// Core AVX-512 vector store multiclass: reversed-encoding reg-reg moves
// (rr_REV/rrk_REV/rrkz_REV, disassembly-only), memory stores (mr, mrk),
// a masked-store pattern, and ".s" assembler aliases that force the
// reversed encodings. NoMRPattern suppresses the plain-store pattern.
// NOTE(review): Sched lines for rrk_REV/rrkz_REV/mrk (orig. 3537, 3544,
// 3558) appear lost in this extraction.
3520 multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3521 X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3522 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3523 bit NoMRPattern = 0> {
3524 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3525 let isMoveReg = 1 in
3526 def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3527 OpcodeStr # "\t{$src, $dst|$dst, $src}",
3528 [], _.ExeDomain>, EVEX,
3529 FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
3530 EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
3531 def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3532 (ins _.KRCWM:$mask, _.RC:$src),
3533 OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3534 "${dst} {${mask}}, $src}",
3535 [], _.ExeDomain>, EVEX, EVEX_K,
3536 FoldGenData<BaseName#_.ZSuffix#rrk>,
3538 def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3539 (ins _.KRCWM:$mask, _.RC:$src),
3540 OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3541 "${dst} {${mask}} {z}, $src}",
3542 [], _.ExeDomain>, EVEX, EVEX_KZ,
3543 FoldGenData<BaseName#_.ZSuffix#rrkz>,
3547 let hasSideEffects = 0, mayStore = 1 in
3548 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3549 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3550 !if(NoMRPattern, [],
3551 [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3552 _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
3553 EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
3554 def mrk : AVX512PI<opc, MRMDestMem, (outs),
3555 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3556 OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3557 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
// Lower masked_store directly to the masked store instruction.
3560 def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3561 (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3562 _.KRCWM:$mask, _.RC:$src)>;
// ".s" mnemonic aliases select the store-form (reversed) encodings.
3564 def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3565 (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3566 _.RC:$dst, _.RC:$src), 0>;
3567 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3568 (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3569 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3570 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3571 (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3572 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
// Unaligned-store variants at all three vector lengths (Z / Z256 / Z128);
// 256/128-bit forms additionally require VLX.
3575 multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3576 AVX512VLVectorVTInfo _, Predicate prd,
3577 X86SchedWriteMoveLSWidths Sched,
3578 string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3579 let Predicates = [prd] in
3580 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3581 masked_store, Sched.ZMM, "",
3582 NoMRPattern>, EVEX_V512;
3583 let Predicates = [prd, HasVLX] in {
3584 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3585 masked_store, Sched.YMM,
3586 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3587 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3588 masked_store, Sched.XMM, EVEX2VEXOvrd,
3589 NoMRPattern>, EVEX_V128;
// Aligned-store counterpart of avx512_store_vl, using alignedstore /
// masked_store_aligned fragments.
3593 multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3594 AVX512VLVectorVTInfo _, Predicate prd,
3595 X86SchedWriteMoveLSWidths Sched,
3596 string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3597 let Predicates = [prd] in
3598 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3599 masked_store_aligned, Sched.ZMM, "",
3600 NoMRPattern>, EVEX_V512;
3602 let Predicates = [prd, HasVLX] in {
3603 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3604 masked_store_aligned, Sched.YMM,
3605 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3606 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3607 masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
3608 NoMRPattern>, EVEX_V128;
// Instantiate the FP and integer move instructions. Load opcode 0x28/0x10
// (FP) or 0x6F (int); store opcode 0x29/0x11 (FP) or 0x7F (int). The
// NoRMPattern/NoMRPattern "1" arguments on some integer forms suppress the
// plain load/store patterns — those types are covered by the VMOVDQA64/
// VMOVDQU64 patterns further below.
// NOTE(review): the EVEX2VEX-override string lines for VMOVDQA32/64 (orig.
// 3638/3641/3646/3649) appear lost in this extraction.
3612 defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3613 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3614 avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3615 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3616 PS, EVEX_CD8<32, CD8VF>;
3618 defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3619 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3620 avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3621 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3622 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3624 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3625 SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
3626 avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3627 SchedWriteFMoveLS, "VMOVUPS">,
3628 PS, EVEX_CD8<32, CD8VF>;
3630 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3631 SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
3632 avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3633 SchedWriteFMoveLS, "VMOVUPD">,
3634 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3636 defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3637 HasAVX512, SchedWriteVecMoveLS,
3639 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3640 HasAVX512, SchedWriteVecMoveLS,
3642 PD, EVEX_CD8<32, CD8VF>;
3644 defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3645 HasAVX512, SchedWriteVecMoveLS,
3647 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3648 HasAVX512, SchedWriteVecMoveLS,
3650 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3652 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3653 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3654 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3655 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3656 XD, EVEX_CD8<8, CD8VF>;
3658 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3659 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3660 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3661 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3662 XD, VEX_W, EVEX_CD8<16, CD8VF>;
3664 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3665 SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
3666 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3667 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3668 XS, EVEX_CD8<32, CD8VF>;
3670 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3671 SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
3672 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3673 SchedWriteVecMoveLS, "VMOVDQU">,
3674 XS, VEX_W, EVEX_CD8<64, CD8VF>;
3676 // Special instructions to help with spilling when we don't have VLX. We need
3677 // to load or store from a ZMM register instead. These are converted in
3678 // expandPostRAPseudos.
3679 let isReMaterializable = 1, canFoldAsLoad = 1,
3680 isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3681 def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3682 "", []>, Sched<[WriteFLoadX]>;
3683 def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3684 "", []>, Sched<[WriteFLoadY]>;
3685 def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3686 "", []>, Sched<[WriteFLoadX]>;
3687 def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3688 "", []>, Sched<[WriteFLoadY]>;
// Store counterparts of the spill pseudos above.
3691 let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3692 def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3693 "", []>, Sched<[WriteFStoreX]>;
3694 def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3695 "", []>, Sched<[WriteFStoreY]>;
3696 def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3697 "", []>, Sched<[WriteFStoreX]>;
3698 def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3699 "", []>, Sched<[WriteFStoreY]>;
// vselect(mask, 0, src) has no direct masked-move encoding, so invert the
// mask with KNOT and use the zero-masking move. The v8i64 form routes the
// 8-bit mask through VK16 because KNOTWrr operates on 16-bit masks.
// NOTE(review): the continuation of the first pattern (orig. 3705-3706,
// presumably the narrowing copy and VR512:$src operand) appears lost in
// this extraction.
3702 def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3703 (v8i64 VR512:$src))),
3704 (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3707 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3708 (v16i32 VR512:$src))),
3709 (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3711 // These patterns exist to prevent the above patterns from introducing a second
3712 // mask inversion when one already exists.
3713 def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)),
3714 (v8i64 immAllZerosV),
3715 (v8i64 VR512:$src))),
3716 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3717 def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)),
3718 (v16i32 immAllZerosV),
3719 (v16i32 VR512:$src))),
3720 (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
// Lowers a narrow (128/256-bit) masked select to the wide (512-bit) masked
// move: insert operands into an undef wide register, run the rrk/rrkz move,
// and (in the original file) extract the narrow result back out.
// NOTE(review): the result-extraction lines of both patterns (orig. 3726-
// 3727, 3732-3733, 3736-3737, 3741-3742) appear lost in this extraction —
// the EXTRACT_SUBREG wrappers are not visible here.
3722 multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3723 X86VectorVTInfo Wide> {
// Merge-masking select: vselect(mask, src1, src0).
3724 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3725 Narrow.RC:$src1, Narrow.RC:$src0)),
3728 (!cast<Instruction>(InstrStr#"rrk")
3729 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3730 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3731 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
// Zero-masking select: vselect(mask, src1, 0).
3734 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3735 Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3738 (!cast<Instruction>(InstrStr#"rrkz")
3739 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3740 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3744 // Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
3745 // available. Use a 512-bit operation and extract.
// (Despite the comment, this also covers the 128-bit / v4i1 cases.)
3746 let Predicates = [HasAVX512, NoVLX] in {
3747 defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3748 defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3749 defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3750 defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3752 defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3753 defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3754 defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3755 defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
// Byte/word element selects need the BWI move instructions.
3758 let Predicates = [HasBWI, NoVLX] in {
3759 defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3760 defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3762 defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3763 defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
// 512-bit integer loads/stores of i32/i16/i8 element types all reuse the
// qword-element VMOVDQA64Z/VMOVDQU64Z instructions (bit pattern is
// identical; these types had NoRMPattern/NoMRPattern set above).
3766 let Predicates = [HasAVX512] in {
3768 def : Pat<(alignedloadv16i32 addr:$src),
3769 (VMOVDQA64Zrm addr:$src)>;
3770 def : Pat<(alignedloadv32i16 addr:$src),
3771 (VMOVDQA64Zrm addr:$src)>;
3772 def : Pat<(alignedloadv64i8 addr:$src),
3773 (VMOVDQA64Zrm addr:$src)>;
3774 def : Pat<(loadv16i32 addr:$src),
3775 (VMOVDQU64Zrm addr:$src)>;
3776 def : Pat<(loadv32i16 addr:$src),
3777 (VMOVDQU64Zrm addr:$src)>;
3778 def : Pat<(loadv64i8 addr:$src),
3779 (VMOVDQU64Zrm addr:$src)>;
3782 def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3783 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3784 def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3785 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3786 def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3787 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3788 def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3789 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3790 def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3791 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3792 def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3793 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
// VLX versions of the same trick: 128/256-bit i32/i16/i8 loads/stores map
// onto the Z128/Z256 qword-element move instructions.
3796 let Predicates = [HasVLX] in {
// 128-bit loads.
3798 def : Pat<(alignedloadv4i32 addr:$src),
3799 (VMOVDQA64Z128rm addr:$src)>;
3800 def : Pat<(alignedloadv8i16 addr:$src),
3801 (VMOVDQA64Z128rm addr:$src)>;
3802 def : Pat<(alignedloadv16i8 addr:$src),
3803 (VMOVDQA64Z128rm addr:$src)>;
3804 def : Pat<(loadv4i32 addr:$src),
3805 (VMOVDQU64Z128rm addr:$src)>;
3806 def : Pat<(loadv8i16 addr:$src),
3807 (VMOVDQU64Z128rm addr:$src)>;
3808 def : Pat<(loadv16i8 addr:$src),
3809 (VMOVDQU64Z128rm addr:$src)>;
// 128-bit stores.
3812 def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3813 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3814 def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3815 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3816 def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3817 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3818 def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3819 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3820 def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3821 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3822 def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3823 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
// 256-bit loads.
3826 def : Pat<(alignedloadv8i32 addr:$src),
3827 (VMOVDQA64Z256rm addr:$src)>;
3828 def : Pat<(alignedloadv16i16 addr:$src),
3829 (VMOVDQA64Z256rm addr:$src)>;
3830 def : Pat<(alignedloadv32i8 addr:$src),
3831 (VMOVDQA64Z256rm addr:$src)>;
3832 def : Pat<(loadv8i32 addr:$src),
3833 (VMOVDQU64Z256rm addr:$src)>;
3834 def : Pat<(loadv16i16 addr:$src),
3835 (VMOVDQU64Z256rm addr:$src)>;
3836 def : Pat<(loadv32i8 addr:$src),
3837 (VMOVDQU64Z256rm addr:$src)>;
// 256-bit stores.
3840 def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3841 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3842 def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3843 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3844 def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3845 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3846 def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3847 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3848 def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3849 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3850 def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3851 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
// FP16 has no dedicated 512-bit move instructions: plain loads/stores
// reuse VMOVAPS/VMOVUPS, while masked operations (which depend on the
// 16-bit element mask granularity) use VMOVDQU16Z.
3853 let Predicates = [HasFP16] in {
3854 def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 VR512:$src1), (v32f16 VR512:$src0))),
3855 (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
3856 def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 VR512:$src1), v32f16_info.ImmAllZerosV)),
3857 (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
3858 def : Pat<(v32f16 (alignedloadv32f16 addr:$src)),
3859 (VMOVAPSZrm addr:$src)>;
3860 def : Pat<(v32f16 (vselect VK32WM:$mask,
3861 (v32f16 (alignedloadv32f16 addr:$src)), (v32f16 VR512:$src0))),
3862 (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3863 def : Pat<(v32f16 (vselect VK32WM:$mask,
3864 (v32f16 (alignedloadv32f16 addr:$src)), v32f16_info.ImmAllZerosV)),
3865 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3866 def : Pat<(v32f16 (loadv32f16 addr:$src)),
3867 (VMOVUPSZrm addr:$src)>;
3868 def : Pat<(v32f16 (vselect VK32WM:$mask,
3869 (v32f16 (loadv32f16 addr:$src)), (v32f16 VR512:$src0))),
3870 (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3871 def : Pat<(v32f16 (vselect VK32WM:$mask,
3872 (v32f16 (loadv32f16 addr:$src)), v32f16_info.ImmAllZerosV)),
3873 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
// masked_load with register / undef / zero pass-through.
3874 def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, (v32f16 VR512:$src0))),
3875 (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3876 def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, undef)),
3877 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3878 def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, v32f16_info.ImmAllZerosV)),
3879 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
// Stores: plain via VMOVAPS/VMOVUPS, masked via VMOVDQU16Z.
3881 def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
3882 (VMOVAPSZmr addr:$dst, VR512:$src)>;
3883 def : Pat<(store (v32f16 VR512:$src), addr:$dst),
3884 (VMOVUPSZmr addr:$dst, VR512:$src)>;
3885 def : Pat<(masked_store (v32f16 VR512:$src), addr:$dst, VK32WM:$mask),
3886 (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
// 256- and 128-bit f16 move/select patterns (AVX512-FP16 + VLX).
// Same scheme as the 512-bit block above: unmasked full-vector moves use
// the PS encodings (VMOVAPS/VMOVUPS), masked operations use VMOVDQU16.
3888 let Predicates = [HasFP16, HasVLX] in {
// --- 256-bit (v16f16) ---
3889 def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 VR256X:$src1), (v16f16 VR256X:$src0))),
3890 (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
3891 def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 VR256X:$src1), v16f16x_info.ImmAllZerosV)),
3892 (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
3893 def : Pat<(v16f16 (alignedloadv16f16 addr:$src)),
3894 (VMOVAPSZ256rm addr:$src)>;
3895 def : Pat<(v16f16 (vselect VK16WM:$mask,
3896 (v16f16 (alignedloadv16f16 addr:$src)), (v16f16 VR256X:$src0))),
3897 (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3898 def : Pat<(v16f16 (vselect VK16WM:$mask,
3899 (v16f16 (alignedloadv16f16 addr:$src)), v16f16x_info.ImmAllZerosV)),
3900 (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3901 def : Pat<(v16f16 (loadv16f16 addr:$src)),
3902 (VMOVUPSZ256rm addr:$src)>;
3903 def : Pat<(v16f16 (vselect VK16WM:$mask,
3904 (v16f16 (loadv16f16 addr:$src)), (v16f16 VR256X:$src0))),
3905 (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3906 def : Pat<(v16f16 (vselect VK16WM:$mask,
3907 (v16f16 (loadv16f16 addr:$src)), v16f16x_info.ImmAllZerosV)),
3908 (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
// masked_load: merge / undef / zero passthru variants.
3909 def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, (v16f16 VR256X:$src0))),
3910 (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3911 def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, undef)),
3912 (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3913 def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, v16f16x_info.ImmAllZerosV)),
3914 (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3916 def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
3917 (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3918 def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
3919 (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
3920 def : Pat<(masked_store (v16f16 VR256X:$src), addr:$dst, VK16WM:$mask),
3921 (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;
// --- 128-bit (v8f16), same structure ---
3923 def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 VR128X:$src1), (v8f16 VR128X:$src0))),
3924 (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
3925 def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 VR128X:$src1), v8f16x_info.ImmAllZerosV)),
3926 (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
3927 def : Pat<(v8f16 (alignedloadv8f16 addr:$src)),
3928 (VMOVAPSZ128rm addr:$src)>;
3929 def : Pat<(v8f16 (vselect VK8WM:$mask,
3930 (v8f16 (alignedloadv8f16 addr:$src)), (v8f16 VR128X:$src0))),
3931 (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3932 def : Pat<(v8f16 (vselect VK8WM:$mask,
3933 (v8f16 (alignedloadv8f16 addr:$src)), v8f16x_info.ImmAllZerosV)),
3934 (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3935 def : Pat<(v8f16 (loadv8f16 addr:$src)),
3936 (VMOVUPSZ128rm addr:$src)>;
3937 def : Pat<(v8f16 (vselect VK8WM:$mask,
3938 (v8f16 (loadv8f16 addr:$src)), (v8f16 VR128X:$src0))),
3939 (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3940 def : Pat<(v8f16 (vselect VK8WM:$mask,
3941 (v8f16 (loadv8f16 addr:$src)), v8f16x_info.ImmAllZerosV)),
3942 (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3943 def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, (v8f16 VR128X:$src0))),
3944 (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3945 def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, undef)),
3946 (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3947 def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, v8f16x_info.ImmAllZerosV)),
3948 (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3950 def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
3951 (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3952 def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
3953 (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
3954 def : Pat<(masked_store (v8f16 VR128X:$src), addr:$dst, VK8WM:$mask),
3955 (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
// Move Int Doubleword to Packed Double Int
//
// EVEX-encoded GPR<->XMM scalar moves (vmovd / vmovq). Register forms are
// scheduled as GPR<->vector moves; memory forms as vector load/store.
3960 let ExeDomain = SSEPackedInt in {
// GR32 -> low dword of an XMM register (upper elements zeroed by vmovd).
3961 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3962 "vmovd\t{$src, $dst|$dst, $src}",
3964 (v4i32 (scalar_to_vector GR32:$src)))]>,
3965 EVEX, Sched<[WriteVecMoveFromGpr]>;
// i32 memory -> low dword of an XMM register.
3966 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3967 "vmovd\t{$src, $dst|$dst, $src}",
3969 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3970 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
// GR64 -> low qword of an XMM register (vmovq, REX.W via VEX_W).
3971 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3972 "vmovq\t{$src, $dst|$dst, $src}",
3974 (v2i64 (scalar_to_vector GR64:$src)))]>,
3975 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
// Load form kept only for the disassembler (no ISel pattern).
3976 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3977 def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3979 "vmovq\t{$src, $dst|$dst, $src}", []>,
3980 EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
// CodeGen-only bitcast moves between GR64 and the FR64X scalar class.
3981 let isCodeGenOnly = 1 in {
3982 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3983 "vmovq\t{$src, $dst|$dst, $src}",
3984 [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3985 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3986 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3987 "vmovq\t{$src, $dst|$dst, $src}",
3988 [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3989 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3991 } // ExeDomain = SSEPackedInt
// Move Int Doubleword to Single Scalar
3995 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
// GR32 -> FR32X bitcast (same vmovd encoding, scalar FP register class).
3996 def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3997 "vmovd\t{$src, $dst|$dst, $src}",
3998 [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3999 EVEX, Sched<[WriteVecMoveFromGpr]>;
4000 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
// Move doubleword from xmm register to r/m32
4004 let ExeDomain = SSEPackedInt in {
// Extract element 0 of a v4i32 into a GR32 / into memory.
4005 def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
4006 "vmovd\t{$src, $dst|$dst, $src}",
4007 [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
4009 EVEX, Sched<[WriteVecMoveToGpr]>;
4010 def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
4011 (ins i32mem:$dst, VR128X:$src),
4012 "vmovd\t{$src, $dst|$dst, $src}",
4013 [(store (i32 (extractelt (v4i32 VR128X:$src),
4014 (iPTR 0))), addr:$dst)]>,
4015 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
4016 } // ExeDomain = SSEPackedInt
// Move quadword from xmm1 register to r/m64
//
// vmovq extractions of element 0 of a v2i64, plus the load that zero-extends
// an i64 into an XMM register, and assembler aliases.
4020 let ExeDomain = SSEPackedInt in {
4021 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
4022 "vmovq\t{$src, $dst|$dst, $src}",
4023 [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
4025 PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
4026 Requires<[HasAVX512]>;
// 0x7E store form exists only for the disassembler; codegen uses the
// 0xD6 VMOVPQI2QIZmr below (note the In64BitMode requirement here).
4028 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
4029 def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
4030 "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
4031 EVEX, VEX_W, Sched<[WriteVecStore]>,
4032 Requires<[HasAVX512, In64BitMode]>;
4034 def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
4035 (ins i64mem:$dst, VR128X:$src),
4036 "vmovq\t{$src, $dst|$dst, $src}",
4037 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
4039 EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
4040 Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
// Reversed-encoding register form, disassembler-only.
4042 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
4043 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
4045 "vmovq\t{$src, $dst|$dst, $src}", []>,
4046 EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
4047 } // ExeDomain = SSEPackedInt
// "vmovq.s" selects the reversed (store-form) encoding.
4049 def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
4050 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
4052 let Predicates = [HasAVX512] in {
4053 def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
4054 (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
// Move Scalar Single to Double Int
4059 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
// FR32X -> GR32 bitcast (counterpart of VMOVDI2SSZrr above).
4060 def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
4062 "vmovd\t{$src, $dst|$dst, $src}",
4063 [(set GR32:$dst, (bitconvert FR32X:$src))]>,
4064 EVEX, Sched<[WriteVecMoveToGpr]>;
4065 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
// Move Quadword Int to Packed Quadword Int
4069 let ExeDomain = SSEPackedInt in {
// i64 memory -> low qword of XMM (XS-prefixed vmovq load form).
4070 def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
4072 "vmovq\t{$src, $dst|$dst, $src}",
4074 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
4075 EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
4076 } // ExeDomain = SSEPackedInt
4078 // Allow "vmovd" but print "vmovq".
4079 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
4080 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
4081 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
4082 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
4084 // Conversions between masks and scalar fp.
// Bitcasts between k-registers and scalar FP go through a GPR:
// fp -> GPR (vmovd/vmovq) -> k, and k -> GPR -> fp.
4085 def : Pat<(v32i1 (bitconvert FR32X:$src)),
4086 (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
4087 def : Pat<(f32 (bitconvert VK32:$src)),
4088 (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;
4090 def : Pat<(v64i1 (bitconvert FR64X:$src)),
4091 (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
4092 def : Pat<(f64 (bitconvert VK64:$src)),
4093 (VMOV64toSDZrr (KMOVQrk VK64:$src))>;
4095 //===----------------------------------------------------------------------===//
4096 // AVX-512 MOVSH, MOVSS, MOVSD
4097 //===----------------------------------------------------------------------===//
// avx512_move_scalar - builds a full scalar-move instruction family
// (VMOVSS/VMOVSD/VMOVSH): register merge (rr) with k/kz masked variants,
// scalar load (rm) with masked variants, and scalar store (mr/mrk).
//   asm         - mnemonic ("vmovss" etc.)
//   OpNode      - the DAG node matched by the register forms (X86Movss etc.)
//   vzload_frag - zero-extending scalar load fragment for the rm form
//   prd         - predicate list; default gates the rr forms on OptForSize
//                 (presumably because a blend is preferred for speed - see
//                 the OptForSpeed patterns later in this file).
4099 multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
4101 list<Predicate> prd = [HasAVX512, OptForSize]> {
4102 let Predicates = prd in
// Unmasked register merge: dst = OpNode(src1, src2).
4103 def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
4104 (ins _.RC:$src1, _.RC:$src2),
4105 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4106 [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
4107 _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
// Zero-masked register form: element 0 selected by $mask, else zero.
4108 def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
4109 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
4110 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
4111 "$dst {${mask}} {z}, $src1, $src2}"),
4112 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
4113 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4115 _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
// Merge-masked register form; $src0 supplies the masked-off value.
4116 let Constraints = "$src0 = $dst" in
4117 def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
4118 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
4119 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
4120 "$dst {${mask}}, $src1, $src2}"),
4121 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
4122 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4123 (_.VT _.RC:$src0))))],
4124 _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
// Loads are rematerializable and foldable.
4125 let canFoldAsLoad = 1, isReMaterializable = 1 in {
// Zero-extending scalar load into the full vector register class.
4126 def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
4127 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4128 [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
4129 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
4130 // _alt version uses FR32/FR64 register class.
4131 let isCodeGenOnly = 1 in
4132 def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
4133 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4134 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
4135 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
// Masked loads have no ISel patterns here (selected via the lowering
// multiclasses below), so they are declared side-effect-free loads.
4137 let mayLoad = 1, hasSideEffects = 0 in {
4138 let Constraints = "$src0 = $dst" in
4139 def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
4140 (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
4141 !strconcat(asm, "\t{$src, $dst {${mask}}|",
4142 "$dst {${mask}}, $src}"),
4143 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
4144 def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
4145 (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
4146 !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
4147 "$dst {${mask}} {z}, $src}"),
4148 [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
// Scalar store from the FRC class; masked store has no pattern here.
4150 def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
4151 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4152 [(store _.FRC:$src, addr:$dst)], _.ExeDomain>,
4153 EVEX, Sched<[WriteFStore]>;
4154 let mayStore = 1, hasSideEffects = 0 in
4155 def mrk: AVX512PI<0x11, MRMDestMem, (outs),
4156 (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
4157 !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
4158 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
// Instantiate the scalar-move family for f32 (XS prefix), f64 (XD + REX.W)
// and f16 (MAP5.XS, AVX512-FP16). VMOVSHZ passes an explicit predicate list
// (continued on the elided line), overriding the default.
4162 defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
4163 VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
4165 defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
4166 VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
4168 defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
4170 VEX_LIG, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
// avx512_move_scalar_lowering - matches a scalar X86selects wrapped inside a
// Movss/Movsd/Movsh node (i.e. a masked scalar select produced from FRC
// operands) and selects the masked register-move forms (rrk/rrkz) of the
// instruction named by InstrStr. Operands are copied from the scalar FRC
// class into the full vector class for the instruction.
4172 multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
4173 PatLeaf ZeroFP, X86VectorVTInfo _> {
// Merge form: false-value is $src2, so it becomes the $src0 tied operand.
4175 def : Pat<(_.VT (OpNode _.RC:$src0,
4176 (_.VT (scalar_to_vector
4177 (_.EltVT (X86selects VK1WM:$mask,
4178 (_.EltVT _.FRC:$src1),
4179 (_.EltVT _.FRC:$src2))))))),
4180 (!cast<Instruction>(InstrStr#rrk)
4181 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
4184 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
// Zeroing form: false-value is the FP zero immediate (ZeroFP).
4186 def : Pat<(_.VT (OpNode _.RC:$src0,
4187 (_.VT (scalar_to_vector
4188 (_.EltVT (X86selects VK1WM:$mask,
4189 (_.EltVT _.FRC:$src1),
4190 (_.EltVT ZeroFP))))))),
4191 (!cast<Instruction>(InstrStr#rrkz)
4194 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
// avx512_store_scalar_lowering - selects a 1-element masked store.
// Matches a masked_store of a 128-bit vector that was widened (via
// insert_subvector into undef) to 512 bits, and emits the scalar masked
// store (mrk) with the mask copied into VK1WM. `Mask` is the dag that the
// caller's mask materialization looks like (e.g. a bitconvert of an AND).
4197 multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4198 dag Mask, RegisterClass MaskRC> {
4200 def : Pat<(masked_store
4201 (_.info512.VT (insert_subvector undef,
4202 (_.info128.VT _.info128.RC:$src),
4203 (iPTR 0))), addr:$dst, Mask),
4204 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4205 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4206 _.info128.RC:$src)>;
// Variant for masks held in a sub-register-sized GPR (GR8/GR16): the mask
// is first widened to i32 with INSERT_SUBREG before the VK1WM copy.
4210 multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
4211 AVX512VLVectorVTInfo _,
4212 dag Mask, RegisterClass MaskRC,
4213 SubRegIndex subreg> {
4215 def : Pat<(masked_store
4216 (_.info512.VT (insert_subvector undef,
4217 (_.info128.VT _.info128.RC:$src),
4218 (iPTR 0))), addr:$dst, Mask),
4219 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4220 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4221 _.info128.RC:$src)>;
4225 // This matches the more recent codegen from clang that avoids emitting a 512
4226 // bit masked store directly. Codegen will widen 128-bit masked store to 512
4227 // bits on AVX512F only targets.
4228 multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4229 AVX512VLVectorVTInfo _,
4230 dag Mask512, dag Mask128,
4231 RegisterClass MaskRC,
4232 SubRegIndex subreg> {
// AVX512F pattern: store seen in its widened 512-bit form (Mask512).
4235 def : Pat<(masked_store
4236 (_.info512.VT (insert_subvector undef,
4237 (_.info128.VT _.info128.RC:$src),
4238 (iPTR 0))), addr:$dst, Mask512),
4239 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4240 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4241 _.info128.RC:$src)>;
4243 // AVX512VL pattern.
4244 def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4245 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4246 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4247 _.info128.RC:$src)>;
// avx512_load_scalar_lowering - load-side counterparts of the store
// lowering multiclasses above. Each matches a 1-element masked_load that
// codegen widened to 512 bits (extract_subvector of a 512-bit masked_load),
// selecting the scalar masked-load forms:
//   rmkz - zero passthru (ImmAllZerosV)
//   rmk  - merge passthru, where the passthru is the X86vzmovl of $src.
4250 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4251 dag Mask, RegisterClass MaskRC> {
4253 def : Pat<(_.info128.VT (extract_subvector
4254 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4255 _.info512.ImmAllZerosV)),
4257 (!cast<Instruction>(InstrStr#rmkz)
4258 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4261 def : Pat<(_.info128.VT (extract_subvector
4262 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4263 (_.info512.VT (insert_subvector undef,
4264 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4267 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4268 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
// Variant for GR8/GR16 masks: widen the mask to i32 via INSERT_SUBREG
// before copying it into VK1WM (same trick as the store lowering).
4273 multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4274 AVX512VLVectorVTInfo _,
4275 dag Mask, RegisterClass MaskRC,
4276 SubRegIndex subreg> {
4278 def : Pat<(_.info128.VT (extract_subvector
4279 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4280 _.info512.ImmAllZerosV)),
4282 (!cast<Instruction>(InstrStr#rmkz)
4283 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4286 def : Pat<(_.info128.VT (extract_subvector
4287 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4288 (_.info512.VT (insert_subvector undef,
4289 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4292 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4293 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4298 // This matches the more recent codegen from clang that avoids emitting a 512
4299 // bit masked load directly. Codegen will widen 128-bit masked load to 512
4300 // bits on AVX512F only targets.
4301 multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4302 AVX512VLVectorVTInfo _,
4303 dag Mask512, dag Mask128,
4304 RegisterClass MaskRC,
4305 SubRegIndex subreg> {
4306 // AVX512F patterns.
4307 def : Pat<(_.info128.VT (extract_subvector
4308 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4309 _.info512.ImmAllZerosV)),
4311 (!cast<Instruction>(InstrStr#rmkz)
4312 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4315 def : Pat<(_.info128.VT (extract_subvector
4316 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4317 (_.info512.VT (insert_subvector undef,
4318 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4321 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4322 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4325 // AVX512Vl patterns.
4326 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4327 _.info128.ImmAllZerosV)),
4328 (!cast<Instruction>(InstrStr#rmkz)
4329 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4332 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4333 (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4334 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4335 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
// Instantiations of the scalar move/load/store lowering multiclasses for
// VMOVSH (f16, i32/GR32 masks), VMOVSS (f32, i16-shaped masks) and VMOVSD
// (f64, i8-shaped masks). The Mask dag passed to each defm spells out the
// exact mask-materialization shape codegen produces for that element width
// (an AND with 1, possibly truncated/bitcast, possibly wrapped in
// insert/extract_subvector for the widened _subreg2 forms).
4339 defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
4340 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4341 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
// Masked-store lowering: one defm per mask-source shape.
4343 defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4344 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4345 defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4346 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4347 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4348 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4349 defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4350 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4351 defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4352 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
// _subreg2 store forms: Mask512 (widened) and Mask128 variants.
4354 defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4355 (v32i1 (insert_subvector
4356 (v32i1 immAllZerosV),
4357 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4359 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4361 defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4362 (v16i1 (insert_subvector
4363 (v16i1 immAllZerosV),
4364 (v4i1 (extract_subvector
4365 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4368 (v4i1 (extract_subvector
4369 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4370 (iPTR 0))), GR8, sub_8bit>;
4371 defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4376 (v16i1 immAllZerosV),
4377 (v2i1 (extract_subvector
4378 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4382 (v2i1 (extract_subvector
4383 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4384 (iPTR 0))), GR8, sub_8bit>;
// Masked-load lowering: mirrors the store defms above, same mask shapes.
4386 defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4387 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4388 defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4389 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4390 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4391 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4392 defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4393 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4394 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4395 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4397 defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4398 (v32i1 (insert_subvector
4399 (v32i1 immAllZerosV),
4400 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4402 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4404 defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4405 (v16i1 (insert_subvector
4406 (v16i1 immAllZerosV),
4407 (v4i1 (extract_subvector
4408 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4411 (v4i1 (extract_subvector
4412 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4413 (iPTR 0))), GR8, sub_8bit>;
4414 defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4419 (v16i1 immAllZerosV),
4420 (v2i1 (extract_subvector
4421 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4425 (v2i1 (extract_subvector
4426 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4427 (iPTR 0))), GR8, sub_8bit>;
// Scalar masked-select patterns: lower an X86selects on bare FR16X/FR32X/
// FR64X scalars by bouncing through the 128-bit masked move instructions.
// The scalars are copied into VR128X, the masked rrk/rrkz (or rmk/rmkz for
// the load forms) instruction runs, and the result is copied back to the
// scalar class. Note rrk takes (false, mask, src1, src2): the "false" value
// becomes the tied passthru operand, and src1 is duplicated where the
// instruction's first source is a don't-care (IMPLICIT_DEF below).
4429 def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))),
4430 (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk
4431 (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)),
4432 VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4433 (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4435 def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)),
4436 (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4437 (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4439 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4440 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4441 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4442 VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4443 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4445 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4446 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4447 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
// Select over a scalar load -> masked scalar load (merge / zero).
4449 def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
4451 (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
4452 VK1WM:$mask, addr:$src)),
4454 def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
4455 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
4457 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4458 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4459 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4460 VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4461 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4463 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
4464 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4465 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4467 def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
4469 (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
4470 VK1WM:$mask, addr:$src)),
4472 def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
4473 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
// Vector-typed X86selects on 128-bit values: reuse src1 for both source
// operands (only element 0 is selected; upper elements come from src1).
4476 def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
4477 (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4478 def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
4479 (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4481 def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
4482 (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4483 def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
4484 (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
// Reversed-encoding (store-form, opcode 0x11) variants of the scalar moves.
// These exist only so the disassembler can print the alternate encoding and
// so the "*.s" assembler aliases below have something to target; they carry
// no patterns. FoldGenData links each back to its canonical 0x10 twin.
4486 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
4487 let Predicates = [HasFP16] in {
4488 def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4489 (ins VR128X:$src1, VR128X:$src2),
4490 "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4491 []>, T_MAP5XS, EVEX_4V, VEX_LIG,
4492 FoldGenData<"VMOVSHZrr">,
4493 Sched<[SchedWriteFShuffle.XMM]>;
4495 let Constraints = "$src0 = $dst" in
4496 def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4497 (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask,
4498 VR128X:$src1, VR128X:$src2),
4499 "vmovsh\t{$src2, $src1, $dst {${mask}}|"#
4500 "$dst {${mask}}, $src1, $src2}",
4501 []>, T_MAP5XS, EVEX_K, EVEX_4V, VEX_LIG,
4502 FoldGenData<"VMOVSHZrrk">,
4503 Sched<[SchedWriteFShuffle.XMM]>;
4505 def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4506 (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4507 "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
4508 "$dst {${mask}} {z}, $src1, $src2}",
4509 []>, EVEX_KZ, T_MAP5XS, EVEX_4V, VEX_LIG,
4510 FoldGenData<"VMOVSHZrrkz">,
4511 Sched<[SchedWriteFShuffle.XMM]>;
// vmovss: plain / merge-masked / zero-masked reversed forms.
4513 def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4514 (ins VR128X:$src1, VR128X:$src2),
4515 "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4516 []>, XS, EVEX_4V, VEX_LIG,
4517 FoldGenData<"VMOVSSZrr">,
4518 Sched<[SchedWriteFShuffle.XMM]>;
4520 let Constraints = "$src0 = $dst" in
4521 def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4522 (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4523 VR128X:$src1, VR128X:$src2),
4524 "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4525 "$dst {${mask}}, $src1, $src2}",
4526 []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
4527 FoldGenData<"VMOVSSZrrk">,
4528 Sched<[SchedWriteFShuffle.XMM]>;
4530 def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4531 (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4532 "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4533 "$dst {${mask}} {z}, $src1, $src2}",
4534 []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
4535 FoldGenData<"VMOVSSZrrkz">,
4536 Sched<[SchedWriteFShuffle.XMM]>;
// vmovsd: same three reversed forms, XD prefix + VEX_W.
4538 def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4539 (ins VR128X:$src1, VR128X:$src2),
4540 "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4541 []>, XD, EVEX_4V, VEX_LIG, VEX_W,
4542 FoldGenData<"VMOVSDZrr">,
4543 Sched<[SchedWriteFShuffle.XMM]>;
4545 let Constraints = "$src0 = $dst" in
4546 def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4547 (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4548 VR128X:$src1, VR128X:$src2),
4549 "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4550 "$dst {${mask}}, $src1, $src2}",
4551 []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
4552 VEX_W, FoldGenData<"VMOVSDZrrk">,
4553 Sched<[SchedWriteFShuffle.XMM]>;
4555 def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4556 (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4558 "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4559 "$dst {${mask}} {z}, $src1, $src2}",
4560 []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
4561 VEX_W, FoldGenData<"VMOVSDZrrkz">,
4562 Sched<[SchedWriteFShuffle.XMM]>;
// "*.s" assembler aliases: request the store-form (_REV) encoding of each
// scalar move explicitly. The trailing 0 disables printing via the alias.
4565 def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4566 (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4567 def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"#
4568 "$dst {${mask}}, $src1, $src2}",
4569 (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask,
4570 VR128X:$src1, VR128X:$src2), 0>;
4571 def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4572 "$dst {${mask}} {z}, $src1, $src2}",
4573 (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask,
4574 VR128X:$src1, VR128X:$src2), 0>;
4575 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4576 (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4577 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4578 "$dst {${mask}}, $src1, $src2}",
4579 (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4580 VR128X:$src1, VR128X:$src2), 0>;
4581 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4582 "$dst {${mask}} {z}, $src1, $src2}",
4583 (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4584 VR128X:$src1, VR128X:$src2), 0>;
4585 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4586 (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4587 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4588 "$dst {${mask}}, $src1, $src2}",
4589 (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4590 VR128X:$src1, VR128X:$src2), 0>;
4591 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4592 "$dst {${mask}} {z}, $src1, $src2}",
4593 (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4594 VR128X:$src1, VR128X:$src2), 0>;
// X86vzmovl (move-low-element-and-zero-the-rest) lowering.
// When optimizing for size, use VMOVSS against a zeroed register; wider
// types extract the low xmm, do the move, and re-insert with
// SUBREG_TO_REG (the implicit zeroing of the upper bits is modeled by the
// zero operand of SUBREG_TO_REG).
4596 let Predicates = [HasAVX512, OptForSize] in {
4597 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4598 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4599 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4600 (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4602 // Move low f32 and clear high bits.
4603 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4604 (SUBREG_TO_REG (i32 0),
4605 (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4606 (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4607 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4608 (SUBREG_TO_REG (i32 0),
4609 (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4610 (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4612 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4613 (SUBREG_TO_REG (i32 0),
4614 (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4615 (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4616 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4617 (SUBREG_TO_REG (i32 0),
4618 (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4619 (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4622 // Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4623 // VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
4624 let Predicates = [HasAVX512, OptForSpeed] in {
// Blend immediates select element 0: 0x1 for one f32 lane, 0x3 for the
// two 16-bit words that make up one i32 lane of VPBLENDW.
4625 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4626 (SUBREG_TO_REG (i32 0),
4627 (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4628 (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4629 (i8 1))), sub_xmm)>;
4630 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4631 (SUBREG_TO_REG (i32 0),
4632 (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4633 (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4634 (i8 3))), sub_xmm)>;
// Scalar loads folded into vector form, plus the FP16 (VMOVSH) equivalents
// of the vzmovl/vzload patterns above.
4637 let Predicates = [HasAVX512] in {
4638 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4639 (VMOVSSZrm addr:$src)>;
4640 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4641 (VMOVSDZrm addr:$src)>;
4643 // Represent the same patterns above but in the form they appear for 256-bit
4645 def : Pat<(v8f32 (X86vzload32 addr:$src)),
4646 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4647 def : Pat<(v4f64 (X86vzload64 addr:$src)),
4648 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4650 // Represent the same patterns above but in the form they appear for 512-bit
4652 def : Pat<(v16f32 (X86vzload32 addr:$src)),
4653 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4654 def : Pat<(v8f64 (X86vzload64 addr:$src)),
4655 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4657 let Predicates = [HasFP16] in {
4658 def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
4659 (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
4661 // FIXME we need better canonicalization in dag combine
4662 def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
4663 (SUBREG_TO_REG (i32 0),
4664 (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4665 (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4666 def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
4667 (SUBREG_TO_REG (i32 0),
4668 (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4669 (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
4671 def : Pat<(v8f16 (X86vzload16 addr:$src)),
4672 (VMOVSHZrm addr:$src)>;
4674 def : Pat<(v16f16 (X86vzload16 addr:$src)),
4675 (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4677 def : Pat<(v32f16 (X86vzload16 addr:$src)),
4678 (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
// VMOVQ xmm,xmm: move low i64 and zero the upper element (X86vzmovl).
4681 let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4682 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4684 "vmovq\t{$src, $dst|$dst, $src}",
4685 [(set VR128X:$dst, (v2i64 (X86vzmovl
4686 (v2i64 VR128X:$src))))]>,
// Selection patterns for zero-extending scalar-to-vector moves (movd/movq)
// and for X86vzmovl of i64/f64 element vectors. Wider results reuse the
// 128-bit instruction via EXTRACT_SUBREG/SUBREG_TO_REG.
4690 let Predicates = [HasAVX512] in {
4691 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4692 (VMOVDI2PDIZrr GR32:$src)>;
4694 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4695 (VMOV64toPQIZrr GR64:$src)>;
4697 // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
4698 def : Pat<(v4i32 (X86vzload32 addr:$src)),
4699 (VMOVDI2PDIZrm addr:$src)>;
4700 def : Pat<(v8i32 (X86vzload32 addr:$src)),
4701 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4702 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4703 (VMOVZPQILo2PQIZrr VR128X:$src)>;
4704 def : Pat<(v2i64 (X86vzload64 addr:$src)),
4705 (VMOVQI2PQIZrm addr:$src)>;
4706 def : Pat<(v4i64 (X86vzload64 addr:$src)),
4707 (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4709 // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4710 def : Pat<(v16i32 (X86vzload32 addr:$src)),
4711 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4712 def : Pat<(v8i64 (X86vzload64 addr:$src)),
4713 (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4715 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4716 (SUBREG_TO_REG (i32 0),
4717 (v2f64 (VMOVZPQILo2PQIZrr
4718 (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4720 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4721 (SUBREG_TO_REG (i32 0),
4722 (v2i64 (VMOVZPQILo2PQIZrr
4723 (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4726 def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4727 (SUBREG_TO_REG (i32 0),
4728 (v2f64 (VMOVZPQILo2PQIZrr
4729 (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4731 def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4732 (SUBREG_TO_REG (i32 0),
4733 (v2i64 (VMOVZPQILo2PQIZrr
4734 (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4738 //===----------------------------------------------------------------------===//
4739 // AVX-512 - Non-temporals
4740 //===----------------------------------------------------------------------===//
// Non-temporal aligned loads. The 128/256-bit EVEX forms require VLX;
// no ISel patterns are attached here ([] pattern list) — selection is done
// by the explicit Pat<> definitions further below.
4742 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4743 (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4744 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4745 EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4747 let Predicates = [HasVLX] in {
4748 def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4750 "vmovntdqa\t{$src, $dst|$dst, $src}",
4751 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4752 EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4754 def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4756 "vmovntdqa\t{$src, $dst|$dst, $src}",
4757 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4758 EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
// Non-temporal store (register -> memory) at a single vector width.
// High AddedComplexity makes the NT store win over a plain store pattern.
4761 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4762 X86SchedWriteMoveLS Sched,
4763 PatFrag st_frag = alignednontemporalstore> {
4764 let SchedRW = [Sched.MR], AddedComplexity = 400 in
4765 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4766 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4767 [(st_frag (_.VT _.RC:$src), addr:$dst)],
4768 _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
// Instantiate the NT store at 512 bits (AVX512) and 256/128 bits (VLX).
4771 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4772 AVX512VLVectorVTInfo VTInfo,
4773 X86SchedWriteMoveLSWidths Sched> {
4774 let Predicates = [HasAVX512] in
4775 defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4777 let Predicates = [HasAVX512, HasVLX] in {
4778 defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4779 defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
// The three EVEX non-temporal stores: integer, double, single.
4783 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4784 SchedWriteVecMoveLSNT>, PD;
4785 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4786 SchedWriteFMoveLSNT>, PD, VEX_W;
4787 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4788 SchedWriteFMoveLSNT>, PS;
// Map every element type onto the integer NT store/load instructions:
// VMOVNTDQ handles i8/i16 stores and VMOVNTDQA handles all aligned NT loads,
// since there are no element-typed NT variants for those.
4790 let Predicates = [HasAVX512], AddedComplexity = 400 in {
4791 def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4792 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4793 def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4794 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4795 def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4796 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4798 def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4799 (VMOVNTDQAZrm addr:$src)>;
4800 def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4801 (VMOVNTDQAZrm addr:$src)>;
4802 def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4803 (VMOVNTDQAZrm addr:$src)>;
4804 def : Pat<(v16i32 (alignednontemporalload addr:$src)),
4805 (VMOVNTDQAZrm addr:$src)>;
4806 def : Pat<(v32i16 (alignednontemporalload addr:$src)),
4807 (VMOVNTDQAZrm addr:$src)>;
4808 def : Pat<(v64i8 (alignednontemporalload addr:$src)),
4809 (VMOVNTDQAZrm addr:$src)>;
// Same mapping for the 256-bit and 128-bit VLX forms.
4812 let Predicates = [HasVLX], AddedComplexity = 400 in {
4813 def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4814 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4815 def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4816 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4817 def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4818 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4820 def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4821 (VMOVNTDQAZ256rm addr:$src)>;
4822 def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4823 (VMOVNTDQAZ256rm addr:$src)>;
4824 def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4825 (VMOVNTDQAZ256rm addr:$src)>;
4826 def : Pat<(v8i32 (alignednontemporalload addr:$src)),
4827 (VMOVNTDQAZ256rm addr:$src)>;
4828 def : Pat<(v16i16 (alignednontemporalload addr:$src)),
4829 (VMOVNTDQAZ256rm addr:$src)>;
4830 def : Pat<(v32i8 (alignednontemporalload addr:$src)),
4831 (VMOVNTDQAZ256rm addr:$src)>;
4833 def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4834 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4835 def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4836 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4837 def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4838 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4840 def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4841 (VMOVNTDQAZ128rm addr:$src)>;
4842 def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4843 (VMOVNTDQAZ128rm addr:$src)>;
4844 def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4845 (VMOVNTDQAZ128rm addr:$src)>;
4846 def : Pat<(v4i32 (alignednontemporalload addr:$src)),
4847 (VMOVNTDQAZ128rm addr:$src)>;
4848 def : Pat<(v8i16 (alignednontemporalload addr:$src)),
4849 (VMOVNTDQAZ128rm addr:$src)>;
4850 def : Pat<(v16i8 (alignednontemporalload addr:$src)),
4851 (VMOVNTDQAZ128rm addr:$src)>;
4854 //===----------------------------------------------------------------------===//
4855 // AVX-512 - Integer arithmetic
//===----------------------------------------------------------------------===//
// Generic masked integer binop at one vector width: register-register (rr)
// and register-memory (rm) forms.
4857 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4858 X86VectorVTInfo _, X86FoldableSchedWrite sched,
4859 bit IsCommutable = 0> {
4860 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4861 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4862 "$src2, $src1", "$src1, $src2",
4863 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4864 IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
4867 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4868 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4869 "$src2, $src1", "$src1, $src2",
4870 (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
4871 AVX512BIBase, EVEX_4V,
4872 Sched<[sched.Folded, sched.ReadAfterFold]>;
// As above, plus the EVEX.B broadcast-from-memory (rmb) form.
4875 multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4876 X86VectorVTInfo _, X86FoldableSchedWrite sched,
4877 bit IsCommutable = 0> :
4878 avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4879 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4880 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4881 "${src2}"#_.BroadcastStr#", $src1",
4882 "$src1, ${src2}"#_.BroadcastStr,
4883 (_.VT (OpNode _.RC:$src1,
4884 (_.BroadcastLdFrag addr:$src2)))>,
4885 AVX512BIBase, EVEX_4V, EVEX_B,
4886 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiate a binop at 512 bits (gated on prd) and, with VLX,
// additionally at 256 and 128 bits.
4889 multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4890 AVX512VLVectorVTInfo VTInfo,
4891 X86SchedWriteWidths sched, Predicate prd,
4892 bit IsCommutable = 0> {
4893 let Predicates = [prd] in
4894 defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4895 IsCommutable>, EVEX_V512;
4897 let Predicates = [prd, HasVLX] in {
4898 defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4899 sched.YMM, IsCommutable>, EVEX_V256;
4900 defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4901 sched.XMM, IsCommutable>, EVEX_V128;
// Same width expansion, but using the broadcast-capable rmb multiclass.
4905 multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4906 AVX512VLVectorVTInfo VTInfo,
4907 X86SchedWriteWidths sched, Predicate prd,
4908 bit IsCommutable = 0> {
4909 let Predicates = [prd] in
4910 defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4911 IsCommutable>, EVEX_V512;
4913 let Predicates = [prd, HasVLX] in {
4914 defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4915 sched.YMM, IsCommutable>, EVEX_V256;
4916 defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4917 sched.XMM, IsCommutable>, EVEX_V128;
// Per-element-size wrappers. q/d element sizes support embedded broadcast
// (rmb_vl); w/b element sizes do not (plain rm_vl), matching the ISA.
4921 multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4922 X86SchedWriteWidths sched, Predicate prd,
4923 bit IsCommutable = 0> {
4924 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4925 sched, prd, IsCommutable>,
4926 VEX_W, EVEX_CD8<64, CD8VF>;
4929 multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4930 X86SchedWriteWidths sched, Predicate prd,
4931 bit IsCommutable = 0> {
4932 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4933 sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
4936 multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4937 X86SchedWriteWidths sched, Predicate prd,
4938 bit IsCommutable = 0> {
4939 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4940 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4944 multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4945 X86SchedWriteWidths sched, Predicate prd,
4946 bit IsCommutable = 0> {
4947 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4948 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
// Paired wrappers: dq expands both 32- and 64-bit element forms, bw both
// 8- and 16-bit, and _all covers all four element sizes (b/w gated on BWI).
4952 multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4953 SDNode OpNode, X86SchedWriteWidths sched,
4954 Predicate prd, bit IsCommutable = 0> {
4955 defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
4958 defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
4962 multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4963 SDNode OpNode, X86SchedWriteWidths sched,
4964 Predicate prd, bit IsCommutable = 0> {
4965 defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
4968 defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
4972 multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4973 bits<8> opc_d, bits<8> opc_q,
4974 string OpcodeStr, SDNode OpNode,
4975 X86SchedWriteWidths sched,
4976 bit IsCommutable = 0> {
4977 defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4978 sched, HasAVX512, IsCommutable>,
4979 avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4980 sched, HasBWI, IsCommutable>;
// Binop whose source and destination vector types differ (e.g. widening
// multiplies): rr, rm, and broadcast (rmb) forms. The broadcast form uses a
// separate _Brdct VT info and bitconverts the splatted load to the source VT.
4983 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
4984 X86FoldableSchedWrite sched,
4985 SDNode OpNode,X86VectorVTInfo _Src,
4986 X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4987 bit IsCommutable = 0> {
4988 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4989 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4990 "$src2, $src1","$src1, $src2",
4992 (_Src.VT _Src.RC:$src1),
4993 (_Src.VT _Src.RC:$src2))),
4995 AVX512BIBase, EVEX_4V, Sched<[sched]>;
4996 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4997 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4998 "$src2, $src1", "$src1, $src2",
4999 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
5000 (_Src.LdFrag addr:$src2)))>,
5001 AVX512BIBase, EVEX_4V,
5002 Sched<[sched.Folded, sched.ReadAfterFold]>;
5004 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5005 (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
5007 "${src2}"#_Brdct.BroadcastStr#", $src1",
5008 "$src1, ${src2}"#_Brdct.BroadcastStr,
5009 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
5010 (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
5011 AVX512BIBase, EVEX_4V, EVEX_B,
5012 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Integer add/sub (plain and saturating), multiplies and averages.
// The trailing 1/0 argument marks the operation commutable or not.
5015 defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
5016 SchedWriteVecALU, 1>;
5017 defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
5018 SchedWriteVecALU, 0>;
5019 defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
5020 SchedWriteVecALU, HasBWI, 1>;
5021 defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
5022 SchedWriteVecALU, HasBWI, 0>;
5023 defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
5024 SchedWriteVecALU, HasBWI, 1>;
5025 defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
5026 SchedWriteVecALU, HasBWI, 0>;
5027 defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
5028 SchedWritePMULLD, HasAVX512, 1>, T8PD;
5029 defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
5030 SchedWriteVecIMul, HasBWI, 1>;
5031 defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
5032 SchedWriteVecIMul, HasDQI, 1>, T8PD,
5033 NotEVEX2VEXConvertible;
5034 defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
5036 defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
5038 defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
5039 SchedWriteVecIMul, HasBWI, 1>, T8PD;
5040 defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
5041 SchedWriteVecALU, HasBWI, 1>;
5042 defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
5043 SchedWriteVecIMul, HasAVX512, 1>, T8PD;
5044 defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
5045 SchedWriteVecIMul, HasAVX512, 1>;
// Width expansion for mixed src/dst type binops built on avx512_binop_rm2.
// The broadcast VT info is always the i64 vector of the matching width
// (v8i64 / v4i64 / v2i64).
5047 multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
5048 X86SchedWriteWidths sched,
5049 AVX512VLVectorVTInfo _SrcVTInfo,
5050 AVX512VLVectorVTInfo _DstVTInfo,
5051 SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
5052 let Predicates = [prd] in
5053 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
5054 _SrcVTInfo.info512, _DstVTInfo.info512,
5055 v8i64_info, IsCommutable>,
5056 EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
5057 let Predicates = [HasVLX, prd] in {
5058 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
5059 _SrcVTInfo.info256, _DstVTInfo.info256,
5060 v4i64x_info, IsCommutable>,
5061 EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
5062 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
5063 _SrcVTInfo.info128, _DstVTInfo.info128,
5064 v2i64x_info, IsCommutable>,
5065 EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
// VPMULTISHIFTQB (VBMI): byte-granular multishift, not commutable.
5069 defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
5070 avx512vl_i8_info, avx512vl_i8_info,
5071 X86multishift, HasVBMI, 0>, T8PD;
// Pack-style ops (narrowing src -> dst): broadcast-from-memory form.
5073 multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5074 X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
5075 X86FoldableSchedWrite sched> {
5076 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5077 (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
5079 "${src2}"#_Src.BroadcastStr#", $src1",
5080 "$src1, ${src2}"#_Src.BroadcastStr,
5081 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
5082 (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
5083 EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
5084 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Pack-style ops: register-register and register-memory forms.
5087 multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
5088 SDNode OpNode,X86VectorVTInfo _Src,
5089 X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
5090 bit IsCommutable = 0> {
5091 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
5092 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
5093 "$src2, $src1","$src1, $src2",
5095 (_Src.VT _Src.RC:$src1),
5096 (_Src.VT _Src.RC:$src2))),
5097 IsCommutable, IsCommutable>,
5098 EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
5099 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5100 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
5101 "$src2, $src1", "$src1, $src2",
5102 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
5103 (_Src.LdFrag addr:$src2)))>,
5104 EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
5105 Sched<[sched.Folded, sched.ReadAfterFold]>;
// i32->i16 packs get the broadcast form (32-bit elements support EVEX.B);
// i16->i8 packs do not. Both gated on BWI, VLX for sub-512 widths.
5108 multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
5110 let Predicates = [HasBWI] in
5111 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
5112 v32i16_info, SchedWriteShuffle.ZMM>,
5113 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
5114 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
5115 let Predicates = [HasBWI, HasVLX] in {
5116 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
5117 v16i16x_info, SchedWriteShuffle.YMM>,
5118 avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
5119 v16i16x_info, SchedWriteShuffle.YMM>,
5121 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
5122 v8i16x_info, SchedWriteShuffle.XMM>,
5123 avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
5124 v8i16x_info, SchedWriteShuffle.XMM>,
5128 multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
5130 let Predicates = [HasBWI] in
5131 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
5132 SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
5133 let Predicates = [HasBWI, HasVLX] in {
5134 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
5135 v32i8x_info, SchedWriteShuffle.YMM>,
5137 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
5138 v16i8x_info, SchedWriteShuffle.XMM>,
// Multiply-add style ops (src elements narrower than dst), BWI-gated.
5143 multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
5144 SDNode OpNode, AVX512VLVectorVTInfo _Src,
5145 AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
5146 let Predicates = [HasBWI] in
5147 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
5148 _Dst.info512, SchedWriteVecIMul.ZMM,
5149 IsCommutable>, EVEX_V512;
5150 let Predicates = [HasBWI, HasVLX] in {
5151 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
5152 _Dst.info256, SchedWriteVecIMul.YMM,
5153 IsCommutable>, EVEX_V256;
5154 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
5155 _Dst.info128, SchedWriteVecIMul.XMM,
5156 IsCommutable>, EVEX_V128;
// Pack, multiply-add, and signed/unsigned min/max instruction instances.
5160 defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
5161 defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
5162 defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
5163 defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
5165 defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
5166 avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
5167 defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
5168 avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
// Min/max: b/w element forms need BWI; d/q need only AVX512. The q forms
// are marked NotEVEX2VEXConvertible (no VEX equivalent exists).
5170 defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
5171 SchedWriteVecALU, HasBWI, 1>, T8PD;
5172 defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
5173 SchedWriteVecALU, HasBWI, 1>;
5174 defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
5175 SchedWriteVecALU, HasAVX512, 1>, T8PD;
5176 defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
5177 SchedWriteVecALU, HasAVX512, 1>, T8PD,
5178 NotEVEX2VEXConvertible;
5180 defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
5181 SchedWriteVecALU, HasBWI, 1>;
5182 defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
5183 SchedWriteVecALU, HasBWI, 1>, T8PD;
5184 defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
5185 SchedWriteVecALU, HasAVX512, 1>, T8PD;
5186 defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
5187 SchedWriteVecALU, HasAVX512, 1>, T8PD,
5188 NotEVEX2VEXConvertible;
5190 defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
5191 SchedWriteVecALU, HasBWI, 1>, T8PD;
5192 defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
5193 SchedWriteVecALU, HasBWI, 1>;
5194 defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
5195 SchedWriteVecALU, HasAVX512, 1>, T8PD;
5196 defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
5197 SchedWriteVecALU, HasAVX512, 1>, T8PD,
5198 NotEVEX2VEXConvertible;
5200 defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
5201 SchedWriteVecALU, HasBWI, 1>;
5202 defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
5203 SchedWriteVecALU, HasBWI, 1>, T8PD;
5204 defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
5205 SchedWriteVecALU, HasAVX512, 1>, T8PD;
5206 defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
5207 SchedWriteVecALU, HasAVX512, 1>, T8PD,
5208 NotEVEX2VEXConvertible;
5210 // PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
// Widen the operands into a zmm via INSERT_SUBREG over IMPLICIT_DEF, run the
// 512-bit instruction, then (in the elided result lines) extract the low
// subregister back out.
5211 let Predicates = [HasDQI, NoVLX] in {
5212 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5215 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5216 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5218 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5221 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5225 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5228 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5229 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5231 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5234 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
// Same widening trick for 64-bit element min/max when VLX is unavailable:
// Instr names the Z (512-bit) instruction stem; rr / rmb forms are selected
// via !cast.
5239 multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
5240 def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
5242 (!cast<Instruction>(Instr#"rr")
5243 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5244 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5246 def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5248 (!cast<Instruction>(Instr#"rmb")
5249 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5253 def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
5255 (!cast<Instruction>(Instr#"rr")
5256 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5257 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5259 def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5261 (!cast<Instruction>(Instr#"rmb")
5262 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5267 let Predicates = [HasAVX512, NoVLX] in {
5268 defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
5269 defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
5270 defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
5271 defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
5274 //===----------------------------------------------------------------------===//
5275 // AVX-512 Logical Instructions
5276 //===----------------------------------------------------------------------===//
// Bitwise logical ops exist only in d/q element flavors; and/or/xor are
// commutable, andn (x & ~y) is not.
5278 defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
5279 SchedWriteVecLogic, HasAVX512, 1>;
5280 defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
5281 SchedWriteVecLogic, HasAVX512, 1>;
5282 defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
5283 SchedWriteVecLogic, HasAVX512, 1>;
5284 defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
5285 SchedWriteVecLogic, HasAVX512>;
// Bitwise logic is element-size agnostic: route i8/i16 vector and/or/xor/andn
// (register and load forms) through the q-element VLX instructions.
5287 let Predicates = [HasVLX] in {
5288 def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
5289 (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5290 def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
5291 (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5293 def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
5294 (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5295 def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
5296 (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5298 def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
5299 (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5300 def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
5301 (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5303 def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
5304 (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5305 def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
5306 (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5308 def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
5309 (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5310 def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
5311 (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5313 def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
5314 (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5315 def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
5316 (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5318 def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
5319 (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5320 def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
5321 (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5323 def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
5324 (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5325 def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
5326 (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5328 def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
5329 (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5330 def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
5331 (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5333 def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
5334 (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5335 def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
5336 (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5338 def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
5339 (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5340 def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
5341 (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5343 def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
5344 (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5345 def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
5346 (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5348 def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
5349 (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5350 def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
5351 (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5353 def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
5354 (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5355 def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
5356 (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5358 def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
5359 (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5360 def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
5361 (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5363 def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
5364 (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5365 def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
5366 (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
// 512-bit equivalents: route i8/i16 vector logic through the q-element
// zmm instructions.
5369 let Predicates = [HasAVX512] in {
5370 def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5371 (VPANDQZrr VR512:$src1, VR512:$src2)>;
5372 def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5373 (VPANDQZrr VR512:$src1, VR512:$src2)>;
5375 def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5376 (VPORQZrr VR512:$src1, VR512:$src2)>;
5377 def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5378 (VPORQZrr VR512:$src1, VR512:$src2)>;
5380 def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5381 (VPXORQZrr VR512:$src1, VR512:$src2)>;
5382 def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5383 (VPXORQZrr VR512:$src1, VR512:$src2)>;
5385 def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5386 (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5387 def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5388 (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5390 def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
5391 (VPANDQZrm VR512:$src1, addr:$src2)>;
5392 def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
5393 (VPANDQZrm VR512:$src1, addr:$src2)>;
5395 def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
5396 (VPORQZrm VR512:$src1, addr:$src2)>;
5397 def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
5398 (VPORQZrm VR512:$src1, addr:$src2)>;
5400 def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
5401 (VPXORQZrm VR512:$src1, addr:$src2)>;
5402 def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
5403 (VPXORQZrm VR512:$src1, addr:$src2)>;
5405 def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
5406 (VPANDNQZrm VR512:$src1, addr:$src2)>;
5407 def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
5408 (VPANDNQZrm VR512:$src1, addr:$src2)>;
5411 // Patterns to catch vselect with different type than logic op.
// avx512_logical_lowering: for a masked select (_.VT typed) wrapping a logic
// op performed in IntInfo's type, select directly to the masked form of the
// named instruction. Emits merge-masking (…rrk/…rmk, with $src0 passthru) and
// zero-masking (…rrkz/…rmkz) patterns, each in register-register and
// register-memory (load-folding) flavors. The concrete instruction is looked
// up by name via !cast<Instruction>(InstrStr # suffix).
5412 multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
5414 X86VectorVTInfo IntInfo> {
5415 // Masked register-register logical operations.
5416 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5417 (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5419 (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5420 _.RC:$src1, _.RC:$src2)>;
5422 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5423 (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5425 (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5428 // Masked register-memory logical operations.
5429 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5430 (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5431 (load addr:$src2)))),
5433 (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5434 _.RC:$src1, addr:$src2)>;
5435 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5436 (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5437 (load addr:$src2)))),
5439 (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
// avx512_logical_lowering_bcast: broadcast-memory companion to
// avx512_logical_lowering. Matches a masked select over a logic op whose
// second operand is a broadcast load (IntInfo.BroadcastLdFrag) and selects
// the …rmbk (merge-masking, $src0 passthru) / …rmbkz (zero-masking) forms of
// the instruction named by InstrStr.
5443 multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
5445 X86VectorVTInfo IntInfo> {
5446 // Register-broadcast logical operations.
5447 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5449 (IntInfo.VT (OpNode _.RC:$src1,
5450 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5452 (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5453 _.RC:$src1, addr:$src2)>;
5454 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5456 (IntInfo.VT (OpNode _.RC:$src1,
5457 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5459 (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
5460 _.RC:$src1, addr:$src2)>;
// avx512_logical_lowering_sizes: instantiates avx512_logical_lowering for all
// three vector widths. The 128/256-bit (Z128/Z256) variants require HasVLX;
// the 512-bit (Z) variant only requires HasAVX512.
5463 multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
5464 AVX512VLVectorVTInfo SelectInfo,
5465 AVX512VLVectorVTInfo IntInfo> {
5466 let Predicates = [HasVLX] in {
5467 defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
5469 defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
5472 let Predicates = [HasAVX512] in {
5473 defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
// avx512_logical_lowering_sizes_bcast: same width expansion as
// avx512_logical_lowering_sizes, but for the broadcast-load patterns
// (avx512_logical_lowering_bcast). Z128/Z256 under HasVLX, Z under HasAVX512.
5478 multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
5479 AVX512VLVectorVTInfo SelectInfo,
5480 AVX512VLVectorVTInfo IntInfo> {
5481 let Predicates = [HasVLX] in {
5482 defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
5483 SelectInfo.info128, IntInfo.info128>;
5484 defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
5485 SelectInfo.info256, IntInfo.info256>;
5487 let Predicates = [HasAVX512] in {
5488 defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
5489 SelectInfo.info512, IntInfo.info512>;
// avx512_logical_lowering_types: cross-product of select type x logic-op type.
// Integer and FP selects (i64/i32/f32/f64 element types) are paired with logic
// ops of every other element type; the instruction suffix ("Q" or "D") is
// chosen to match the select's element size. Broadcast patterns are only added
// for the D and Q forms (broadcasts exist only for 32/64-bit elements).
5493 multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
5494 // i64 vselect with i32/i16/i8 logic op
5495 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5497 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5499 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5502 // i32 vselect with i64/i16/i8 logic op
5503 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5505 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5507 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5510 // f32 vselect with i64/i32/i16/i8 logic op
5511 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5513 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5515 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5517 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5520 // f64 vselect with i64/i32/i16/i8 logic op
5521 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5523 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5525 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5527 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
// Broadcast variants for the D- and Q-suffixed instructions.
5530 defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
5533 defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
// Instantiate the full masked-select lowering matrix for each of the four
// AVX-512 bitwise logic instructions and their corresponding SDNodes.
5538 defm : avx512_logical_lowering_types<"VPAND", and>;
5539 defm : avx512_logical_lowering_types<"VPOR", or>;
5540 defm : avx512_logical_lowering_types<"VPXOR", xor>;
5541 defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
5543 //===----------------------------------------------------------------------===//
5544 // AVX-512 FP arithmetic
5545 //===----------------------------------------------------------------------===//
// avx512_fp_scalar: scalar FP binop (e.g. vaddss). Defines the intrinsic-style
// maskable forms (rr_Int/rm_Int, operating on _.RC with VecNode) plus
// isCodeGenOnly FRC-register forms (rr/rm, using OpNode) used for plain scalar
// ISel. All forms read MXCSR and may raise FP exceptions.
5547 multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5548 SDPatternOperator OpNode, SDNode VecNode,
5549 X86FoldableSchedWrite sched, bit IsCommutable> {
5550 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5551 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5552 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5553 "$src2, $src1", "$src1, $src2",
5554 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5557 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5558 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5559 "$src2, $src1", "$src1, $src2",
5560 (_.VT (VecNode _.RC:$src1,
5561 (_.ScalarIntMemFrags addr:$src2)))>,
5562 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Plain scalar-register (FRC) forms, only used by codegen, never emitted by
// the assembler path.
5563 let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5564 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5565 (ins _.FRC:$src1, _.FRC:$src2),
5566 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5567 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5569 let isCommutable = IsCommutable;
5571 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5572 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5573 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5574 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5575 (_.ScalarLdFrag addr:$src2)))]>,
5576 Sched<[sched.Folded, sched.ReadAfterFold]>;
// avx512_fp_scalar_round: adds the embedded-rounding variant (rrb_Int) of a
// scalar FP binop. Takes an explicit AVX512RC rounding-control operand ($rc)
// and sets EVEX.b (EVEX_B + EVEX_RC) to select static rounding.
5581 multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5582 SDNode VecNode, X86FoldableSchedWrite sched,
5583 bit IsCommutable = 0> {
5584 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5585 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5586 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5587 "$rc, $src2, $src1", "$src1, $src2, $rc",
5588 (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5590 EVEX_B, EVEX_RC, Sched<[sched]>;
// avx512_fp_scalar_sae: scalar FP binop for operations that support
// suppress-all-exceptions ({sae}) instead of embedded rounding (e.g. min/max).
// Defines maskable rr_Int/rm_Int forms (VecNode), codegen-only FRC forms
// (OpNode, with an EVEX->VEX override name for compression), and an rrb_Int
// {sae} form using SaeNode with EVEX.b set.
5592 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5593 SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5594 X86FoldableSchedWrite sched, bit IsCommutable,
5595 string EVEX2VexOvrd> {
5596 let ExeDomain = _.ExeDomain in {
5597 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5598 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5599 "$src2, $src1", "$src1, $src2",
5600 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5601 Sched<[sched]>, SIMD_EXC;
5603 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5604 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5605 "$src2, $src1", "$src1, $src2",
5606 (_.VT (VecNode _.RC:$src1,
5607 (_.ScalarIntMemFrags addr:$src2)))>,
5608 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
// Codegen-only FRC forms; these read MXCSR and may raise FP exceptions.
5610 let isCodeGenOnly = 1, Predicates = [HasAVX512],
5611 Uses = [MXCSR], mayRaiseFPException = 1 in {
5612 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5613 (ins _.FRC:$src1, _.FRC:$src2),
5614 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5615 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5617 EVEX2VEXOverride<EVEX2VexOvrd#"rr"> {
5618 let isCommutable = IsCommutable;
5620 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5621 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5622 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5623 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5624 (_.ScalarLdFrag addr:$src2)))]>,
5625 Sched<[sched.Folded, sched.ReadAfterFold]>,
5626 EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
// {sae} form: exceptions suppressed, so only MXCSR is listed as used.
5629 let Uses = [MXCSR] in
5630 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5631 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5632 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5633 (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5634 EVEX_B, Sched<[sched]>;
// avx512_binop_s_round: expands a scalar FP binop with rounding support into
// its ss (f32), sd (f64) and — under HasFP16 — sh (f16) variants, combining
// avx512_fp_scalar with avx512_fp_scalar_round and attaching the per-type
// prefix/encoding classes (XS / XD+VEX_W / T_MAP5XS) and CD8 scaling.
5638 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5639 SDNode VecNode, SDNode RndNode,
5640 X86SchedWriteSizes sched, bit IsCommutable> {
5641 defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5642 sched.PS.Scl, IsCommutable>,
5643 avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5644 sched.PS.Scl, IsCommutable>,
5645 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5646 defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5647 sched.PD.Scl, IsCommutable>,
5648 avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5649 sched.PD.Scl, IsCommutable>,
5650 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5651 let Predicates = [HasFP16] in
5652 defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
5653 VecNode, sched.PH.Scl, IsCommutable>,
5654 avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
5655 sched.PH.Scl, IsCommutable>,
5656 T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>;
// avx512_binop_s_sae: like avx512_binop_s_round but for {sae}-capable scalar
// ops (min/max). Expands ss/sd/sh variants of avx512_fp_scalar_sae; the f16
// variant is gated on HasFP16 and marked NotEVEX2VEXConvertible (no VEX
// counterpart exists for FP16).
5659 multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5660 SDNode VecNode, SDNode SaeNode,
5661 X86SchedWriteSizes sched, bit IsCommutable> {
5662 defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5663 VecNode, SaeNode, sched.PS.Scl, IsCommutable,
5665 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5666 defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5667 VecNode, SaeNode, sched.PD.Scl, IsCommutable,
5669 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5670 let Predicates = [HasFP16] in {
5671 defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
5672 VecNode, SaeNode, sched.PH.Scl, IsCommutable,
5674 T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>,
5675 NotEVEX2VEXConvertible;
// Scalar FP arithmetic instantiations. add/mul are commutable; sub/div are
// not. min/max use the {sae} variant rather than embedded rounding.
5678 defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
5679 SchedWriteFAddSizes, 1>;
5680 defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
5681 SchedWriteFMulSizes, 1>;
5682 defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
5683 SchedWriteFAddSizes, 0>;
5684 defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
5685 SchedWriteFDivSizes, 0>;
5686 defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5687 SchedWriteFCmpSizes, 0>;
5688 defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5689 SchedWriteFCmpSizes, 0>;
5691 // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
5692 // X86fminc and X86fmaxc instead of X86fmin and X86fmax
// avx512_comutable_binop_s: codegen-only scalar FRC-register binop whose rr
// form is unconditionally commutable; used for the "c" (commutable) min/max
// variants below. Carries an EVEX->VEX override name for compression.
5693 multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5694 X86VectorVTInfo _, SDNode OpNode,
5695 X86FoldableSchedWrite sched,
5696 string EVEX2VEXOvrd> {
5697 let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5698 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5699 (ins _.FRC:$src1, _.FRC:$src2),
5700 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5701 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5702 Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
5703 let isCommutable = 1;
5705 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5706 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5707 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5708 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5709 (_.ScalarLdFrag addr:$src2)))]>,
5710 Sched<[sched.Folded, sched.ReadAfterFold]>,
5711 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
// Commutable scalar min/max instantiations for f32 (ss), f64 (sd) and f16
// (sh). The FP16 variants have no VEX counterpart and are marked
// NotEVEX2VEXConvertible.
5714 defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5715 SchedWriteFCmp.Scl, "VMINCSS">, XS,
5716 EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5718 defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5719 SchedWriteFCmp.Scl, "VMINCSD">, XD,
5720 VEX_W, EVEX_4V, VEX_LIG,
5721 EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5723 defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5724 SchedWriteFCmp.Scl, "VMAXCSS">, XS,
5725 EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5727 defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5728 SchedWriteFCmp.Scl, "VMAXCSD">, XD,
5729 VEX_W, EVEX_4V, VEX_LIG,
5730 EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5732 defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
5733 SchedWriteFCmp.Scl, "VMINCSH">, T_MAP5XS,
5734 EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
5735 NotEVEX2VEXConvertible;
5736 defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
5737 SchedWriteFCmp.Scl, "VMAXCSH">, T_MAP5XS,
5738 EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
5739 NotEVEX2VEXConvertible;
// avx512_fp_packed: packed FP binop for one vector width. Uses
// AVX512_maskable_split so the unmasked pattern (OpNode, possibly a
// strict/any_* node) and the masked pattern (MaskOpNode) can differ. Defines
// rr, rm (full-vector load) and rmb (element broadcast) forms; all read MXCSR
// and may raise FP exceptions.
5741 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5742 SDPatternOperator MaskOpNode,
5743 X86VectorVTInfo _, X86FoldableSchedWrite sched,
5745 bit IsKCommutable = IsCommutable> {
5746 let ExeDomain = _.ExeDomain, hasSideEffects = 0,
5747 Uses = [MXCSR], mayRaiseFPException = 1 in {
5748 defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
5749 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5750 "$src2, $src1", "$src1, $src2",
5751 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
5752 (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
5753 IsKCommutable, IsKCommutable>,
5754 EVEX_4V, Sched<[sched]>;
5755 let mayLoad = 1 in {
5756 defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5757 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5758 "$src2, $src1", "$src1, $src2",
5759 (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5760 (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5761 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5762 defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5763 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5764 "${src2}"#_.BroadcastStr#", $src1",
5765 "$src1, ${src2}"#_.BroadcastStr,
5766 (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5767 (MaskOpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5769 Sched<[sched.Folded, sched.ReadAfterFold]>;
// avx512_fp_round_packed: embedded-rounding (rrb) form of a packed FP binop.
// The AVX512RC operand ($rc) is passed to OpNodeRnd as an i32 timm; EVEX.b is
// set via EVEX_B + EVEX_RC to select static rounding.
5774 multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5775 SDPatternOperator OpNodeRnd,
5776 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5777 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5778 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5779 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#_.Suffix,
5780 "$rc, $src2, $src1", "$src1, $src2, $rc",
5781 (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>,
5782 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
// avx512_fp_sae_packed: {sae} (suppress-all-exceptions) form of a packed FP
// binop. Same shape as avx512_fp_round_packed but with no rounding-control
// operand; EVEX.b selects SAE instead of a rounding mode.
5785 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5786 SDPatternOperator OpNodeSAE,
5787 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5788 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5789 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5790 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5791 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5792 (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5793 EVEX_4V, EVEX_B, Sched<[sched]>;
// avx512_fp_binop_p: expands a packed FP binop into PS/PD variants across all
// widths: 512-bit under the given predicate, and 128/256-bit when VLX is also
// available. IsPD128Commutable allows the PDZ128 form to have different
// commutability from the rest (see VSUB/VDIV vs VADD/VMUL instantiations).
5796 multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5797 SDPatternOperator MaskOpNode,
5798 Predicate prd, X86SchedWriteSizes sched,
5799 bit IsCommutable = 0,
5800 bit IsPD128Commutable = IsCommutable> {
5801 let Predicates = [prd] in {
5802 defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
5803 sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
5804 EVEX_CD8<32, CD8VF>;
5805 defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
5806 sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
5807 EVEX_CD8<64, CD8VF>;
5810 // Define only if AVX512VL feature is present.
5811 let Predicates = [prd, HasVLX] in {
5812 defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
5813 sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
5814 EVEX_CD8<32, CD8VF>;
5815 defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
5816 sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
5817 EVEX_CD8<32, CD8VF>;
5818 defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
5819 sched.PD.XMM, IsPD128Commutable,
5820 IsCommutable>, EVEX_V128, PD, VEX_W,
5821 EVEX_CD8<64, CD8VF>;
5822 defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
5823 sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
5824 EVEX_CD8<64, CD8VF>;
// avx512_fp_binop_ph: FP16 (PH) companion to avx512_fp_binop_p. 512-bit under
// HasFP16; 128/256-bit additionally require HasVLX. All widths use the
// T_MAP5PS encoding with 16-bit CD8 scaling.
5828 multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5829 SDPatternOperator MaskOpNode,
5830 X86SchedWriteSizes sched, bit IsCommutable = 0,
5831 bit IsPD128Commutable = IsCommutable> {
5832 let Predicates = [HasFP16] in {
5833 defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
5834 sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5PS,
5835 EVEX_CD8<16, CD8VF>;
5837 let Predicates = [HasVLX, HasFP16] in {
5838 defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
5839 sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5PS,
5840 EVEX_CD8<16, CD8VF>;
5841 defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
5842 sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5PS,
5843 EVEX_CD8<16, CD8VF>;
// avx512_fp_binop_p_round: embedded-rounding (ZMM-only) variants of a packed
// FP binop for PH (under HasFP16), PS and PD. All read MXCSR.
5847 let Uses = [MXCSR] in
5848 multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5849 X86SchedWriteSizes sched> {
5850 let Predicates = [HasFP16] in {
5851 defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5853 EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
5855 defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5857 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5858 defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5860 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
// avx512_fp_binop_p_sae: {sae} (ZMM-only) variants of a packed FP binop for
// PH (under HasFP16), PS and PD; parallel structure to
// avx512_fp_binop_p_round.
5863 let Uses = [MXCSR] in
5864 multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5865 X86SchedWriteSizes sched> {
5866 let Predicates = [HasFP16] in {
5867 defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5869 EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
5871 defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5873 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5874 defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5876 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
// Packed FP arithmetic instantiations. Each combines the PS/PD widths
// (avx512_fp_binop_p), the FP16 widths (avx512_fp_binop_ph), and either the
// embedded-rounding or {sae} ZMM variant. The unmasked pattern uses the
// strict-FP-capable any_* node while the masked pattern uses the plain node.
5879 defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
5880 SchedWriteFAddSizes, 1>,
5881 avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>,
5882 avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5883 defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
5884 SchedWriteFMulSizes, 1>,
5885 avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>,
5886 avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5887 defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
5888 SchedWriteFAddSizes>,
5889 avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>,
5890 avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5891 defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
5892 SchedWriteFDivSizes>,
5893 avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>,
5894 avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
5895 defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
5896 SchedWriteFCmpSizes, 0>,
5897 avx512_fp_binop_ph<0x5D, "vmin", X86fmin, X86fmin, SchedWriteFCmpSizes, 0>,
5898 avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5899 defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
5900 SchedWriteFCmpSizes, 0>,
5901 avx512_fp_binop_ph<0x5F, "vmax", X86fmax, X86fmax, SchedWriteFCmpSizes, 0>,
5902 avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
// Codegen-only commutable packed min/max (X86fminc/X86fmaxc), reusing the
// vmin/vmax opcodes and mnemonics; these exist only for instruction selection
// and are marked commutable (bit = 1), unlike VMIN/VMAX above.
5903 let isCodeGenOnly = 1 in {
5904 defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
5905 SchedWriteFCmpSizes, 1>,
5906 avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc,
5907 SchedWriteFCmpSizes, 1>;
5908 defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
5909 SchedWriteFCmpSizes, 1>,
5910 avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc,
5911 SchedWriteFCmpSizes, 1>;
// FP bitwise logic (VANDPS/PD etc., HasDQI). These are pure bit ops: no
// MXCSR use and no FP exceptions, so the inherited Uses/mayRaiseFPException
// from avx512_fp_packed are cleared here. Patterns are null_frag; selection
// happens elsewhere.
5913 let Uses = []<Register>, mayRaiseFPException = 0 in {
5914 defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
5915 SchedWriteFLogicSizes, 1>;
5916 defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
5917 SchedWriteFLogicSizes, 0>;
5918 defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
5919 SchedWriteFLogicSizes, 1>;
5920 defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
5921 SchedWriteFLogicSizes, 1>;
// avx512_fp_scalef_p: packed VSCALEF for one vector width — rr, rm (full
// load) and rmb (broadcast) maskable forms. Reads MXCSR and may raise FP
// exceptions.
5924 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5925 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5926 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5927 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5928 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5929 "$src2, $src1", "$src1, $src2",
5930 (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5931 EVEX_4V, Sched<[sched]>;
5932 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5933 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5934 "$src2, $src1", "$src1, $src2",
5935 (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5936 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5937 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5938 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5939 "${src2}"#_.BroadcastStr#", $src1",
5940 "$src1, ${src2}"#_.BroadcastStr,
5941 (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5942 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// avx512_fp_scalef_scalar: scalar VSCALEF (vscalefss/sd/sh) — maskable rr and
// rm forms using the intrinsic-style scalar memory operand. Reads MXCSR and
// may raise FP exceptions.
5946 multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5947 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5948 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5949 defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5950 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5951 "$src2, $src1", "$src1, $src2",
5952 (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5954 defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5955 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
5956 "$src2, $src1", "$src1, $src2",
5957 (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
5958 Sched<[sched.Folded, sched.ReadAfterFold]>;
// avx512_fp_scalef_all: full VSCALEF expansion. 512-bit packed + scalar forms
// for f16 (HasFP16), f32 and f64, each paired with its embedded-rounding
// variant; 128/256-bit packed forms under HasVLX (and HasFP16 for PH).
// Packed ops use opcode `opc`, scalar ops use `opcScaler`.
5962 multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5963 X86SchedWriteWidths sched> {
5964 let Predicates = [HasFP16] in {
5965 defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
5966 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
5967 EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
5968 defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
5969 avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
5970 EVEX_4V, T_MAP6PD, EVEX_CD8<16, CD8VT1>;
5972 defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
5973 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
5974 EVEX_V512, EVEX_CD8<32, CD8VF>, T8PD;
5975 defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
5976 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
5977 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
5978 defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
5979 avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
5980 X86scalefsRnd, sched.Scl>,
5981 EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8PD;
5982 defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
5983 avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
5984 X86scalefsRnd, sched.Scl>,
5985 EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W, T8PD;
5987 // Define only if AVX512VL feature is present.
5988 let Predicates = [HasVLX] in {
5989 defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
5990 EVEX_V128, EVEX_CD8<32, CD8VF>, T8PD;
5991 defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
5992 EVEX_V256, EVEX_CD8<32, CD8VF>, T8PD;
5993 defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
5994 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
5995 defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
5996 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
5999 let Predicates = [HasFP16, HasVLX] in {
6000 defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
6001 EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6PD;
6002 defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
6003 EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6PD;
// VSCALEF: packed opcode 0x2C, scalar opcode 0x2D. EVEX-only (no VEX form),
// hence NotEVEX2VEXConvertible.
6006 defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
6007 SchedWriteFAdd>, NotEVEX2VEXConvertible;
6009 //===----------------------------------------------------------------------===//
6010 // AVX-512 VPTESTM instructions
6011 //===----------------------------------------------------------------------===//
// avx512_vptest: VPTESTM/VPTESTNM-style compare writing a mask register
// (_.KRC). Patterns are deliberately null_frag — see the NOTE below; the rr
// form is declared commutable (trailing 1) for the custom selector's benefit.
6013 multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
6014 X86FoldableSchedWrite sched, X86VectorVTInfo _,
6016 // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
6017 // There are just too many permutations due to commutability and bitcasts.
6018 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6019 defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
6020 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
6021 "$src2, $src1", "$src1, $src2",
6022 (null_frag), (null_frag), 1>,
6023 EVEX_4V, Sched<[sched]>;
6025 defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
6026 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
6027 "$src2, $src1", "$src1, $src2",
6028 (null_frag), (null_frag)>,
6029 EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6030 Sched<[sched.Folded, sched.ReadAfterFold]>;
// avx512_vptest_mb: broadcast-memory (rmb) form of the VPTEST-style compare;
// like avx512_vptest, selection is manual so patterns are null_frag.
6034 multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
6035 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6036 let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
6037 defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
6038 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6039 "${src2}"#_.BroadcastStr#", $src1",
6040 "$src1, ${src2}"#_.BroadcastStr,
6041 (null_frag), (null_frag)>,
6042 EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6043 Sched<[sched.Folded, sched.ReadAfterFold]>;
// avx512_vptest_dq_sizes: width expansion for the D/Q element VPTEST forms —
// Z (512-bit, HasAVX512) and Z256/Z128 (HasVLX), each combining the reg/mem
// forms with the broadcast form.
6046 multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
6047 X86SchedWriteWidths sched,
6048 AVX512VLVectorVTInfo _> {
6049 let Predicates = [HasAVX512] in
6050 defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512, NAME>,
6051 avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
6053 let Predicates = [HasAVX512, HasVLX] in {
6054 defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256, NAME>,
6055 avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
6056 defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128, NAME>,
6057 avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
// avx512_vptest_dq: instantiates the dword ("d") and qword ("q", VEX_W)
// element-size variants of the VPTEST-style compare.
6061 multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
6062 X86SchedWriteWidths sched> {
6063 defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
6065 defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
6066 avx512vl_i64_info>, VEX_W;
// avx512_vptest_wb: word ("w", VEX_W) and byte ("b") element-size variants of
// the VPTEST-style compare. These require HasBWI (plus HasVLX for the
// 128/256-bit widths); no broadcast forms exist for byte/word elements.
6069 multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
6070 X86SchedWriteWidths sched> {
6071 let Predicates = [HasBWI] in {
6072 defm WZ: avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
6073 v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
6074 defm BZ: avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
6075 v64i8_info, NAME#"B">, EVEX_V512;
6077 let Predicates = [HasVLX, HasBWI] in {
6079 defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
6080 v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
6081 defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
6082 v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
6083 defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
6084 v32i8x_info, NAME#"B">, EVEX_V256;
6085 defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
6086 v16i8x_info, NAME#"B">, EVEX_V128;
// avx512_vptest_all_forms: convenience multiclass inheriting both the
// byte/word (opc_wb) and dword/qword (opc_dq) expansions. Instantiated for
// VPTESTM (T8PD encoding) and VPTESTNM (T8XS encoding), which share opcodes
// 0x26/0x27 and differ only by prefix.
6090 multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
6091 X86SchedWriteWidths sched> :
6092 avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
6093 avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
6095 defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
6096 SchedWriteVecLogic>, T8PD;
6097 defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
6098 SchedWriteVecLogic>, T8XS;
6100 //===----------------------------------------------------------------------===//
6101 // AVX-512 Shift instructions
6102 //===----------------------------------------------------------------------===//
// avx512_shift_rmi: shift-by-immediate forms — ri (register source) and mi
// (memory source), both taking an 8-bit immediate count matched as
// (i8 timm:$src2).
6104 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
6105 string OpcodeStr, SDNode OpNode,
6106 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6107 let ExeDomain = _.ExeDomain in {
6108 defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
6109 (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
6110 "$src2, $src1", "$src1, $src2",
6111 (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
6113 defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
6114 (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
6115 "$src2, $src1", "$src1, $src2",
6116 (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
6118 Sched<[sched.Folded]>;
// avx512_shift_rmbi: broadcast-memory shift-by-immediate form (mbi) — the
// vector operand is an element broadcast from memory, marked with EVEX_B.
6122 multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
6123 string OpcodeStr, SDNode OpNode,
6124 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6125 let ExeDomain = _.ExeDomain in
6126 defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
6127 (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
6128 "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
6129 (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
6130 EVEX_B, Sched<[sched.Folded]>;
// avx512_shift_rrm: shift-by-vector-count forms (rr/rm). The count operand
// ($src2) is always a 128-bit value (VR128X / i128mem) regardless of the
// destination vector width.
6133 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6134 X86FoldableSchedWrite sched, ValueType SrcVT,
6135 X86VectorVTInfo _> {
6136 // src2 is always 128-bit
6137 let ExeDomain = _.ExeDomain in {
6138 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6139 (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
6140 "$src2, $src1", "$src1, $src2",
6141 (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
6142 AVX512BIBase, EVEX_4V, Sched<[sched]>;
6143 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6144 (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
6145 "$src2, $src1", "$src1, $src2",
6146 (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
6148 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
// avx512_shift_sizes: width expansion for the shift-by-vector-count forms —
// Z (512-bit) under the given predicate, Z256/Z128 when VLX is also present.
// Note the CD8 tuple differs per width (CD8VQ / CD8VH / CD8VF) because only
// the low 128 bits of the count memory operand are used.
6152 multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6153 X86SchedWriteWidths sched, ValueType SrcVT,
6154 AVX512VLVectorVTInfo VTInfo,
6156 let Predicates = [prd] in
6157 defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
6158 VTInfo.info512>, EVEX_V512,
6159 EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
6160 let Predicates = [prd, HasVLX] in {
6161 defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
6162 VTInfo.info256>, EVEX_V256,
6163 EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
6164 defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
6165 VTInfo.info128>, EVEX_V128,
6166 EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
6170 multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
6171 string OpcodeStr, SDNode OpNode,
6172 X86SchedWriteWidths sched,
6173 bit NotEVEX2VEXConvertibleQ = 0> {
6174 defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
6175 avx512vl_i32_info, HasAVX512>;
6176 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
6177 defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
6178 avx512vl_i64_info, HasAVX512>, VEX_W;
6179 defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
6180 avx512vl_i16_info, HasBWI>;
6183 multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6184 string OpcodeStr, SDNode OpNode,
6185 X86SchedWriteWidths sched,
6186 AVX512VLVectorVTInfo VTInfo> {
6187 let Predicates = [HasAVX512] in
6188 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6189 sched.ZMM, VTInfo.info512>,
6190 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
6191 VTInfo.info512>, EVEX_V512;
6192 let Predicates = [HasAVX512, HasVLX] in {
6193 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6194 sched.YMM, VTInfo.info256>,
6195 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
6196 VTInfo.info256>, EVEX_V256;
6197 defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6198 sched.XMM, VTInfo.info128>,
6199 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
6200 VTInfo.info128>, EVEX_V128;
6204 multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
6205 string OpcodeStr, SDNode OpNode,
6206 X86SchedWriteWidths sched> {
6207 let Predicates = [HasBWI] in
6208 defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6209 sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
6210 let Predicates = [HasVLX, HasBWI] in {
6211 defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6212 sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
6213 defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6214 sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
6218 multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
6219 Format ImmFormR, Format ImmFormM,
6220 string OpcodeStr, SDNode OpNode,
6221 X86SchedWriteWidths sched,
6222 bit NotEVEX2VEXConvertibleQ = 0> {
6223 defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
6224 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
6225 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
6226 defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
6227 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
6230 defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
6231 SchedWriteVecShiftImm>,
6232 avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
6233 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6235 defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
6236 SchedWriteVecShiftImm>,
6237 avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
6238 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6240 defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
6241 SchedWriteVecShiftImm, 1>,
6242 avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
6243 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6245 defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
6246 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6247 defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
6248 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6250 defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
6251 SchedWriteVecShift>;
6252 defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
6253 SchedWriteVecShift, 1>;
6254 defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
6255 SchedWriteVecShift>;
6257 // Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
6258 let Predicates = [HasAVX512, NoVLX] in {
6259 def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
6260 (EXTRACT_SUBREG (v8i64
6262 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6263 VR128X:$src2)), sub_ymm)>;
6265 def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6266 (EXTRACT_SUBREG (v8i64
6268 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6269 VR128X:$src2)), sub_xmm)>;
6271 def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
6272 (EXTRACT_SUBREG (v8i64
6274 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6275 timm:$src2)), sub_ymm)>;
6277 def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
6278 (EXTRACT_SUBREG (v8i64
6280 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6281 timm:$src2)), sub_xmm)>;
6284 //===-------------------------------------------------------------------===//
6285 // Variable Bit Shifts
6286 //===-------------------------------------------------------------------===//
6288 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
6289 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6290 let ExeDomain = _.ExeDomain in {
6291 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6292 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
6293 "$src2, $src1", "$src1, $src2",
6294 (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
6295 AVX5128IBase, EVEX_4V, Sched<[sched]>;
6296 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6297 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
6298 "$src2, $src1", "$src1, $src2",
6299 (_.VT (OpNode _.RC:$src1,
6300 (_.VT (_.LdFrag addr:$src2))))>,
6301 AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6302 Sched<[sched.Folded, sched.ReadAfterFold]>;
6306 multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
6307 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6308 let ExeDomain = _.ExeDomain in
6309 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6310 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6311 "${src2}"#_.BroadcastStr#", $src1",
6312 "$src1, ${src2}"#_.BroadcastStr,
6313 (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
6314 AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6315 Sched<[sched.Folded, sched.ReadAfterFold]>;
6318 multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6319 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
6320 let Predicates = [HasAVX512] in
6321 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
6322 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
6324 let Predicates = [HasAVX512, HasVLX] in {
6325 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
6326 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
6327 defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
6328 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
6332 multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
6333 SDNode OpNode, X86SchedWriteWidths sched> {
6334 defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
6336 defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
6337 avx512vl_i64_info>, VEX_W;
6340 // Use 512bit version to implement 128/256 bit in case NoVLX.
6341 multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
6342 SDNode OpNode, list<Predicate> p> {
6343 let Predicates = p in {
6344 def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
6345 (_.info256.VT _.info256.RC:$src2))),
6347 (!cast<Instruction>(OpcodeStr#"Zrr")
6348 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
6349 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
6352 def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
6353 (_.info128.VT _.info128.RC:$src2))),
6355 (!cast<Instruction>(OpcodeStr#"Zrr")
6356 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6357 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
6361 multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6362 SDNode OpNode, X86SchedWriteWidths sched> {
6363 let Predicates = [HasBWI] in
6364 defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6366 let Predicates = [HasVLX, HasBWI] in {
6368 defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6370 defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
6375 defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
6376 avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
6378 defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
6379 avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
6381 defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
6382 avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
6384 defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6385 defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
6387 defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
6388 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
6389 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
6390 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
6393 // Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
6394 let Predicates = [HasAVX512, NoVLX] in {
6395 def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6396 (EXTRACT_SUBREG (v8i64
6398 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6399 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6401 def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6402 (EXTRACT_SUBREG (v8i64
6404 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6405 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6408 def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6409 (EXTRACT_SUBREG (v16i32
6411 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6412 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6414 def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6415 (EXTRACT_SUBREG (v16i32
6417 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6418 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6421 def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
6422 (EXTRACT_SUBREG (v8i64
6424 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6425 timm:$src2)), sub_xmm)>;
6426 def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
6427 (EXTRACT_SUBREG (v8i64
6429 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6430 timm:$src2)), sub_ymm)>;
6432 def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
6433 (EXTRACT_SUBREG (v16i32
6435 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6436 timm:$src2)), sub_xmm)>;
6437 def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
6438 (EXTRACT_SUBREG (v16i32
6440 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6441 timm:$src2)), sub_ymm)>;
6444 // Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
6445 let Predicates = [HasAVX512, NoVLX] in {
6446 def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6447 (EXTRACT_SUBREG (v8i64
6449 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6450 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6452 def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6453 (EXTRACT_SUBREG (v8i64
6455 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6456 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6459 def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6460 (EXTRACT_SUBREG (v16i32
6462 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6463 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6465 def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6466 (EXTRACT_SUBREG (v16i32
6468 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6469 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6472 def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
6473 (EXTRACT_SUBREG (v8i64
6475 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6476 timm:$src2)), sub_xmm)>;
6477 def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
6478 (EXTRACT_SUBREG (v8i64
6480 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6481 timm:$src2)), sub_ymm)>;
6483 def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
6484 (EXTRACT_SUBREG (v16i32
6486 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6487 timm:$src2)), sub_xmm)>;
6488 def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
6489 (EXTRACT_SUBREG (v16i32
6491 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6492 timm:$src2)), sub_ymm)>;
6495 //===-------------------------------------------------------------------===//
6496 // 1-src variable permutation VPERMW/D/Q
6497 //===-------------------------------------------------------------------===//
6499 multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6500 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6501 let Predicates = [HasAVX512] in
6502 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6503 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6505 let Predicates = [HasAVX512, HasVLX] in
6506 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6507 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
6510 multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6511 string OpcodeStr, SDNode OpNode,
6512 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6513 let Predicates = [HasAVX512] in
6514 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6515 sched, VTInfo.info512>,
6516 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6517 sched, VTInfo.info512>, EVEX_V512;
6518 let Predicates = [HasAVX512, HasVLX] in
6519 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6520 sched, VTInfo.info256>,
6521 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6522 sched, VTInfo.info256>, EVEX_V256;
6525 multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6526 Predicate prd, SDNode OpNode,
6527 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6528 let Predicates = [prd] in
6529 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6531 let Predicates = [HasVLX, prd] in {
6532 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6534 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
6539 defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6540 WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
6541 defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6542 WriteVarShuffle256, avx512vl_i8_info>;
6544 defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6545 WriteVarShuffle256, avx512vl_i32_info>;
6546 defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6547 WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
6548 defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6549 WriteFVarShuffle256, avx512vl_f32_info>;
6550 defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6551 WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;
6553 defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6554 X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6555 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6556 defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6557 X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6558 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6560 //===----------------------------------------------------------------------===//
6561 // AVX-512 - VPERMIL
6562 //===----------------------------------------------------------------------===//
6564 multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6565 X86FoldableSchedWrite sched, X86VectorVTInfo _,
6566 X86VectorVTInfo Ctrl> {
6567 defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6568 (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6569 "$src2, $src1", "$src1, $src2",
6570 (_.VT (OpNode _.RC:$src1,
6571 (Ctrl.VT Ctrl.RC:$src2)))>,
6572 T8PD, EVEX_4V, Sched<[sched]>;
6573 defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6574 (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6575 "$src2, $src1", "$src1, $src2",
6578 (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
6579 T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6580 Sched<[sched.Folded, sched.ReadAfterFold]>;
6581 defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6582 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6583 "${src2}"#_.BroadcastStr#", $src1",
6584 "$src1, ${src2}"#_.BroadcastStr,
6587 (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
6588 T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6589 Sched<[sched.Folded, sched.ReadAfterFold]>;
6592 multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6593 X86SchedWriteWidths sched,
6594 AVX512VLVectorVTInfo _,
6595 AVX512VLVectorVTInfo Ctrl> {
6596 let Predicates = [HasAVX512] in {
6597 defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6598 _.info512, Ctrl.info512>, EVEX_V512;
6600 let Predicates = [HasAVX512, HasVLX] in {
6601 defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6602 _.info128, Ctrl.info128>, EVEX_V128;
6603 defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6604 _.info256, Ctrl.info256>, EVEX_V256;
6608 multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6609 AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6610 defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6612 defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6613 X86VPermilpi, SchedWriteFShuffle, _>,
6614 EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
6617 let ExeDomain = SSEPackedSingle in
6618 defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6620 let ExeDomain = SSEPackedDouble in
6621 defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6622 avx512vl_i64_info>, VEX_W1X;
6624 //===----------------------------------------------------------------------===//
6625 // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6626 //===----------------------------------------------------------------------===//
6628 defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6629 X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6630 EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6631 defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6632 X86PShufhw, SchedWriteShuffle>,
6633 EVEX, AVX512XSIi8Base;
6634 defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6635 X86PShuflw, SchedWriteShuffle>,
6636 EVEX, AVX512XDIi8Base;
6638 //===----------------------------------------------------------------------===//
6639 // AVX-512 - VPSHUFB
6640 //===----------------------------------------------------------------------===//
6642 multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6643 X86SchedWriteWidths sched> {
6644 let Predicates = [HasBWI] in
6645 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6648 let Predicates = [HasVLX, HasBWI] in {
6649 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6651 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
6656 defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6657 SchedWriteVarShuffle>, VEX_WIG;
6659 //===----------------------------------------------------------------------===//
6660 // Move Low to High and High to Low packed FP Instructions
6661 //===----------------------------------------------------------------------===//
6663 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6664 (ins VR128X:$src1, VR128X:$src2),
6665 "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6666 [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6667 Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
6668 let isCommutable = 1 in
6669 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6670 (ins VR128X:$src1, VR128X:$src2),
6671 "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6672 [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6673 Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
6675 //===----------------------------------------------------------------------===//
6676 // VMOVHPS/PD VMOVLPS Instructions
6677 // All patterns were taken from the SSE implementation.
6678 //===----------------------------------------------------------------------===//
6680 multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6681 SDPatternOperator OpNode,
6682 X86VectorVTInfo _> {
6683 let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6684 def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6685 (ins _.RC:$src1, f64mem:$src2),
6686 !strconcat(OpcodeStr,
6687 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6691 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6692 Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
6695 // No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
6696 // SSE1. And MOVLPS pattern is even more complex.
6697 defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6698 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6699 defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6700 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6701 defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6702 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6703 defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6704 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6706 let Predicates = [HasAVX512] in {
6708 def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
6709 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6712 def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
6713 (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
6716 let SchedRW = [WriteFStore] in {
6717 let mayStore = 1, hasSideEffects = 0 in
6718 def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6719 (ins f64mem:$dst, VR128X:$src),
6720 "vmovhps\t{$src, $dst|$dst, $src}",
6721 []>, EVEX, EVEX_CD8<32, CD8VT2>;
6722 def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6723 (ins f64mem:$dst, VR128X:$src),
6724 "vmovhpd\t{$src, $dst|$dst, $src}",
6725 [(store (f64 (extractelt
6726 (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6727 (iPTR 0))), addr:$dst)]>,
6728 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6729 let mayStore = 1, hasSideEffects = 0 in
6730 def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6731 (ins f64mem:$dst, VR128X:$src),
6732 "vmovlps\t{$src, $dst|$dst, $src}",
6733 []>, EVEX, EVEX_CD8<32, CD8VT2>;
6734 def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6735 (ins f64mem:$dst, VR128X:$src),
6736 "vmovlpd\t{$src, $dst|$dst, $src}",
6737 [(store (f64 (extractelt (v2f64 VR128X:$src),
6738 (iPTR 0))), addr:$dst)]>,
6739 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6742 let Predicates = [HasAVX512] in {
6744 def : Pat<(store (f64 (extractelt
6745 (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6746 (iPTR 0))), addr:$dst),
6747 (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6749 //===----------------------------------------------------------------------===//
6750 // FMA - Fused Multiply Operations
6753 multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6754 SDNode MaskOpNode, X86FoldableSchedWrite sched,
6755 X86VectorVTInfo _, string Suff> {
6756 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6757 Uses = [MXCSR], mayRaiseFPException = 1 in {
6758 defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6759 (ins _.RC:$src2, _.RC:$src3),
6760 OpcodeStr, "$src3, $src2", "$src2, $src3",
6761 (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
6762 (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6763 AVX512FMA3Base, Sched<[sched]>;
6765 defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6766 (ins _.RC:$src2, _.MemOp:$src3),
6767 OpcodeStr, "$src3, $src2", "$src2, $src3",
6768 (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
6769 (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6770 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6772 defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6773 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6774 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
6775 !strconcat("$src2, ${src3}", _.BroadcastStr ),
6777 _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
6778 (MaskOpNode _.RC:$src2,
6779 _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
6780 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6784 multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6785 X86FoldableSchedWrite sched,
6786 X86VectorVTInfo _, string Suff> {
6787 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6789 defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6790 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6791 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6792 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
6793 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
6794 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6797 multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6798 SDNode MaskOpNode, SDNode OpNodeRnd,
6799 X86SchedWriteWidths sched,
6800 AVX512VLVectorVTInfo _, string Suff> {
6801 let Predicates = [HasAVX512] in {
6802 defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6803 sched.ZMM, _.info512, Suff>,
6804 avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6806 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6808 let Predicates = [HasVLX, HasAVX512] in {
6809 defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6810 sched.YMM, _.info256, Suff>,
6811 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6812 defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6813 sched.XMM, _.info128, Suff>,
6814 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6818 multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6819 SDNode MaskOpNode, SDNode OpNodeRnd> {
6820 defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6821 OpNodeRnd, SchedWriteFMA,
6822 avx512vl_f32_info, "PS">;
6823 defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6824 OpNodeRnd, SchedWriteFMA,
6825 avx512vl_f64_info, "PD">, VEX_W;
6828 defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
6830 defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
6831 X86Fmsub, X86FmsubRnd>;
6832 defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
6833 X86Fmaddsub, X86FmaddsubRnd>;
6834 defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
6835 X86Fmsubadd, X86FmsubaddRnd>;
6836 defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
6837 X86Fnmadd, X86FnmaddRnd>;
6838 defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
6839 X86Fnmsub, X86FnmsubRnd>;
6842 multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6843 SDNode MaskOpNode, X86FoldableSchedWrite sched,
6844 X86VectorVTInfo _, string Suff> {
6845 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6846 Uses = [MXCSR], mayRaiseFPException = 1 in {
6847 defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6848 (ins _.RC:$src2, _.RC:$src3),
6849 OpcodeStr, "$src3, $src2", "$src2, $src3",
6851 (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6852 AVX512FMA3Base, Sched<[sched]>;
6854 defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6855 (ins _.RC:$src2, _.MemOp:$src3),
6856 OpcodeStr, "$src3, $src2", "$src2, $src3",
6857 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6858 (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6859 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6861 defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6862 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6863 OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6864 "$src2, ${src3}"#_.BroadcastStr,
6865 (_.VT (OpNode _.RC:$src2,
6866 (_.VT (_.BroadcastLdFrag addr:$src3)),
6868 (_.VT (MaskOpNode _.RC:$src2,
6869 (_.VT (_.BroadcastLdFrag addr:$src3)),
6870 _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
6871 Sched<[sched.Folded, sched.ReadAfterFold]>;
6875 multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6876 X86FoldableSchedWrite sched,
6877 X86VectorVTInfo _, string Suff> {
6878 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6880 defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6881 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6882 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6884 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
6885 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6888 multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6889 SDNode MaskOpNode, SDNode OpNodeRnd,
6890 X86SchedWriteWidths sched,
6891 AVX512VLVectorVTInfo _, string Suff> {
6892 let Predicates = [HasAVX512] in {
6893 defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6894 sched.ZMM, _.info512, Suff>,
6895 avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6897 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6899 let Predicates = [HasVLX, HasAVX512] in {
6900 defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6901 sched.YMM, _.info256, Suff>,
6902 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6903 defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6904 sched.XMM, _.info128, Suff>,
6905 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6909 multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6910 SDNode MaskOpNode, SDNode OpNodeRnd > {
6911 defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6912 OpNodeRnd, SchedWriteFMA,
6913 avx512vl_f32_info, "PS">;
6914 defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6915 OpNodeRnd, SchedWriteFMA,
6916 avx512vl_f64_info, "PD">, VEX_W;
// Concrete 231-form packed FMA instruction families (opcode, mnemonic,
// unmasked/masked/rounding SDNodes).
// NOTE(review): original line 6920 (the remaining template args of
// VFMADD231, presumably `fma, X86FmaddRnd>;`) is elided — verify upstream.
6919 defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
6921 defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
6922 X86Fmsub, X86FmsubRnd>;
6923 defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
6924 X86Fmaddsub, X86FmaddsubRnd>;
6925 defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
6926 X86Fmsubadd, X86FmsubaddRnd>;
6927 defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
6928 X86Fnmadd, X86FnmaddRnd>;
6929 defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
6930 X86Fnmsub, X86FnmsubRnd>;
// 132-form packed FMA: register ("r"), memory-folded ("m"), and broadcast
// ("mb") variants. All are tied dst/src1, read MXCSR, and may raise FP
// exceptions. The memory patterns are deliberately written in 312 operand
// order (see inline comments) so tablegen's duplicate-pattern detection does
// not collide with the 213/231 forms.
// NOTE(review): original line 6940 (the unmasked "r" pattern operand) is
// elided — verify against upstream.
6932 multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6933 SDNode MaskOpNode, X86FoldableSchedWrite sched,
6934 X86VectorVTInfo _, string Suff> {
6935 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6936 Uses = [MXCSR], mayRaiseFPException = 1 in {
6937 defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6938 (ins _.RC:$src2, _.RC:$src3),
6939 OpcodeStr, "$src3, $src2", "$src2, $src3",
6941 (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
6942 AVX512FMA3Base, Sched<[sched]>;
6944 // Pattern is 312 order so that the load is in a different place from the
6945 // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6946 defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6947 (ins _.RC:$src2, _.MemOp:$src3),
6948 OpcodeStr, "$src3, $src2", "$src2, $src3",
6949 (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
6950 (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
6951 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6953 // Pattern is 312 order so that the load is in a different place from the
6954 // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6955 defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6956 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6957 OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6958 "$src2, ${src3}"#_.BroadcastStr,
6959 (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6960 _.RC:$src1, _.RC:$src2)),
6961 (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6962 _.RC:$src1, _.RC:$src2)), 1, 0>,
6963 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Register-only static-rounding ("rb") variant of the 132-form packed FMA:
// dst = OpNode(src1, src3, src2) with an explicit rounding-control operand
// ($rc), dst tied to $src1, encoded EVEX.B + EVEX_RC.
// NOTE(review): original line 6971 is elided here (presumably the
// `Uses = [MXCSR] in` clause closing the `let`) — verify against upstream.
6967 multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6968 X86FoldableSchedWrite sched,
6969 X86VectorVTInfo _, string Suff> {
6970 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6972 defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6973 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6974 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6976 (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
6977 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
// Instantiates the 132-form packed FMA across EVEX vector widths: Z (512-bit,
// HasAVX512, plus the static-rounding variant) and Z256/Z128 (HasVLX).
// Mirrors avx512_fma3p_231_common.
// NOTE(review): original line 6988 (trailing args of the
// avx512_fma3_132_round instantiation) is elided — verify against upstream.
6980 multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6981 SDNode MaskOpNode, SDNode OpNodeRnd,
6982 X86SchedWriteWidths sched,
6983 AVX512VLVectorVTInfo _, string Suff> {
6984 let Predicates = [HasAVX512] in {
6985 defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6986 sched.ZMM, _.info512, Suff>,
6987 avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6989 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6991 let Predicates = [HasVLX, HasAVX512] in {
6992 defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6993 sched.YMM, _.info256, Suff>,
6994 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6995 defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6996 sched.XMM, _.info128, Suff>,
6997 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// Element-type fan-out for the 132-form packed FMA: "ps" (f32) and "pd"
// (f64, with VEX_W). Mirrors avx512_fma3p_231_f.
7001 multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
7002 SDNode MaskOpNode, SDNode OpNodeRnd > {
7003 defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
7004 OpNodeRnd, SchedWriteFMA,
7005 avx512vl_f32_info, "PS">;
7006 defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
7007 OpNodeRnd, SchedWriteFMA,
7008 avx512vl_f64_info, "PD">, VEX_W;
// Concrete 132-form packed FMA instruction families.
// NOTE(review): original line 7012 (the remaining template args of
// VFMADD132, presumably `fma, X86FmaddRnd>;`) is elided — verify upstream.
7011 defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
7013 defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
7014 X86Fmsub, X86FmsubRnd>;
7015 defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
7016 X86Fmaddsub, X86FmaddsubRnd>;
7017 defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
7018 X86Fmsubadd, X86FmsubaddRnd>;
7019 defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
7020 X86Fnmadd, X86FnmaddRnd>;
7021 defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
7022 X86Fnmsub, X86FnmsubRnd>;
// Common machinery for one scalar FMA form (213/231/132). Emits:
//  - "_Int" intrinsic variants (r_Int/m_Int/rb_Int) on the full VR128-class
//    registers with null_frag patterns (patterns are supplied separately);
//  - isCodeGenOnly FRC-register variants (r/m/rb) carrying the RHS_* patterns
//    passed in by the caller; MaskOnlyReg suppresses the reg/rb patterns.
// All have dst tied to $src1; rb variants read MXCSR and encode EVEX.B+RC.
7025 multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7026 dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
7027 let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
7028 defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7029 (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
7030 "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
7031 AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
7034 defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7035 (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
7036 "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
7037 AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
7039 let Uses = [MXCSR] in
7040 defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7041 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
7042 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
7043 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
7045 let isCodeGenOnly = 1, isCommutable = 1 in {
7046 def r : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
7047 (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
7048 !strconcat(OpcodeStr,
7049 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
7050 !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
7051 def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
7052 (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
7053 !strconcat(OpcodeStr,
7054 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
7055 [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
7057 let Uses = [MXCSR] in
7058 def rb : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
7059 (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
7060 !strconcat(OpcodeStr,
7061 "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
7062 !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
7063 Sched<[SchedWriteFMA.Scl]>;
7064 }// isCodeGenOnly = 1
7065 }// Constraints = "$src1 = $dst"
// Emits all three scalar FMA forms (213/231/132) for one element type,
// supplying the register / folded-load / rounding RHS patterns to
// avx512_fma3s_common. The 132 load pattern is in 312 operand order (see
// inline comment) to dodge tablegen duplicate-pattern detection.
// NOTE(review): several pattern-operand lines are elided in this view
// (e.g. originals 7074, 7076, 7084, 7094) — verify against upstream.
7068 multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
7069 string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
7070 X86VectorVTInfo _, string SUFF> {
7071 let ExeDomain = _.ExeDomain in {
7072 defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
7073 // Operands for intrinsic are in 123 order to preserve passthu
7075 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
7077 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
7078 (_.ScalarLdFrag addr:$src3)))),
7079 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
7080 _.FRC:$src3, (i32 timm:$rc)))), 0>;
7082 defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
7083 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
7085 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
7086 (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
7087 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
7088 _.FRC:$src1, (i32 timm:$rc)))), 1>;
7090 // One pattern is 312 order so that the load is in a different place from the
7091 // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
7092 defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
7093 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
7095 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
7096 _.FRC:$src1, _.FRC:$src2))),
7097 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
7098 _.FRC:$src2, (i32 timm:$rc)))), 1>;
// Top-level scalar FMA entry: instantiates all three forms for f32 ("SS",
// CD8 scale 32) and f64 ("SD", CD8 scale 64, VEX_W), gated on HasAVX512.
7102 multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
7103 string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> {
7104 let Predicates = [HasAVX512] in {
7105 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
7106 OpNodeRnd, f32x_info, "SS">,
7107 EVEX_CD8<32, CD8VT1>, VEX_LIG;
7108 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
7109 OpNodeRnd, f64x_info, "SD">,
7110 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
// Concrete scalar FMA families; opcodes are (213, 231, 132) respectively.
7114 defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
7115 defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
7116 defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
7117 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
// Selection patterns mapping scalar FMA DAGs — a Move (blend of the low
// element into $src1) wrapping an Op/MaskedOp/RndOp on extracted elements —
// onto the "_Int" instruction variants defined by avx512_fma3s_common.
// Covers: unmasked reg (213/231), folded-load (213/132/231), merge-masked
// (X86selects_mask with passthru = low element of $src1), zero-masked
// (passthru = ZeroFP), and the rounding-mode (Zrb_Int*) counterparts.
// FRC operands are re-classed into VR128X via COPY_TO_REGCLASS.
// NOTE(review): a number of pattern-operand lines are elided in this view
// (e.g. originals 7125, 7140, 7214, 7261, 7278, 7291, 7300, 7313) —
// verify each pattern against upstream before editing.
7119 multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
7120 SDNode RndOp, string Prefix,
7121 string Suffix, SDNode Move,
7122 X86VectorVTInfo _, PatLeaf ZeroFP> {
7123 let Predicates = [HasAVX512] in {
7124 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7126 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7128 (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
7129 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7130 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7132 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7133 (Op _.FRC:$src2, _.FRC:$src3,
7134 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7135 (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
7136 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7137 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7139 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7141 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7142 (_.ScalarLdFrag addr:$src3)))))),
7143 (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
7144 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7147 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7148 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7149 (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
7150 (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
7151 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7154 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7155 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7156 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7157 (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
7158 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7161 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7162 (X86selects_mask VK1WM:$mask,
7163 (MaskedOp _.FRC:$src2,
7164 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7166 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7167 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
7168 VR128X:$src1, VK1WM:$mask,
7169 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7170 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7172 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7173 (X86selects_mask VK1WM:$mask,
7174 (MaskedOp _.FRC:$src2,
7175 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7176 (_.ScalarLdFrag addr:$src3)),
7177 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7178 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
7179 VR128X:$src1, VK1WM:$mask,
7180 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7182 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7183 (X86selects_mask VK1WM:$mask,
7184 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7185 (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
7186 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7187 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
7188 VR128X:$src1, VK1WM:$mask,
7189 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7191 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7192 (X86selects_mask VK1WM:$mask,
7193 (MaskedOp _.FRC:$src2, _.FRC:$src3,
7194 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7195 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7196 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
7197 VR128X:$src1, VK1WM:$mask,
7198 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7199 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7201 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7202 (X86selects_mask VK1WM:$mask,
7203 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7204 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7205 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7206 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
7207 VR128X:$src1, VK1WM:$mask,
7208 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7210 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7211 (X86selects_mask VK1WM:$mask,
7212 (MaskedOp _.FRC:$src2,
7213 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7215 (_.EltVT ZeroFP)))))),
7216 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
7217 VR128X:$src1, VK1WM:$mask,
7218 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7219 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7221 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7222 (X86selects_mask VK1WM:$mask,
7223 (MaskedOp _.FRC:$src2, _.FRC:$src3,
7224 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7225 (_.EltVT ZeroFP)))))),
7226 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
7227 VR128X:$src1, VK1WM:$mask,
7228 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7229 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7231 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7232 (X86selects_mask VK1WM:$mask,
7233 (MaskedOp _.FRC:$src2,
7234 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7235 (_.ScalarLdFrag addr:$src3)),
7236 (_.EltVT ZeroFP)))))),
7237 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
7238 VR128X:$src1, VK1WM:$mask,
7239 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7241 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7242 (X86selects_mask VK1WM:$mask,
7243 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7244 _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
7245 (_.EltVT ZeroFP)))))),
7246 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
7247 VR128X:$src1, VK1WM:$mask,
7248 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7250 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7251 (X86selects_mask VK1WM:$mask,
7252 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7253 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7254 (_.EltVT ZeroFP)))))),
7255 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
7256 VR128X:$src1, VK1WM:$mask,
7257 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7259 // Patterns with rounding mode.
7260 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7262 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7263 _.FRC:$src3, (i32 timm:$rc)))))),
7264 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
7265 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7266 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7268 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7269 (RndOp _.FRC:$src2, _.FRC:$src3,
7270 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7271 (i32 timm:$rc)))))),
7272 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
7273 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7274 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7276 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7277 (X86selects_mask VK1WM:$mask,
7279 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7280 _.FRC:$src3, (i32 timm:$rc)),
7281 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7282 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
7283 VR128X:$src1, VK1WM:$mask,
7284 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7285 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7287 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7288 (X86selects_mask VK1WM:$mask,
7289 (RndOp _.FRC:$src2, _.FRC:$src3,
7290 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7292 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7293 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
7294 VR128X:$src1, VK1WM:$mask,
7295 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7296 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7298 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7299 (X86selects_mask VK1WM:$mask,
7301 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7302 _.FRC:$src3, (i32 timm:$rc)),
7303 (_.EltVT ZeroFP)))))),
7304 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
7305 VR128X:$src1, VK1WM:$mask,
7306 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7307 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7309 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7310 (X86selects_mask VK1WM:$mask,
7311 (RndOp _.FRC:$src2, _.FRC:$src3,
7312 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7314 (_.EltVT ZeroFP)))))),
7315 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
7316 VR128X:$src1, VK1WM:$mask,
7317 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7318 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
// Instantiate the scalar FMA selection patterns for each op family, once for
// f32 ("SS", X86Movss, fp32imm0) and once for f64 ("SD", X86Movsd, fp64imm0).
7322 defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7323 "SS", X86Movss, v4f32x_info, fp32imm0>;
7324 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7325 "SS", X86Movss, v4f32x_info, fp32imm0>;
7326 defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7327 "SS", X86Movss, v4f32x_info, fp32imm0>;
7328 defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7329 "SS", X86Movss, v4f32x_info, fp32imm0>;
7331 defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7332 "SD", X86Movsd, v2f64x_info, fp64imm0>;
7333 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7334 "SD", X86Movsd, v2f64x_info, fp64imm0>;
7335 defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7336 "SD", X86Movsd, v2f64x_info, fp64imm0>;
7337 defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7338 "SD", X86Movsd, v2f64x_info, fp64imm0>;
7340 //===----------------------------------------------------------------------===//
7341 // AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
7342 //===----------------------------------------------------------------------===//
// VPMADD52 (IFMA) register/memory/broadcast variants. dst is tied to $src1
// (the accumulator); per the inline note, the SDNode takes the two multiply
// operands first and the addend last so commuted load patterns autogenerate.
// NOTE(review): broadcast ("mb") pattern lines 7365/7367 are partially
// elided in this view — verify the full pattern against upstream.
7343 let Constraints = "$src1 = $dst" in {
7344 multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
7345 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
7346 // NOTE: The SDNode have the multiply operands first with the add last.
7347 // This enables commuted load patterns to be autogenerated by tablegen.
7348 let ExeDomain = _.ExeDomain in {
7349 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
7350 (ins _.RC:$src2, _.RC:$src3),
7351 OpcodeStr, "$src3, $src2", "$src2, $src3",
7352 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
7353 AVX512FMA3Base, Sched<[sched]>;
7355 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7356 (ins _.RC:$src2, _.MemOp:$src3),
7357 OpcodeStr, "$src3, $src2", "$src2, $src3",
7358 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
7359 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
7361 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7362 (ins _.RC:$src2, _.ScalarMemOp:$src3),
7363 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
7364 !strconcat("$src2, ${src3}", _.BroadcastStr ),
7366 (_.VT (_.BroadcastLdFrag addr:$src3)),
7368 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
7371 } // Constraints = "$src1 = $dst"
// Width fan-out for VPMADD52: Z (512-bit) gated on HasIFMA, Z256/Z128 gated
// on HasVLX+HasIFMA. CD8 scaling follows element size.
7373 multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7374 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
7375 let Predicates = [HasIFMA] in {
7376 defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
7377 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7379 let Predicates = [HasVLX, HasIFMA] in {
7380 defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
7381 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7382 defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
7383 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// Concrete IFMA instructions: low-52-bit (luq) and high-52-bit (huq) forms.
// NOTE(review): trailing modifier lines (originals 7389/7392) are elided —
// verify against upstream.
7387 defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
7388 SchedWriteVecIMul, avx512vl_i64_info>,
7390 defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
7391 SchedWriteVecIMul, avx512vl_i64_info>,
7394 //===----------------------------------------------------------------------===//
7395 // AVX-512 Scalar convert from sign integer to float/double
7396 //===----------------------------------------------------------------------===//
// Scalar int→FP convert (cvtsi2ss/sd family): hidden-state-free FRC variants
// (rr/rm, isCodeGenOnly, no patterns) plus intrinsic "_Int" variants whose
// patterns call OpNode on the destination vector and the integer source.
// _Uses/_mayRaiseFPException let callers opt out of MXCSR/FP-exception
// modeling (used for the null_frag 32-bit cvtsi2sd forms below, which are
// exact conversions). An AT&T alias maps the memory-size-suffixed mnemonic
// to the register "_Int" form.
7398 multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
7399 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7400 X86MemOperand x86memop, PatFrag ld_frag, string asm,
7401 string mem, list<Register> _Uses = [MXCSR],
7402 bit _mayRaiseFPException = 1> {
7403 let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
7404 mayRaiseFPException = _mayRaiseFPException in {
7405 let hasSideEffects = 0, isCodeGenOnly = 1 in {
7406 def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7407 (ins DstVT.FRC:$src1, SrcRC:$src),
7408 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7409 EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7411 def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7412 (ins DstVT.FRC:$src1, x86memop:$src),
7413 asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
7414 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7415 } // hasSideEffects = 0
7416 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7417 (ins DstVT.RC:$src1, SrcRC:$src2),
7418 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7419 [(set DstVT.RC:$dst,
7420 (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
7421 EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7423 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7424 (ins DstVT.RC:$src1, x86memop:$src2),
7425 asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7426 [(set DstVT.RC:$dst,
7427 (OpNode (DstVT.VT DstVT.RC:$src1),
7428 (ld_frag addr:$src2)))]>,
7429 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7431 def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7432 (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
7433 DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
// Static-rounding ("rrb_Int") variant of scalar int→FP convert plus its
// AT&T-syntax alias; reads MXCSR, encodes EVEX.B + EVEX_RC.
// NOTE(review): originals 7439 (last template param, presumably
// `string mem`), 7443, 7447-7448 (timm:$rc operand) are elided — verify.
7436 multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7437 X86FoldableSchedWrite sched, RegisterClass SrcRC,
7438 X86VectorVTInfo DstVT, string asm,
7440 let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
7441 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7442 (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7444 "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7445 [(set DstVT.RC:$dst,
7446 (OpNode (DstVT.VT DstVT.RC:$src1),
7449 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7450 def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
7451 (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
7452 DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
// Combines the plain (avx512_vcvtsi) and static-rounding (avx512_vcvtsi_round)
// scalar int→FP convert variants under one name, with VEX_LIG.
7455 multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
7456 X86FoldableSchedWrite sched,
7457 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7458 X86MemOperand x86memop, PatFrag ld_frag,
7459 string asm, string mem> {
7460 defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
7461 avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7462 ld_frag, asm, mem>, VEX_LIG;
// Concrete signed (cvtsi2ss/sd) and unsigned (cvtusi2ss/sd) scalar int→FP
// instructions for GR32/GR64 sources, plus:
//  - ambiguous-mnemonic AT&T aliases defaulting to the 32-bit memory form;
//  - Pat<>s selecting the isCodeGenOnly rr/rm forms for (any_{sint,uint}_to_fp)
//    on registers and loads, with IMPLICIT_DEF feeding the tied first operand.
// The 32-bit →f64 forms (VCVTSI2SDZ / VCVTUSI2SDZ) use null_frag and opt out
// of MXCSR/FP-exception modeling ([], 0 trailing args) — int32→f64 is exact.
// NOTE(review): the SrcRC/sched argument lines of several defms (originals
// 7467, 7471, 7478, 7506, 7510, 7517) are elided in this view — verify.
7465 let Predicates = [HasAVX512] in {
7466 defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7468 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7469 XS, EVEX_CD8<32, CD8VT1>;
7470 defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7472 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7473 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7474 defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7475 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
7476 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7477 defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7479 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7480 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7482 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7483 (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7484 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7485 (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7487 def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
7488 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7489 def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
7490 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7491 def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
7492 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7493 def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
7494 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7496 def : Pat<(f32 (any_sint_to_fp GR32:$src)),
7497 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7498 def : Pat<(f32 (any_sint_to_fp GR64:$src)),
7499 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7500 def : Pat<(f64 (any_sint_to_fp GR32:$src)),
7501 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7502 def : Pat<(f64 (any_sint_to_fp GR64:$src)),
7503 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7505 defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7507 v4f32x_info, i32mem, loadi32,
7508 "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
7509 defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7511 v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7512 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7513 defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7514 i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
7515 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7516 defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7518 v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7519 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7521 def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7522 (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7523 def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7524 (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7526 def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
7527 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7528 def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
7529 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7530 def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
7531 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7532 def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
7533 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7535 def : Pat<(f32 (any_uint_to_fp GR32:$src)),
7536 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7537 def : Pat<(f32 (any_uint_to_fp GR64:$src)),
7538 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7539 def : Pat<(f64 (any_uint_to_fp GR32:$src)),
7540 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7541 def : Pat<(f64 (any_uint_to_fp GR64:$src)),
7542 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7545 //===----------------------------------------------------------------------===//
7546 // AVX-512 Scalar convert from float/double to integer
7547 //===----------------------------------------------------------------------===//
// Scalar FP→int convert intrinsic variants: rr_Int (OpNode on register),
// rrb_Int (OpNodeRnd with static rounding, reads MXCSR, EVEX.B+RC), and
// rm_Int (OpNode on a scalar-int memory fragment). Also emits AT&T aliases
// carrying the operand-size suffix (aliasStr) for all three forms.
// NOTE(review): originals 7551 (presumably `SDNode OpNodeRnd,`) and 7564
// (the rrb Sched<...> line) are elided in this view — verify upstream.
7549 multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7550 X86VectorVTInfo DstVT, SDNode OpNode,
7552 X86FoldableSchedWrite sched, string asm,
7553 string aliasStr, Predicate prd = HasAVX512> {
7554 let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
7555 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7556 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7557 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7558 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7559 let Uses = [MXCSR] in
7560 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7561 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7562 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
7563 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7565 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7566 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7567 [(set DstVT.RC:$dst, (OpNode
7568 (SrcVT.ScalarIntMemFrags addr:$src)))]>,
7569 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7570 } // Predicates = [prd]
7572 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7573 (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7574 def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7575 (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7576 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7577 (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7578 SrcVT.IntScalarMemOp:$src), 0, "att">;
// Concrete scalar FP→int intrinsic conversions: {ss,sd} sources × {si,usi}
// destinations × {32,64}-bit results. 64-bit results carry VEX_W; signed
// forms use opcode 0x2D, unsigned 0x79. aliasStr ({l}/{q}) selects the
// AT&T size-suffixed alias.
7581 // Convert float/double to signed/unsigned int 32/64
7582 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7583 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7584 XS, EVEX_CD8<32, CD8VT1>;
7585 defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7586 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7587 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7588 defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7589 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7590 XS, EVEX_CD8<32, CD8VT1>;
7591 defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7592 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7593 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7594 defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7595 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7596 XD, EVEX_CD8<64, CD8VT1>;
7597 defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7598 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7599 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7600 defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7601 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7602 XD, EVEX_CD8<64, CD8VT1>;
7603 defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7604 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7605 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// isCodeGenOnly scalar FP→int convert on FRC registers / scalar loads,
// with direct OpNode patterns (used below for lrint/llrint selection).
7607 multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
7608 X86VectorVTInfo DstVT, SDNode OpNode,
7609 X86FoldableSchedWrite sched,
7611 let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
7612 let isCodeGenOnly = 1 in {
7613 def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
7614 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7615 [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
7616 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7617 def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
7618 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7619 [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
7620 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7622 } // Predicates = [HasAVX512]
// Instantiate avx512_cvt_s so that lrint/llrint on f32/f64 select the
// vcvtss2si / vcvtsd2si instructions (32-bit dest uses lrint, 64-bit llrint).
7625 defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
7626 lrint, WriteCvtSS2I,
7627 "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7628 defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
7629 llrint, WriteCvtSS2I,
7630 "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7631 defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
7632 lrint, WriteCvtSD2I,
7633 "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7634 defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
7635 llrint, WriteCvtSD2I,
7636 "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
// Map i64-typed lrint (not just llrint) onto the 64-bit conversion
// instructions for both register and folded-load operands.
// NOTE(review): the closing brace of this `let` block is outside the visible
// lines (embedded numbering jumps 7640->7642 and ends at 7643).
7638 let Predicates = [HasAVX512] in {
7639 def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
7640 def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;
7642 def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
7643 def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
7646 // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
7647 // which produce unnecessary vmovs{s,d} instructions
// Each pattern recognizes "insert converted scalar into low element of $dst"
// (X86Movss/X86Movsd of a scalar_to_vector of a [u]int-to-fp) and selects the
// _Int form of the corresponding VCVT[U]SI(64)2S{S,D} instruction, which
// performs the merge directly. Register and folded-load variants are paired.
7648 let Predicates = [HasAVX512] in {
// Signed i64 -> f32, register and memory.
7649 def : Pat<(v4f32 (X86Movss
7650 (v4f32 VR128X:$dst),
7651 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
7652 (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7654 def : Pat<(v4f32 (X86Movss
7655 (v4f32 VR128X:$dst),
7656 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
7657 (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
// Signed i32 -> f32.
7659 def : Pat<(v4f32 (X86Movss
7660 (v4f32 VR128X:$dst),
7661 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
7662 (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7664 def : Pat<(v4f32 (X86Movss
7665 (v4f32 VR128X:$dst),
7666 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
7667 (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
// Signed i64 -> f64.
7669 def : Pat<(v2f64 (X86Movsd
7670 (v2f64 VR128X:$dst),
7671 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
7672 (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7674 def : Pat<(v2f64 (X86Movsd
7675 (v2f64 VR128X:$dst),
7676 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
7677 (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
// Signed i32 -> f64.
7679 def : Pat<(v2f64 (X86Movsd
7680 (v2f64 VR128X:$dst),
7681 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
7682 (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7684 def : Pat<(v2f64 (X86Movsd
7685 (v2f64 VR128X:$dst),
7686 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
7687 (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
// Unsigned i64 -> f32 (AVX-512-only VCVTUSI forms).
7689 def : Pat<(v4f32 (X86Movss
7690 (v4f32 VR128X:$dst),
7691 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
7692 (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7694 def : Pat<(v4f32 (X86Movss
7695 (v4f32 VR128X:$dst),
7696 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
7697 (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
// Unsigned i32 -> f32.
7699 def : Pat<(v4f32 (X86Movss
7700 (v4f32 VR128X:$dst),
7701 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
7702 (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7704 def : Pat<(v4f32 (X86Movss
7705 (v4f32 VR128X:$dst),
7706 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
7707 (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
// Unsigned i64 -> f64.
7709 def : Pat<(v2f64 (X86Movsd
7710 (v2f64 VR128X:$dst),
7711 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
7712 (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7714 def : Pat<(v2f64 (X86Movsd
7715 (v2f64 VR128X:$dst),
7716 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
7717 (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
// Unsigned i32 -> f64.
7719 def : Pat<(v2f64 (X86Movsd
7720 (v2f64 VR128X:$dst),
7721 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
7722 (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7724 def : Pat<(v2f64 (X86Movsd
7725 (v2f64 VR128X:$dst),
7726 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
7727 (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7728 } // Predicates = [HasAVX512]
7730 // Convert float/double to signed/unsigned int 32/64 with truncation
// Emits the full set of truncating scalar conversions:
//  - isCodeGenOnly rr/rm forms matching OpNode on FRC / scalar load,
//  - _Int forms matching OpNodeInt on the full XMM register / int scalar mem,
//  - an rrb_Int SAE form matching OpNodeSAE (reads MXCSR explicitly),
//  - AT&T-syntax InstAliases that append the size suffix (aliasStr).
// NOTE(review): closing braces fall on lines dropped from this chunk
// (embedded numbering jumps 7745->7748 and ends before 7772).
7731 multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7732 X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
7733 SDNode OpNodeInt, SDNode OpNodeSAE,
7734 X86FoldableSchedWrite sched, string aliasStr,
7735 Predicate prd = HasAVX512> {
7736 let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
7737 let isCodeGenOnly = 1 in {
7738 def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7739 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7740 [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7741 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7742 def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7743 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7744 [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7745 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7748 def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7749 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7750 [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7751 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
// SAE form suppresses exceptions but still depends on MXCSR contents.
7752 let Uses = [MXCSR] in
7753 def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7754 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7755 [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7756 EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
7757 def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7758 (ins _SrcRC.IntScalarMemOp:$src),
7759 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7760 [(set _DstRC.RC:$dst,
7761 (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
7762 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7763 } // Predicates = [prd]
// AT&T aliases with explicit size suffix, e.g. "vcvttss2si{l|q}".
7765 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7766 (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7767 def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7768 (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7769 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7770 (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7771 _SrcRC.IntScalarMemOp:$src), 0, "att">;
// Truncating scalar FP -> signed int conversions (opcode 0x2C) ...
7774 defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7775 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7776 "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7777 defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7778 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7779 "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7780 defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7781 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7782 "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7783 defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7784 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7785 "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
// ... and -> unsigned int conversions (opcode 0x78, AVX-512-only).
7787 defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7788 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7789 "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7790 defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7791 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7792 "{q}">, XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7793 defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7794 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7795 "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7796 defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7797 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7798 "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7800 //===----------------------------------------------------------------------===//
7801 // AVX-512 Convert form float to double and back
7802 //===----------------------------------------------------------------------===//
// Scalar FP<->FP conversion: maskable _Int forms on the XMM register class
// (merging $src1's upper elements) plus hidden (isCodeGenOnly,
// hasSideEffects=0) FRC forms with no patterns.
// NOTE(review): lines are missing from this chunk (embedded numbering jumps
// 7818->7820 and 7831->7835), so the rm_Int trailing modifiers and the
// multiclass closing braces are not fully visible.
7804 let Uses = [MXCSR], mayRaiseFPException = 1 in
7805 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7806 X86VectorVTInfo _Src, SDNode OpNode,
7807 X86FoldableSchedWrite sched> {
7808 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7809 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7810 "$src2, $src1", "$src1, $src2",
7811 (_.VT (OpNode (_.VT _.RC:$src1),
7812 (_Src.VT _Src.RC:$src2)))>,
7813 EVEX_4V, VEX_LIG, Sched<[sched]>;
7814 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7815 (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7816 "$src2, $src1", "$src1, $src2",
7817 (_.VT (OpNode (_.VT _.RC:$src1),
7818 (_Src.ScalarIntMemFrags addr:$src2)))>,
7820 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Pattern-less FRC forms used by the compiler only; assembler never sees them.
7822 let isCodeGenOnly = 1, hasSideEffects = 0 in {
7823 def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7824 (ins _.FRC:$src1, _Src.FRC:$src2),
7825 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7826 EVEX_4V, VEX_LIG, Sched<[sched]>;
7828 def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7829 (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7830 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7831 EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7835 // Scalar Conversion with SAE - suppress all exceptions
// Adds only the register-register {sae} form (EVEX_B), matching OpNodeSAE.
7836 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7837 X86VectorVTInfo _Src, SDNode OpNodeSAE,
7838 X86FoldableSchedWrite sched> {
7839 let Uses = [MXCSR] in
7840 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7841 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7842 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7843 (_.VT (OpNodeSAE (_.VT _.RC:$src1),
7844 (_Src.VT _Src.RC:$src2)))>,
7845 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
7848 // Scalar Conversion with rounding control (RC)
// Adds the register form with an explicit rounding-mode immediate ($rc),
// matching OpNodeRnd with the rounding mode as an i32 timm.
// NOTE(review): the trailing modifiers / closing brace (original lines
// 7859-7860) are not visible in this chunk.
7849 multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7850 X86VectorVTInfo _Src, SDNode OpNodeRnd,
7851 X86FoldableSchedWrite sched> {
7852 let Uses = [MXCSR] in
7853 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7854 (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7855 "$rc, $src2, $src1", "$src1, $src2, $rc",
7856 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7857 (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
7858 EVEX_4V, VEX_LIG, Sched<[sched]>,
// Narrowing scalar conversion (e.g. sd->ss, ss->sh): combines the plain
// scalar forms with the rounding-control form, since truncations take RC.
7861 multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
7862 SDNode OpNode, SDNode OpNodeRnd,
7863 X86FoldableSchedWrite sched,
7864 X86VectorVTInfo _src, X86VectorVTInfo _dst,
7865 Predicate prd = HasAVX512> {
7866 let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7867 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7868 avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7869 OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>;
// Widening scalar conversion (e.g. ss->sd, sh->ss): combines the plain
// scalar forms with the SAE form, since extensions are exact and take {sae}.
7873 multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
7874 SDNode OpNode, SDNode OpNodeSAE,
7875 X86FoldableSchedWrite sched,
7876 X86VectorVTInfo _src, X86VectorVTInfo _dst,
7877 Predicate prd = HasAVX512> {
7878 let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7879 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7880 avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
7881 EVEX_CD8<_src.EltSize, CD8VT1>;
// Scalar FP<->FP conversion instantiations: f64<->f32 under AVX512, and the
// FP16 variants (sd/ss <-> sh) gated on HasFP16 with T_MAP5/T_MAP6 encodings.
// NOTE(review): original line 7889 (trailing args of the VCVTSS2SD defm) is
// missing from this chunk.
7884 defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
7885 X86froundsRnd, WriteCvtSD2SS, f64x_info,
7886 f32x_info>, XD, VEX_W;
7887 defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
7888 X86fpextsSAE, WriteCvtSS2SD, f32x_info,
7890 defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
7891 X86froundsRnd, WriteCvtSD2SS, f64x_info,
7892 f16x_info, HasFP16>, T_MAP5XD, VEX_W;
7893 defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
7894 X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7895 f64x_info, HasFP16>, T_MAP5XS;
7896 defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
7897 X86froundsRnd, WriteCvtSD2SS, f32x_info,
7898 f16x_info, HasFP16>, T_MAP5PS;
7899 defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
7900 X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7901 f32x_info, HasFP16>, T_MAP6PS;
// Select scalar fpextend/fpround through the codegen-only FRC conversion
// instructions. The destination operand's upper bits are don't-care, so an
// IMPLICIT_DEF is passed as the merge source. Folded-load forms additionally
// require OptForSize (otherwise a separate load is preferred).
7903 def : Pat<(f64 (any_fpextend FR32X:$src)),
7904 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7905 Requires<[HasAVX512]>;
7906 def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
7907 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7908 Requires<[HasAVX512, OptForSize]>;
7910 def : Pat<(f32 (any_fpround FR64X:$src)),
7911 (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7912 Requires<[HasAVX512]>;
// FP16 widening (half -> float/double), gated on HasFP16.
7914 def : Pat<(f32 (any_fpextend FR16X:$src)),
7915 (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
7916 Requires<[HasFP16]>;
7917 def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
7918 (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
7919 Requires<[HasFP16, OptForSize]>;
7921 def : Pat<(f64 (any_fpextend FR16X:$src)),
7922 (VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
7923 Requires<[HasFP16]>;
7924 def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
7925 (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7926 Requires<[HasFP16, OptForSize]>;
// FP16 narrowing (float/double -> half).
7928 def : Pat<(f16 (any_fpround FR32X:$src)),
7929 (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
7930 Requires<[HasFP16]>;
7931 def : Pat<(f16 (any_fpround FR64X:$src)),
7932 (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
7933 Requires<[HasFP16]>;
// Merge-into-low-element forms: convert element 0 of $src and blend into
// $dst via the _Int instructions (avoids a separate vmovss/vmovsd).
7935 def : Pat<(v4f32 (X86Movss
7936 (v4f32 VR128X:$dst),
7937 (v4f32 (scalar_to_vector
7938 (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
7939 (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
7940 Requires<[HasAVX512]>;
7942 def : Pat<(v2f64 (X86Movsd
7943 (v2f64 VR128X:$dst),
7944 (v2f64 (scalar_to_vector
7945 (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
7946 (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
7947 Requires<[HasAVX512]>;
7949 //===----------------------------------------------------------------------===//
7950 // AVX-512 Vector convert from signed/unsigned integer to float/double
7951 // and from float/double to signed/unsigned integer
7952 //===----------------------------------------------------------------------===//
// Workhorse multiclass for packed conversions: emits rr, rm and broadcast
// (rmb) forms via AVX512_maskable_cvt, with separate OpNode (ISel) and
// MaskOpNode (masked-select) pattern operators. The load DAGs are template
// parameters so callers (e.g. avx512_vcvt_fpextend) can override them.
// NOTE(review): several original lines are dropped from this chunk (embedded
// numbering has gaps, e.g. 7970->7972, 7994->7996, 8006->8008), so vselect
// fallback operands and closing braces are not fully visible here.
7954 multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7955 X86VectorVTInfo _Src, SDPatternOperator OpNode, SDPatternOperator MaskOpNode,
7956 X86FoldableSchedWrite sched,
7957 string Broadcast = _.BroadcastStr,
7958 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7959 RegisterClass MaskRC = _.KRCWM,
7960 dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
7961 dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
7962 let Uses = [MXCSR], mayRaiseFPException = 1 in {
7963 defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
7965 (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
7966 (ins MaskRC:$mask, _Src.RC:$src),
7967 OpcodeStr, "$src", "$src",
7968 (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7969 (vselect_mask MaskRC:$mask,
7970 (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7972 (vselect_mask MaskRC:$mask,
7973 (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7975 EVEX, Sched<[sched]>;
7977 defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7979 (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
7980 (ins MaskRC:$mask, MemOp:$src),
7981 OpcodeStr#Alias, "$src", "$src",
7983 (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
7984 (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
7985 EVEX, Sched<[sched.Folded]>;
// Broadcast form: scalar memory operand splatted across the source vector.
7987 defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7988 (ins _Src.ScalarMemOp:$src),
7989 (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
7990 (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
7992 "${src}"#Broadcast, "${src}"#Broadcast,
7993 (_.VT (OpNode (_Src.VT
7994 (_Src.BroadcastLdFrag addr:$src))
7996 (vselect_mask MaskRC:$mask,
8000 (_Src.BroadcastLdFrag addr:$src)))),
8002 (vselect_mask MaskRC:$mask,
8006 (_Src.BroadcastLdFrag addr:$src)))),
8008 EVEX, EVEX_B, Sched<[sched.Folded]>;
8011 // Conversion with SAE - suppress all exceptions
// Packed register-register {sae} form (EVEX_B) matching OpNodeSAE.
8012 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8013 X86VectorVTInfo _Src, SDNode OpNodeSAE,
8014 X86FoldableSchedWrite sched> {
8015 let Uses = [MXCSR] in
8016 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8017 (ins _Src.RC:$src), OpcodeStr,
8018 "{sae}, $src", "$src, {sae}",
8019 (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
8020 EVEX, EVEX_B, Sched<[sched]>;
8023 // Conversion with rounding control (RC)
// Packed register form with explicit rounding-mode immediate (EVEX_RC).
8024 multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8025 X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd,
8026 X86FoldableSchedWrite sched> {
8027 let Uses = [MXCSR] in
8028 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8029 (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
8030 "$rc, $src", "$src, $rc",
8031 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
8032 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
8035 // Similar to avx512_vcvt_fp, but uses an extload for the memory form.
// Derived multiclass: overrides the LdDAG/MaskLdDAG template args of
// avx512_vcvt_fp with an extload PatFrag looked up by the source VT name.
// NOTE(review): original line 8038 (MaskOpNode parameter) is missing from
// this chunk.
8036 multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8037 X86VectorVTInfo _Src, SDPatternOperator OpNode,
8039 X86FoldableSchedWrite sched,
8040 string Broadcast = _.BroadcastStr,
8041 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
8042 RegisterClass MaskRC = _.KRCWM>
8043 : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
8044 Alias, MemOp, MaskRC,
8045 (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
8046 (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
8048 // Extend [Float to Double, Half to Float]
// 512-bit form under `prd` (with SAE variant); 128/256-bit forms additionally
// require VLX. The 128-bit form converts only the low half of the source
// register, hence the X86(any_)vfpext nodes and f64mem memory operand.
8049 multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
8050 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8051 X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
8052 let Predicates = [prd] in {
8053 defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info512, _src.info256,
8054 any_fpextend, fpextend, sched.ZMM>,
8055 avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
8056 X86vfpextSAE, sched.ZMM>, EVEX_V512;
8058 let Predicates = [prd, HasVLX] in {
8059 defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
8060 X86any_vfpext, X86vfpext, sched.XMM,
8061 _dst.info128.BroadcastStr,
8062 "", f64mem>, EVEX_V128;
8063 defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
8064 any_fpextend, fpextend, sched.YMM>, EVEX_V256;
8068 // Truncate [Double to Float, Float to Half]
// 512->256 form with rounding control; 128/256-bit forms need VLX. The Z128
// form's instruction patterns are disabled (null_frag) because the result
// only fills the low elements; explicit Pats below match X86any_vfpround /
// X86vmfpround instead. The "x"/"y" InstAliases disambiguate the memory-form
// mnemonics in AT&T syntax.
// NOTE(review): some original lines are dropped from this chunk (embedded
// numbering has gaps, e.g. 8100->8102, 8123->8126), so a few Pat mask
// operands and closing braces are not fully visible here.
8069 multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
8070 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8071 X86SchedWriteWidths sched, Predicate prd = HasAVX512,
8072 PatFrag bcast128 = _src.info128.BroadcastLdFrag,
8073 PatFrag bcast256 = _src.info256.BroadcastLdFrag,
8074 PatFrag bcast512 = _src.info512.BroadcastLdFrag,
8075 PatFrag loadVT128 = _src.info128.LdFrag,
8076 PatFrag loadVT256 = _src.info256.LdFrag,
8077 PatFrag loadVT512 = _src.info512.LdFrag,
8078 RegisterClass maskRC128 = _src.info128.KRCWM,
8079 RegisterClass maskRC256 = _src.info256.KRCWM,
8080 RegisterClass maskRC512 = _src.info512.KRCWM> {
8081 let Predicates = [prd] in {
8082 defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
8083 X86any_vfpround, X86vfpround, sched.ZMM>,
8084 avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
8085 X86vfproundRnd, sched.ZMM>, EVEX_V512;
8087 let Predicates = [prd, HasVLX] in {
8088 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
8089 null_frag, null_frag, sched.XMM,
8090 _src.info128.BroadcastStr, "{x}",
8091 f128mem, maskRC128>, EVEX_V128;
8092 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
8093 X86any_vfpround, X86vfpround,
8094 sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;
8096 // Special patterns to allow use of X86vmfpround for masking. Instruction
8097 // patterns have been disabled with null_frag.
8098 def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
8099 (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
8100 def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
8102 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
8103 def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
8105 (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;
8107 def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
8108 (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
8109 def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
8111 (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
8112 def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
8114 (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;
8116 def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
8117 (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
8118 def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
8119 (_dst.info128.VT VR128X:$src0), maskRC128:$mask),
8120 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
8121 def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
8122 _dst.info128.ImmAllZerosV, maskRC128:$mask),
8123 (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
// AT&T-syntax aliases with explicit "x" (128-bit source) suffix.
8126 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8127 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
8128 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8129 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8130 VK2WM:$mask, VR128X:$src), 0, "att">;
8131 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
8132 "$dst {${mask}} {z}, $src}",
8133 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8134 VK2WM:$mask, VR128X:$src), 0, "att">;
8135 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8136 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
8137 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8138 "$dst {${mask}}, ${src}{1to2}}",
8139 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8140 VK2WM:$mask, f64mem:$src), 0, "att">;
8141 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8142 "$dst {${mask}} {z}, ${src}{1to2}}",
8143 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8144 VK2WM:$mask, f64mem:$src), 0, "att">;
// AT&T-syntax aliases with explicit "y" (256-bit source) suffix.
8146 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8147 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
8148 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8149 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8150 VK4WM:$mask, VR256X:$src), 0, "att">;
8151 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8152 "$dst {${mask}} {z}, $src}",
8153 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8154 VK4WM:$mask, VR256X:$src), 0, "att">;
8155 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8156 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
8157 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8158 "$dst {${mask}}, ${src}{1to4}}",
8159 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8160 VK4WM:$mask, f64mem:$src), 0, "att">;
8161 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8162 "$dst {${mask}} {z}, ${src}{1to4}}",
8163 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8164 VK4WM:$mask, f64mem:$src), 0, "att">;
// Packed double<->single conversions: pd->ps narrows (cvt_trunc), ps->pd
// widens (cvt_extend). CD8VH on ps->pd: memory operand is half vector width.
8167 defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
8168 avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
8169 VEX_W, PD, EVEX_CD8<64, CD8VF>;
8170 defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
8171 avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
8172 PS, EVEX_CD8<32, CD8VH>;
8174 // Extend Half to Double
// FP16-only: 512-bit form with SAE plus VLX 128/256-bit forms whose sources
// are the low 2/4 half elements of an XMM register ({1to2}/{1to4} broadcast
// strings). An extra Pat maps the v8f64 extload onto the Zrm form.
// NOTE(review): trailing args of the Z128/Z256 defms and closing braces fall
// on lines dropped from this chunk (embedded numbering jumps 8187->8189,
// 8190->8195).
8175 multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
8176 X86SchedWriteWidths sched> {
8177 let Predicates = [HasFP16] in {
8178 defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info,
8179 any_fpextend, fpextend, sched.ZMM>,
8180 avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info,
8181 X86vfpextSAE, sched.ZMM>, EVEX_V512;
8182 def : Pat<(v8f64 (extloadv8f16 addr:$src)),
8183 (!cast<Instruction>(NAME # "Zrm") addr:$src)>;
8185 let Predicates = [HasFP16, HasVLX] in {
8186 defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info,
8187 X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "",
8189 defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info,
8190 X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "",
8195 // Truncate Double to Half
// FP16-only pd->ph: all destinations are v8f16 in an XMM register, so the
// "x"/"y"/"z" AT&T aliases are required to disambiguate the source width.
// VLX forms use null_frag (patterns supplied elsewhere via X86vmfpround).
// NOTE(review): several original lines are dropped from this chunk (embedded
// numbering jumps 8205->8207, 8208->8211), so trailing defm args and closing
// braces are not fully visible here.
8196 multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
8197 let Predicates = [HasFP16] in {
8198 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info,
8199 X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">,
8200 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info,
8201 X86vfproundRnd, sched.ZMM>, EVEX_V512;
8203 let Predicates = [HasFP16, HasVLX] in {
8204 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag,
8205 null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8207 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag,
8208 null_frag, sched.YMM, "{1to4}", "{y}", f256mem,
// "x" suffix aliases (128-bit source).
8211 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8212 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8213 VR128X:$src), 0, "att">;
8214 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8215 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8216 VK2WM:$mask, VR128X:$src), 0, "att">;
8217 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8218 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8219 VK2WM:$mask, VR128X:$src), 0, "att">;
8220 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8221 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8222 i64mem:$src), 0, "att">;
8223 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8224 "$dst {${mask}}, ${src}{1to2}}",
8225 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8226 VK2WM:$mask, i64mem:$src), 0, "att">;
8227 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8228 "$dst {${mask}} {z}, ${src}{1to2}}",
8229 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8230 VK2WM:$mask, i64mem:$src), 0, "att">;
// "y" suffix aliases (256-bit source).
8232 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8233 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8234 VR256X:$src), 0, "att">;
8235 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8236 "$dst {${mask}}, $src}",
8237 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8238 VK4WM:$mask, VR256X:$src), 0, "att">;
8239 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8240 "$dst {${mask}} {z}, $src}",
8241 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8242 VK4WM:$mask, VR256X:$src), 0, "att">;
8243 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8244 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8245 i64mem:$src), 0, "att">;
8246 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8247 "$dst {${mask}}, ${src}{1to4}}",
8248 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8249 VK4WM:$mask, i64mem:$src), 0, "att">;
8250 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8251 "$dst {${mask}} {z}, ${src}{1to4}}",
8252 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8253 VK4WM:$mask, i64mem:$src), 0, "att">;
// "z" suffix aliases (512-bit source).
8255 def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
8256 (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
8257 VR512:$src), 0, "att">;
8258 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
8259 "$dst {${mask}}, $src}",
8260 (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
8261 VK8WM:$mask, VR512:$src), 0, "att">;
8262 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
8263 "$dst {${mask}} {z}, $src}",
8264 (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
8265 VK8WM:$mask, VR512:$src), 0, "att">;
8266 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
8267 (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
8268 i64mem:$src), 0, "att">;
8269 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
8270 "$dst {${mask}}, ${src}{1to8}}",
8271 (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
8272 VK8WM:$mask, i64mem:$src), 0, "att">;
8273 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
8274 "$dst {${mask}} {z}, ${src}{1to8}}",
8275 (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
8276 VK8WM:$mask, i64mem:$src), 0, "att">;
// FP16 packed conversion instantiations (ps<->phx via the generic
// trunc/extend multiclasses; pd<->ph via the dedicated ones above).
8279 defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info,
8280 avx512vl_f32_info, SchedWriteCvtPD2PS,
8281 HasFP16>, T_MAP5PD, EVEX_CD8<32, CD8VF>;
8282 defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info,
8283 avx512vl_f16_info, SchedWriteCvtPS2PD,
8284 HasFP16>, T_MAP6PD, EVEX_CD8<16, CD8VH>;
8285 defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>,
8286 VEX_W, T_MAP5PD, EVEX_CD8<64, CD8VF>;
8287 defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>,
8288 T_MAP5PS, EVEX_CD8<16, CD8VQ>;
// Masking patterns for the VLX VCVTPD2PH forms, whose instruction patterns
// were disabled with null_frag in avx512_cvtpd2ph: plain, masked (merge) and
// zero-masked variants for register, load and broadcast-load sources.
// NOTE(review): this chunk drops some lines (embedded numbering jumps e.g.
// 8295->8297), so the mask operand of several source patterns is not visible.
8290 let Predicates = [HasFP16, HasVLX] in {
8291 // Special patterns to allow use of X86vmfpround for masking. Instruction
8292 // patterns have been disabled with null_frag.
// 256-bit (v4f64) source.
8293 def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))),
8294 (VCVTPD2PHZ256rr VR256X:$src)>;
8295 def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0),
8297 (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
8298 def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV,
8300 (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
8302 def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))),
8303 (VCVTPD2PHZ256rm addr:$src)>;
8304 def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0),
8306 (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8307 def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV,
8309 (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>;
8311 def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))),
8312 (VCVTPD2PHZ256rmb addr:$src)>;
8313 def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8314 (v8f16 VR128X:$src0), VK4WM:$mask),
8315 (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8316 def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8317 v8f16x_info.ImmAllZerosV, VK4WM:$mask),
8318 (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
// 128-bit (v2f64) source.
8320 def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))),
8321 (VCVTPD2PHZ128rr VR128X:$src)>;
8322 def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0),
8324 (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8325 def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV,
8327 (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
8329 def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))),
8330 (VCVTPD2PHZ128rm addr:$src)>;
8331 def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0),
8333 (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8334 def : Pat<(X86vmfpround (loadv2f64 addr:$src), v8f16x_info.ImmAllZerosV,
8336 (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>;
8338 def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))),
8339 (VCVTPD2PHZ128rmb addr:$src)>;
8340 def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8341 (v8f16 VR128X:$src0), VK2WM:$mask),
8342 (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8343 def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8344 v8f16x_info.ImmAllZerosV, VK2WM:$mask),
8345 (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
8348 // Convert Signed/Unsigned Doubleword to Double
// i32 -> f64 is exact (every int32 is representable as a double), so these
// instructions read no MXCSR state and cannot raise FP exceptions.
8349 let Uses = []<Register>, mayRaiseFPException = 0 in
8350 multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8351 SDNode MaskOpNode, SDPatternOperator OpNode128,
8352 SDNode MaskOpNode128,
8353 X86SchedWriteWidths sched> {
8354 // No rounding in this op
8355 let Predicates = [HasAVX512] in
8356 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
8357 MaskOpNode, sched.ZMM>, EVEX_V512;
8359 let Predicates = [HasVLX] in {
// The 128-bit form converts only the low 2 of the 4 i32 source elements,
// hence the explicit "{1to2}" broadcast string and the extra
// scalar_to_vector(loadi64) patterns that fold a 64-bit load of those
// two elements.
8360 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
8361 OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
8363 (v2f64 (OpNode128 (bc_v4i32
8365 (scalar_to_vector (loadi64 addr:$src)))))),
8366 (v2f64 (MaskOpNode128 (bc_v4i32
8368 (scalar_to_vector (loadi64 addr:$src))))))>,
8370 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
8371 MaskOpNode, sched.YMM>, EVEX_V256;
8375 // Convert Signed/Unsigned Doubleword to Float
// i32 -> f32 conversions; the 512-bit form additionally gets a
// static-rounding (rc) variant via avx512_vcvt_fp_rc.
8376 multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8377 SDNode MaskOpNode, SDNode OpNodeRnd,
8378 X86SchedWriteWidths sched> {
8379 let Predicates = [HasAVX512] in
8380 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
8381 MaskOpNode, sched.ZMM>,
8382 avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
8383 OpNodeRnd, sched.ZMM>, EVEX_V512;
8385 let Predicates = [HasVLX] in {
8386 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
8387 MaskOpNode, sched.XMM>, EVEX_V128;
8388 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
8389 MaskOpNode, sched.YMM>, EVEX_V256;
8393 // Convert Float to Signed/Unsigned Doubleword with truncation
// Truncating (round-toward-zero) conversions; the 512-bit form gets a
// suppress-all-exceptions (SAE) variant instead of a rounding-control one.
8394 multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8396 SDNode OpNodeSAE, X86SchedWriteWidths sched> {
8397 let Predicates = [HasAVX512] in {
8398 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8399 MaskOpNode, sched.ZMM>,
8400 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
8401 OpNodeSAE, sched.ZMM>, EVEX_V512;
8403 let Predicates = [HasVLX] in {
8404 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8405 MaskOpNode, sched.XMM>, EVEX_V128;
8406 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8407 MaskOpNode, sched.YMM>, EVEX_V256;
8411 // Convert Float to Signed/Unsigned Doubleword
// Non-truncating (current-rounding-mode) f32 -> i32; 512-bit form gets a
// rounding-control (rc) variant.
8412 multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8413 SDNode MaskOpNode, SDNode OpNodeRnd,
8414 X86SchedWriteWidths sched> {
8415 let Predicates = [HasAVX512] in {
8416 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8417 MaskOpNode, sched.ZMM>,
8418 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
8419 OpNodeRnd, sched.ZMM>, EVEX_V512;
8421 let Predicates = [HasVLX] in {
8422 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8423 MaskOpNode, sched.XMM>, EVEX_V128;
8424 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8425 MaskOpNode, sched.YMM>, EVEX_V256;
8429 // Convert Double to Signed/Unsigned Doubleword with truncation
// Truncating f64 -> i32. Both 128- and 256-bit sources produce a v4i32
// result in a VR128X destination, which is why the AT&T-syntax aliases
// below need explicit "x"/"y" mnemonic suffixes.
8430 multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8431 SDNode MaskOpNode, SDNode OpNodeSAE,
8432 X86SchedWriteWidths sched> {
8433 let Predicates = [HasAVX512] in {
8434 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8435 MaskOpNode, sched.ZMM>,
8436 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
8437 OpNodeSAE, sched.ZMM>, EVEX_V512;
8439 let Predicates = [HasVLX] in {
8440 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8441 // memory forms of these instructions in Asm Parser. They have the same
8442 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8443 // due to the same reason.
// Z128 uses null_frag: its selection patterns are defined separately
// (see the HasVLX X86mcvttp2si patterns later in this file).
8444 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8445 null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8447 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8448 MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
// AT&T-syntax aliases: the plain mnemonic is ambiguous for register and
// broadcast forms, so accept (but never emit -- emit priority 0) the
// "x"/"y"-suffixed spellings for all masking variants.
8451 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8452 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8453 VR128X:$src), 0, "att">;
8454 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8455 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8456 VK2WM:$mask, VR128X:$src), 0, "att">;
8457 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8458 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8459 VK2WM:$mask, VR128X:$src), 0, "att">;
8460 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8461 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8462 f64mem:$src), 0, "att">;
8463 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8464 "$dst {${mask}}, ${src}{1to2}}",
8465 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8466 VK2WM:$mask, f64mem:$src), 0, "att">;
8467 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8468 "$dst {${mask}} {z}, ${src}{1to2}}",
8469 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8470 VK2WM:$mask, f64mem:$src), 0, "att">;
8472 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8473 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8474 VR256X:$src), 0, "att">;
8475 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8476 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8477 VK4WM:$mask, VR256X:$src), 0, "att">;
8478 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8479 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8480 VK4WM:$mask, VR256X:$src), 0, "att">;
8481 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8482 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8483 f64mem:$src), 0, "att">;
8484 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8485 "$dst {${mask}}, ${src}{1to4}}",
8486 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8487 VK4WM:$mask, f64mem:$src), 0, "att">;
8488 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8489 "$dst {${mask}} {z}, ${src}{1to4}}",
8490 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8491 VK4WM:$mask, f64mem:$src), 0, "att">;
8494 // Convert Double to Signed/Unsigned Doubleword
// Non-truncating f64 -> i32 counterpart of avx512_cvttpd2dq above; same
// VR128X-destination ambiguity, hence the same "x"/"y" alias scheme.
8495 multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8496 SDNode MaskOpNode, SDNode OpNodeRnd,
8497 X86SchedWriteWidths sched> {
8498 let Predicates = [HasAVX512] in {
8499 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8500 MaskOpNode, sched.ZMM>,
8501 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
8502 OpNodeRnd, sched.ZMM>, EVEX_V512;
8504 let Predicates = [HasVLX] in {
8505 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8506 // memory forms of these instructions in Asm Parser. They have the same
8507 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8508 // due to the same reason.
// Z128 uses null_frag: its patterns live in the HasVLX X86mcvtp2Int
// pattern list later in this file.
8509 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8510 null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8512 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8513 MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
// AT&T-syntax "x"/"y"-suffixed aliases (accepted, never emitted).
8516 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8517 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
8518 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8519 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8520 VK2WM:$mask, VR128X:$src), 0, "att">;
8521 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8522 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8523 VK2WM:$mask, VR128X:$src), 0, "att">;
8524 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8525 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8526 f64mem:$src), 0, "att">;
8527 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8528 "$dst {${mask}}, ${src}{1to2}}",
8529 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8530 VK2WM:$mask, f64mem:$src), 0, "att">;
8531 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8532 "$dst {${mask}} {z}, ${src}{1to2}}",
8533 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8534 VK2WM:$mask, f64mem:$src), 0, "att">;
8536 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8537 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
8538 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8539 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8540 VK4WM:$mask, VR256X:$src), 0, "att">;
8541 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8542 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8543 VK4WM:$mask, VR256X:$src), 0, "att">;
8544 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8545 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8546 f64mem:$src), 0, "att">;
8547 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8548 "$dst {${mask}}, ${src}{1to4}}",
8549 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8550 VK4WM:$mask, f64mem:$src), 0, "att">;
8551 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8552 "$dst {${mask}} {z}, ${src}{1to4}}",
8553 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8554 VK4WM:$mask, f64mem:$src), 0, "att">;
8557 // Convert Double to Signed/Unsigned Quadword
// f64 -> i64 conversions; requires AVX512DQ (and VLX for sub-512-bit).
8558 multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8559 SDNode MaskOpNode, SDNode OpNodeRnd,
8560 X86SchedWriteWidths sched> {
8561 let Predicates = [HasDQI] in {
8562 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8563 MaskOpNode, sched.ZMM>,
8564 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
8565 OpNodeRnd, sched.ZMM>, EVEX_V512;
8567 let Predicates = [HasDQI, HasVLX] in {
8568 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8569 MaskOpNode, sched.XMM>, EVEX_V128;
8570 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8571 MaskOpNode, sched.YMM>, EVEX_V256;
8575 // Convert Double to Signed/Unsigned Quadword with truncation
// Truncating f64 -> i64; SAE variant on the 512-bit form. Requires AVX512DQ.
8576 multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8577 SDNode MaskOpNode, SDNode OpNodeRnd,
8578 X86SchedWriteWidths sched> {
8579 let Predicates = [HasDQI] in {
8580 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8581 MaskOpNode, sched.ZMM>,
8582 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
8583 OpNodeRnd, sched.ZMM>, EVEX_V512;
8585 let Predicates = [HasDQI, HasVLX] in {
8586 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8587 MaskOpNode, sched.XMM>, EVEX_V128;
8588 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8589 MaskOpNode, sched.YMM>, EVEX_V256;
8593 // Convert Signed/Unsigned Quadword to Double
// i64 -> f64 conversions; requires AVX512DQ. The VLX forms have no VEX
// equivalent, hence NotEVEX2VEXConvertible.
8594 multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8595 SDNode MaskOpNode, SDNode OpNodeRnd,
8596 X86SchedWriteWidths sched> {
8597 let Predicates = [HasDQI] in {
8598 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
8599 MaskOpNode, sched.ZMM>,
8600 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
8601 OpNodeRnd, sched.ZMM>, EVEX_V512;
8603 let Predicates = [HasDQI, HasVLX] in {
8604 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
8605 MaskOpNode, sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
8606 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
8607 MaskOpNode, sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
8611 // Convert Float to Signed/Unsigned Quadword
// f32 -> i64 widening conversions; requires AVX512DQ.
8612 multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8613 SDNode MaskOpNode, SDNode OpNodeRnd,
8614 X86SchedWriteWidths sched> {
8615 let Predicates = [HasDQI] in {
8616 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8617 MaskOpNode, sched.ZMM>,
8618 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
8619 OpNodeRnd, sched.ZMM>, EVEX_V512;
8621 let Predicates = [HasDQI, HasVLX] in {
8622 // Explicitly specified broadcast string, since we take only 2 elements
8623 // from v4f32x_info source
// The extra patterns fold a 64-bit load of the two consumed f32 elements
// (f64mem + scalar_to_vector), mirroring avx512_cvtdq2pd's Z128 handling.
8624 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8625 MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8626 (v2i64 (OpNode (bc_v4f32
8628 (scalar_to_vector (loadf64 addr:$src)))))),
8629 (v2i64 (MaskOpNode (bc_v4f32
8631 (scalar_to_vector (loadf64 addr:$src))))))>,
8633 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8634 MaskOpNode, sched.YMM>, EVEX_V256;
8638 // Convert Float to Signed/Unsigned Quadword with truncation
// Truncating f32 -> i64 widening conversions; requires AVX512DQ.
8639 multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8640 SDNode MaskOpNode, SDNode OpNodeRnd,
8641 X86SchedWriteWidths sched> {
8642 let Predicates = [HasDQI] in {
8643 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8644 MaskOpNode, sched.ZMM>,
8645 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
8646 OpNodeRnd, sched.ZMM>, EVEX_V512;
8648 let Predicates = [HasDQI, HasVLX] in {
8649 // Explicitly specified broadcast string, since we take only 2 elements
8650 // from v4f32x_info source
// As in avx512_cvtps2qq, fold a 64-bit load of the two consumed elements.
8651 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8652 MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8653 (v2i64 (OpNode (bc_v4f32
8655 (scalar_to_vector (loadf64 addr:$src)))))),
8656 (v2i64 (MaskOpNode (bc_v4f32
8658 (scalar_to_vector (loadf64 addr:$src))))))>,
8660 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8661 MaskOpNode, sched.YMM>, EVEX_V256;
8665 // Convert Signed/Unsigned Quadword to Float
8666 // Also Convert Signed/Unsigned Doubleword to Half
// Shared multiclass for narrowing int -> fp conversions (qq2ps: i64 -> f32,
// dq2ph: i32 -> f16). The result is narrower than the source, so all VLX
// forms write a VR128X destination and need "x"/"y" AT&T alias suffixes.
8667 multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8668 SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
8669 SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
8670 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8671 X86SchedWriteWidths sched, Predicate prd = HasDQI> {
8672 let Predicates = [prd] in {
8673 defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
8674 MaskOpNode, sched.ZMM>,
8675 avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
8676 OpNodeRnd, sched.ZMM>, EVEX_V512;
8678 let Predicates = [prd, HasVLX] in {
8679 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8680 // memory forms of these instructions in Asm Parser. They have the same
8681 // dest type. We also specify the broadcast string explicitly
8682 // due to the same reason.
8683 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
8684 null_frag, sched.XMM, _src.info128.BroadcastStr,
8685 "{x}", i128mem, _src.info128.KRCWM>,
8686 EVEX_V128, NotEVEX2VEXConvertible;
8687 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
8688 MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
8690 NotEVEX2VEXConvertible;
8692 // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
8693 // patterns have been disabled with null_frag.
8694 def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
8695 (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
8696 def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
8697 _src.info128.KRCWM:$mask),
8698 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
8699 def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
8700 _src.info128.KRCWM:$mask),
8701 (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;
// Folded-load forms of the masked 128-bit patterns.
8703 def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
8704 (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
8705 def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
8706 _src.info128.KRCWM:$mask),
8707 (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8708 def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
8709 _src.info128.KRCWM:$mask),
8710 (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;
// Broadcast-load forms (64-bit element broadcast).
8712 def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))),
8713 (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
8714 def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8715 (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask),
8716 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8717 def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8718 _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask),
8719 (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>;
// AT&T-syntax "x"/"y"-suffixed aliases (accepted, never emitted).
8722 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8723 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8724 VR128X:$src), 0, "att">;
8725 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8726 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8727 VK2WM:$mask, VR128X:$src), 0, "att">;
8728 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8729 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8730 VK2WM:$mask, VR128X:$src), 0, "att">;
8731 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8732 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8733 i64mem:$src), 0, "att">;
8734 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8735 "$dst {${mask}}, ${src}{1to2}}",
8736 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8737 VK2WM:$mask, i64mem:$src), 0, "att">;
8738 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8739 "$dst {${mask}} {z}, ${src}{1to2}}",
8740 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8741 VK2WM:$mask, i64mem:$src), 0, "att">;
8743 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8744 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8745 VR256X:$src), 0, "att">;
8746 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8747 "$dst {${mask}}, $src}",
8748 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8749 VK4WM:$mask, VR256X:$src), 0, "att">;
8750 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8751 "$dst {${mask}} {z}, $src}",
8752 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8753 VK4WM:$mask, VR256X:$src), 0, "att">;
8754 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8755 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8756 i64mem:$src), 0, "att">;
8757 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8758 "$dst {${mask}}, ${src}{1to4}}",
8759 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8760 VK4WM:$mask, i64mem:$src), 0, "att">;
8761 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8762 "$dst {${mask}} {z}, ${src}{1to4}}",
8763 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8764 VK4WM:$mask, i64mem:$src), 0, "att">;
// Instantiations of the conversion multiclasses above. Each defm pairs the
// generic (any_*) node for unmasked selection with the strict node for
// masked selection, plus a rounding/SAE node for the 512-bit form.
8767 defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
8768 X86any_VSintToFP, X86VSintToFP,
8769 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8771 defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
8772 X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8773 PS, EVEX_CD8<32, CD8VF>;
8775 defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
8776 X86cvttp2si, X86cvttp2siSAE,
8777 SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>;
8779 defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
8780 X86cvttp2si, X86cvttp2siSAE,
8781 SchedWriteCvtPD2DQ>,
8782 PD, VEX_W, EVEX_CD8<64, CD8VF>;
8784 defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
8785 X86cvttp2ui, X86cvttp2uiSAE,
8786 SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>;
8788 defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
8789 X86cvttp2ui, X86cvttp2uiSAE,
8790 SchedWriteCvtPD2DQ>,
8791 PS, VEX_W, EVEX_CD8<64, CD8VF>;
8793 defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
8794 uint_to_fp, X86any_VUintToFP, X86VUintToFP,
8795 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8797 defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
8798 uint_to_fp, X86VUintToFpRnd,
8799 SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>;
8801 defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
8802 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8803 EVEX_CD8<32, CD8VF>;
8805 defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
8806 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
8807 VEX_W, EVEX_CD8<64, CD8VF>;
8809 defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
8810 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8811 PS, EVEX_CD8<32, CD8VF>;
8813 defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
8814 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8815 PS, EVEX_CD8<64, CD8VF>;
// i64-element conversions below require AVX512DQ (enforced inside the
// multiclasses via HasDQI predicates).
8817 defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
8818 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8819 PD, EVEX_CD8<64, CD8VF>;
8821 defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
8822 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8823 EVEX_CD8<32, CD8VH>;
8825 defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
8826 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8827 PD, EVEX_CD8<64, CD8VF>;
8829 defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
8830 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
8831 EVEX_CD8<32, CD8VH>;
8833 defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
8834 X86cvttp2si, X86cvttp2siSAE,
8835 SchedWriteCvtPD2DQ>, VEX_W,
8836 PD, EVEX_CD8<64, CD8VF>;
8838 defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
8839 X86cvttp2si, X86cvttp2siSAE,
8840 SchedWriteCvtPS2DQ>, PD,
8841 EVEX_CD8<32, CD8VH>;
8843 defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
8844 X86cvttp2ui, X86cvttp2uiSAE,
8845 SchedWriteCvtPD2DQ>, VEX_W,
8846 PD, EVEX_CD8<64, CD8VF>;
8848 defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
8849 X86cvttp2ui, X86cvttp2uiSAE,
8850 SchedWriteCvtPS2DQ>, PD,
8851 EVEX_CD8<32, CD8VH>;
8853 defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
8854 sint_to_fp, X86VSintToFpRnd,
8855 SchedWriteCvtDQ2PD>, VEX_W, XS, EVEX_CD8<64, CD8VF>;
8857 defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
8858 uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
8859 VEX_W, XS, EVEX_CD8<64, CD8VF>;
// Narrowing int -> fp conversions sharing avx512_cvtqq2ps_dq2ph; the
// dq2ph variants are gated on HasFP16 instead of the default HasDQI.
8861 defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp,
8862 X86any_VSintToFP, X86VMSintToFP,
8863 X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8864 SchedWriteCvtDQ2PS, HasFP16>,
8865 T_MAP5PS, EVEX_CD8<32, CD8VF>;
8867 defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp,
8868 X86any_VUintToFP, X86VMUintToFP,
8869 X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8870 SchedWriteCvtDQ2PS, HasFP16>, T_MAP5XD,
8871 EVEX_CD8<32, CD8VF>;
8873 defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp,
8874 X86any_VSintToFP, X86VMSintToFP,
8875 X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8876 SchedWriteCvtDQ2PS>, VEX_W, PS,
8877 EVEX_CD8<64, CD8VF>;
8879 defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp,
8880 X86any_VUintToFP, X86VMUintToFP,
8881 X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8882 SchedWriteCvtDQ2PS>, VEX_W, XD,
8883 EVEX_CD8<64, CD8VF>;
// Selection patterns for the 128-bit PD->DQ conversions whose instruction
// patterns were declared with null_frag in the multiclasses above.
8885 let Predicates = [HasVLX] in {
8886 // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8887 // patterns have been disabled with null_frag.
8888 def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8889 (VCVTPD2DQZ128rr VR128X:$src)>;
8890 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8892 (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8893 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8895 (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8897 def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8898 (VCVTPD2DQZ128rm addr:$src)>;
8899 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8901 (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8902 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8904 (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8906 def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
8907 (VCVTPD2DQZ128rmb addr:$src)>;
8908 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8909 (v4i32 VR128X:$src0), VK2WM:$mask),
8910 (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8911 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8912 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8913 (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8915 // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8916 // patterns have been disabled with null_frag.
8917 def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
8918 (VCVTTPD2DQZ128rr VR128X:$src)>;
8919 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8921 (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8922 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8924 (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8926 def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
8927 (VCVTTPD2DQZ128rm addr:$src)>;
8928 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8930 (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8931 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8933 (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8935 def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
8936 (VCVTTPD2DQZ128rmb addr:$src)>;
8937 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8938 (v4i32 VR128X:$src0), VK2WM:$mask),
8939 (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8940 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8941 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8942 (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8944 // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8945 // patterns have been disabled with null_frag.
8946 def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8947 (VCVTPD2UDQZ128rr VR128X:$src)>;
8948 def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8950 (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8951 def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8953 (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8955 def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8956 (VCVTPD2UDQZ128rm addr:$src)>;
8957 def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8959 (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8960 def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8962 (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8964 def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
8965 (VCVTPD2UDQZ128rmb addr:$src)>;
8966 def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8967 (v4i32 VR128X:$src0), VK2WM:$mask),
8968 (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8969 def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8970 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8971 (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8973 // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
8974 // patterns have been disabled with null_frag.
8975 def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
8976 (VCVTTPD2UDQZ128rr VR128X:$src)>;
8977 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8979 (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8980 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8982 (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8984 def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
8985 (VCVTTPD2UDQZ128rm addr:$src)>;
8986 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8988 (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8989 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8991 (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8993 def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
8994 (VCVTTPD2UDQZ128rmb addr:$src)>;
8995 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8996 (v4i32 VR128X:$src0), VK2WM:$mask),
8997 (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8998 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8999 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
9000 (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
// Fold a zero-extending 64-bit load (low two f32 elements) into the
// 128-bit PS->QQ conversions, including masked variants via vselect_mask.
9003 let Predicates = [HasDQI, HasVLX] in {
9004 def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9005 (VCVTPS2QQZ128rm addr:$src)>;
9006 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9007 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9009 (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9010 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9011 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9012 v2i64x_info.ImmAllZerosV)),
9013 (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
9015 def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9016 (VCVTPS2UQQZ128rm addr:$src)>;
9017 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9018 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9020 (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9021 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9022 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9023 v2i64x_info.ImmAllZerosV)),
9024 (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
9026 def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9027 (VCVTTPS2QQZ128rm addr:$src)>;
9028 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9029 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9031 (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9032 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9033 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9034 v2i64x_info.ImmAllZerosV)),
9035 (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
9037 def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9038 (VCVTTPS2UQQZ128rm addr:$src)>;
9039 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9040 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9042 (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9043 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9044 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9045 v2i64x_info.ImmAllZerosV)),
9046 (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
// Fold a zero-extending 64-bit load (low two i32 elements) into the
// 128-bit DQ->PD conversions, including masked variants via vselect_mask.
9049 let Predicates = [HasVLX] in {
9050 def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
9051 (VCVTDQ2PDZ128rm addr:$src)>;
9052 def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9053 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9055 (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9056 def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9057 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9058 v2f64x_info.ImmAllZerosV)),
9059 (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
9061 def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
9062 (VCVTUDQ2PDZ128rm addr:$src)>;
9063 def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9064 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9066 (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9067 def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9068 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9069 v2f64x_info.ImmAllZerosV)),
9070 (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
9073 //===----------------------------------------------------------------------===//
9074 // Half precision conversion instructions
9075 //===----------------------------------------------------------------------===//
9077 let Uses = [MXCSR], mayRaiseFPException = 1 in
9078 multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9079 X86MemOperand x86memop, dag ld_dag,
9080 X86FoldableSchedWrite sched> {
9081 defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
9082 (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
9083 (X86any_cvtph2ps (_src.VT _src.RC:$src)),
9084 (X86cvtph2ps (_src.VT _src.RC:$src))>,
9085 T8PD, Sched<[sched]>;
9086 defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
9087 (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
9088 (X86any_cvtph2ps (_src.VT ld_dag)),
9089 (X86cvtph2ps (_src.VT ld_dag))>,
9090 T8PD, Sched<[sched.Folded]>;
9093 multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9094 X86FoldableSchedWrite sched> {
9095 let Uses = [MXCSR] in
9096 defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
9097 (ins _src.RC:$src), "vcvtph2ps",
9098 "{sae}, $src", "$src, {sae}",
9099 (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
9100 T8PD, EVEX_B, Sched<[sched]>;
9103 let Predicates = [HasAVX512] in
9104 defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
9105 (load addr:$src), WriteCvtPH2PSZ>,
9106 avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
9107 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
9109 let Predicates = [HasVLX] in {
9110 defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
9111 (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
9112 EVEX_CD8<32, CD8VH>;
9113 defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
9114 (bitconvert (v2i64 (X86vzload64 addr:$src))),
9115 WriteCvtPH2PS>, EVEX, EVEX_V128,
9116 EVEX_CD8<32, CD8VH>;
9118 // Pattern match vcvtph2ps of a scalar i64 load.
9119 def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
9120 (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
9121 (VCVTPH2PSZ128rm addr:$src)>;
9124 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9125 X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
9126 let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9127 def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9128 (ins _src.RC:$src1, i32u8imm:$src2),
9129 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9130 [(set _dest.RC:$dst,
9131 (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
9133 let Constraints = "$src0 = $dst" in
9134 def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9135 (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9136 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
9137 [(set _dest.RC:$dst,
9138 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
9139 _dest.RC:$src0, _src.KRCWM:$mask))]>,
9140 Sched<[RR]>, EVEX_K;
9141 def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9142 (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9143 "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
9144 [(set _dest.RC:$dst,
9145 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
9146 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
9147 Sched<[RR]>, EVEX_KZ;
9148 let hasSideEffects = 0, mayStore = 1 in {
9149 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
9150 (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
9151 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9153 def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
9154 (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9155 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
9156 EVEX_K, Sched<[MR]>, NotMemoryFoldable;
9161 multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9163 let hasSideEffects = 0, Uses = [MXCSR] in
9164 defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
9165 (outs _dest.RC:$dst),
9166 (ins _src.RC:$src1, i32u8imm:$src2),
9167 "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
9168 EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
9171 let Predicates = [HasAVX512] in {
9172 defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
9173 WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
9174 avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
9175 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
9177 def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
9178 (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
9181 let Predicates = [HasVLX] in {
9182 defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
9183 WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
9184 EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
9185 defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
9186 WriteCvtPS2PH, WriteCvtPS2PHSt>,
9187 EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
9189 def : Pat<(store (f64 (extractelt
9190 (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9191 (iPTR 0))), addr:$dst),
9192 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
9193 def : Pat<(store (i64 (extractelt
9194 (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9195 (iPTR 0))), addr:$dst),
9196 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
9197 def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
9198 (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
9201 // Unordered/Ordered scalar fp compare with Sae and set EFLAGS
9202 multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
9203 string OpcodeStr, Domain d,
9204 X86FoldableSchedWrite sched = WriteFComX> {
9205 let hasSideEffects = 0, Uses = [MXCSR] in
9206 def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
9207 !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
9208 EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
9211 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9212 defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
9213 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9214 defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
9215 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
9216 defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
9217 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9218 defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
9219 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
9222 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9223 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
9224 "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9225 EVEX_CD8<32, CD8VT1>;
9226 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
9227 "ucomisd", SSEPackedDouble>, PD, EVEX,
9228 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
9229 defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
9230 "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9231 EVEX_CD8<32, CD8VT1>;
9232 defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
9233 "comisd", SSEPackedDouble>, PD, EVEX,
9234 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
9235 let isCodeGenOnly = 1 in {
9236 defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
9237 sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9238 EVEX_CD8<32, CD8VT1>;
9239 defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
9240 sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX,
9241 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
9243 defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
9244 sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9245 EVEX_CD8<32, CD8VT1>;
9246 defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
9247 sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX,
9248 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
9252 let Defs = [EFLAGS], Predicates = [HasFP16] in {
9253 defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish",
9254 SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
9255 EVEX_CD8<16, CD8VT1>;
9256 defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish",
9257 SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
9258 EVEX_CD8<16, CD8VT1>;
9259 defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16,
9260 "ucomish", SSEPackedSingle>, T_MAP5PS, EVEX,
9261 VEX_LIG, EVEX_CD8<16, CD8VT1>;
9262 defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16,
9263 "comish", SSEPackedSingle>, T_MAP5PS, EVEX,
9264 VEX_LIG, EVEX_CD8<16, CD8VT1>;
9265 let isCodeGenOnly = 1 in {
9266 defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem,
9267 sse_load_f16, "ucomish", SSEPackedSingle>,
9268 T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9270 defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem,
9271 sse_load_f16, "comish", SSEPackedSingle>,
9272 T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9276 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh
9277 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9278 X86FoldableSchedWrite sched, X86VectorVTInfo _,
9279 Predicate prd = HasAVX512> {
9280 let Predicates = [prd], ExeDomain = _.ExeDomain in {
9281 defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9282 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9283 "$src2, $src1", "$src1, $src2",
9284 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9285 EVEX_4V, VEX_LIG, Sched<[sched]>;
9286 defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9287 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9288 "$src2, $src1", "$src1, $src2",
9289 (OpNode (_.VT _.RC:$src1),
9290 (_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG,
9291 Sched<[sched.Folded, sched.ReadAfterFold]>;
9295 defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl,
9296 f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>,
9298 defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s,
9299 SchedWriteFRsqrt.Scl, f16x_info, HasFP16>,
9300 EVEX_CD8<16, CD8VT1>, T_MAP6PD;
9301 let Uses = [MXCSR] in {
9302 defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
9303 f32x_info>, EVEX_CD8<32, CD8VT1>,
9305 defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
9306 f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
9308 defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
9309 SchedWriteFRsqrt.Scl, f32x_info>,
9310 EVEX_CD8<32, CD8VT1>, T8PD;
9311 defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
9312 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
9313 EVEX_CD8<64, CD8VT1>, T8PD;
9316 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
9317 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
9318 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9319 let ExeDomain = _.ExeDomain in {
9320 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9321 (ins _.RC:$src), OpcodeStr, "$src", "$src",
9322 (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
9324 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9325 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9327 (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
9328 Sched<[sched.Folded, sched.ReadAfterFold]>;
9329 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9330 (ins _.ScalarMemOp:$src), OpcodeStr,
9331 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9333 (_.BroadcastLdFrag addr:$src)))>,
9334 EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9338 multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
9339 X86SchedWriteWidths sched> {
9340 let Uses = [MXCSR] in {
9341 defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM,
9342 v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
9343 defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM,
9344 v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
9346 let Predicates = [HasFP16] in
9347 defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
9348 v32f16_info>, EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9350 // Define only if AVX512VL feature is present.
9351 let Predicates = [HasVLX], Uses = [MXCSR] in {
9352 defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9353 OpNode, sched.XMM, v4f32x_info>,
9354 EVEX_V128, EVEX_CD8<32, CD8VF>;
9355 defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9356 OpNode, sched.YMM, v8f32x_info>,
9357 EVEX_V256, EVEX_CD8<32, CD8VF>;
9358 defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9359 OpNode, sched.XMM, v2f64x_info>,
9360 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
9361 defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9362 OpNode, sched.YMM, v4f64x_info>,
9363 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
9365 let Predicates = [HasFP16, HasVLX] in {
9366 defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9367 OpNode, sched.XMM, v8f16x_info>,
9368 EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9369 defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9370 OpNode, sched.YMM, v16f16x_info>,
9371 EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9375 defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>;
9376 defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>;
9378 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
9379 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
9380 SDNode OpNode, SDNode OpNodeSAE,
9381 X86FoldableSchedWrite sched> {
9382 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
9383 defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9384 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9385 "$src2, $src1", "$src1, $src2",
9386 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9387 Sched<[sched]>, SIMD_EXC;
9389 defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9390 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9391 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
9392 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9393 EVEX_B, Sched<[sched]>;
9395 defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9396 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9397 "$src2, $src1", "$src1, $src2",
9398 (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
9399 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9403 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9404 SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9405 defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
9406 sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD, EVEX_4V;
9407 defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
9408 sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD, EVEX_4V;
9411 multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
9412 SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9413 let Predicates = [HasFP16] in
9414 defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode, OpNodeSAE, sched>,
9415 EVEX_CD8<16, CD8VT1>, T_MAP6PD, EVEX_4V;
9418 let Predicates = [HasERI] in {
9419 defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
9420 SchedWriteFRcp.Scl>;
9421 defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
9422 SchedWriteFRsqrt.Scl>;
9425 defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9426 SchedWriteFRnd.Scl>,
9427 avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9428 SchedWriteFRnd.Scl>;
9429 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
9431 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9432 SDNode OpNode, X86FoldableSchedWrite sched> {
9433 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9434 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9435 (ins _.RC:$src), OpcodeStr, "$src", "$src",
9436 (OpNode (_.VT _.RC:$src))>,
9439 defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9440 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9442 (bitconvert (_.LdFrag addr:$src))))>,
9443 Sched<[sched.Folded, sched.ReadAfterFold]>;
9445 defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9446 (ins _.ScalarMemOp:$src), OpcodeStr,
9447 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9449 (_.BroadcastLdFrag addr:$src)))>,
9450 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9453 multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9454 SDNode OpNode, X86FoldableSchedWrite sched> {
9455 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
9456 defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9457 (ins _.RC:$src), OpcodeStr,
9458 "{sae}, $src", "$src, {sae}",
9459 (OpNode (_.VT _.RC:$src))>,
9460 EVEX_B, Sched<[sched]>;
9463 multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
9464 SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9465 defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
9466 avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
9467 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
9468 defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
9469 avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
9470 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
9473 multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
9474 SDNode OpNode, X86SchedWriteWidths sched> {
9475 // Define only if AVX512VL feature is present.
9476 let Predicates = [HasVLX] in {
9477 defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
9479 EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
9480 defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
9482 EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
9483 defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
9485 EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
9486 defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
9488 EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
9492 multiclass avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
9493 SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9494 let Predicates = [HasFP16] in
9495 defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>,
9496 avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>,
9497 T_MAP6PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
9498 let Predicates = [HasFP16, HasVLX] in {
9499 defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>,
9500 EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9501 defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>,
9502 EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9505 let Predicates = [HasERI] in {
9506 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
9507 SchedWriteFRsqrt>, EVEX;
9508 defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
9509 SchedWriteFRcp>, EVEX;
9510 defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
9511 SchedWriteFAdd>, EVEX;
9513 defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9515 avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9517 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
9518 SchedWriteFRnd>, EVEX;
9520 multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
9521 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9522 let ExeDomain = _.ExeDomain in
9523 defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9524 (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
9525 (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
9526 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
9529 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
9530 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9531 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9532 defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
9533 (ins _.RC:$src), OpcodeStr, "$src", "$src",
9534 (_.VT (any_fsqrt _.RC:$src)),
9535 (_.VT (fsqrt _.RC:$src))>, EVEX,
9537 defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9538 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9539 (any_fsqrt (_.VT (_.LdFrag addr:$src))),
9540 (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
9541 Sched<[sched.Folded, sched.ReadAfterFold]>;
9542 defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9543 (ins _.ScalarMemOp:$src), OpcodeStr,
9544 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9545 (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
9546 (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
9547 EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9551 let Uses = [MXCSR], mayRaiseFPException = 1 in
9552 multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
9553 X86SchedWriteSizes sched> {
9554 let Predicates = [HasFP16] in
9555 defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9556 sched.PH.ZMM, v32f16_info>,
9557 EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9558 let Predicates = [HasFP16, HasVLX] in {
9559 defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9560 sched.PH.XMM, v8f16x_info>,
9561 EVEX_V128, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9562 defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9563 sched.PH.YMM, v16f16x_info>,
9564 EVEX_V256, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9566 defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9567 sched.PS.ZMM, v16f32_info>,
9568 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
9569 defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9570 sched.PD.ZMM, v8f64_info>,
9571 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
9572 // Define only if AVX512VL feature is present.
9573 let Predicates = [HasVLX] in {
9574 defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9575 sched.PS.XMM, v4f32x_info>,
9576 EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
9577 defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9578 sched.PS.YMM, v8f32x_info>,
9579 EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
9580 defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9581 sched.PD.XMM, v2f64x_info>,
9582 EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
9583 defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9584 sched.PD.YMM, v4f64x_info>,
9585 EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
9589 let Uses = [MXCSR] in
9590 multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
9591 X86SchedWriteSizes sched> {
9592 let Predicates = [HasFP16] in
9593 defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
9594 sched.PH.ZMM, v32f16_info>,
9595 EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9596 defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
9597 sched.PS.ZMM, v16f32_info>,
9598 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
9599 defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
9600 sched.PD.ZMM, v8f64_info>,
9601 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
9604 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9605 X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> {
9606 let ExeDomain = _.ExeDomain, Predicates = [prd] in {
9607 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9608 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9609 "$src2, $src1", "$src1, $src2",
9610 (X86fsqrts (_.VT _.RC:$src1),
9611 (_.VT _.RC:$src2))>,
9612 Sched<[sched]>, SIMD_EXC;
9613 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9614 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9615 "$src2, $src1", "$src1, $src2",
9616 (X86fsqrts (_.VT _.RC:$src1),
9617 (_.ScalarIntMemFrags addr:$src2))>,
9618 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9619 let Uses = [MXCSR] in
9620 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9621 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
9622 "$rc, $src2, $src1", "$src1, $src2, $rc",
9623 (X86fsqrtRnds (_.VT _.RC:$src1),
9626 EVEX_B, EVEX_RC, Sched<[sched]>;
9628 let isCodeGenOnly = 1, hasSideEffects = 0 in {
9629 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9630 (ins _.FRC:$src1, _.FRC:$src2),
9631 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9632 Sched<[sched]>, SIMD_EXC;
9634 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9635 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
9636 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9637 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9641 let Predicates = [prd] in {
9642 def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
9643 (!cast<Instruction>(Name#Zr)
9644 (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
9647 let Predicates = [prd, OptForSize] in {
9648 def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
9649 (!cast<Instruction>(Name#Zm)
9650 (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
9654 multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
9655 X86SchedWriteSizes sched> {
9656 defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>,
9657 EVEX_CD8<16, CD8VT1>, EVEX_4V, T_MAP5XS;
9658 defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
9659 EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
9660 defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
9661 EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
9664 defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
9665 avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
9667 defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
9669 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
9670 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9671 let ExeDomain = _.ExeDomain in {
9672 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9673 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9674 "$src3, $src2, $src1", "$src1, $src2, $src3",
9675 (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9676 (i32 timm:$src3)))>,
9677 Sched<[sched]>, SIMD_EXC;
9679 let Uses = [MXCSR] in
9680 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9681 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9682 "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
9683 (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9684 (i32 timm:$src3)))>, EVEX_B,
9687 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9688 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
9690 "$src3, $src2, $src1", "$src1, $src2, $src3",
9691 (_.VT (X86RndScales _.RC:$src1,
9692 (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
9693 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9695 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
9696 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9697 (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
9698 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9699 []>, Sched<[sched]>, SIMD_EXC;
9702 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9703 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9704 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9705 []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9709 let Predicates = [HasAVX512] in {
9710 def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
9711 (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
9712 _.FRC:$src1, timm:$src2))>;
9715 let Predicates = [HasAVX512, OptForSize] in {
9716 def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
9717 (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
9718 addr:$src1, timm:$src2))>;
9722 let Predicates = [HasFP16] in
9723 defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh",
9724 SchedWriteFRnd.Scl, f16x_info>,
9725 AVX512PSIi8Base, TA, EVEX_4V,
9726 EVEX_CD8<16, CD8VT1>;
9728 defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
9729 SchedWriteFRnd.Scl, f32x_info>,
9730 AVX512AIi8Base, EVEX_4V, VEX_LIG,
9731 EVEX_CD8<32, CD8VT1>;
9733 defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
9734 SchedWriteFRnd.Scl, f64x_info>,
9735 VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
9736 EVEX_CD8<64, CD8VT1>;
9738 multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
9739 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
9740 dag OutMask, Predicate BasePredicate> {
9741 let Predicates = [BasePredicate] in {
9742 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9743 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9744 (extractelt _.VT:$dst, (iPTR 0))))),
9745 (!cast<Instruction>("V"#OpcPrefix#r_Intk)
9746 _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
9748 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9749 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9751 (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
9752 OutMask, _.VT:$src2, _.VT:$src1)>;
9756 defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh,
9757 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info,
9758 fp16imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasFP16>;
9759 defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
9760 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
9761 fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
9762 defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
9763 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
9764 fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
9767 //-------------------------------------------------
9768 // Integer truncate and extend operations
9769 //-------------------------------------------------
9771 // PatFrags that contain a select and a truncate op. They take operands in the
9772 // same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
9773 // either to the multiclasses.
9774 def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
9775 (vselect_mask node:$mask,
9776 (trunc node:$src), node:$src0)>;
9777 def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
9778 (vselect_mask node:$mask,
9779 (X86vtruncs node:$src), node:$src0)>;
9780 def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
9781 (vselect_mask node:$mask,
9782 (X86vtruncus node:$src), node:$src0)>;
// NOTE(review): gaps in the original line numbering (e.g. 9811-9812,
// 9824-9825, 9829-9830, 9839-9840, 9870-9871) indicate some lines — mostly
// closing braces and one parameter line — were dropped from this chunk;
// the code below is kept byte-identical to what is present.
//
// avx512_trunc_common: register-form VPMOV* truncate instructions
// (unmasked rr, merge-masked rrk, zero-masked rrkz) plus store forms
// (mr, mrk) with no ISel patterns — stores are matched separately via
// avx512_trunc_mr_lowering below.
9784 multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9785 SDPatternOperator MaskNode,
9786 X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
9787 X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
9788 let ExeDomain = DestInfo.ExeDomain in {
9789 def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9790 (ins SrcInfo.RC:$src),
9791 OpcodeStr # "\t{$src, $dst|$dst, $src}",
9792 [(set DestInfo.RC:$dst,
9793 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
9794 EVEX, Sched<[sched]>;
// Merge-masking ties the passthru operand to the destination register.
9795 let Constraints = "$src0 = $dst" in
9796 def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9797 (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9798 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9799 [(set DestInfo.RC:$dst,
9800 (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9801 (DestInfo.VT DestInfo.RC:$src0),
9802 SrcInfo.KRCWM:$mask))]>,
9803 EVEX, EVEX_K, Sched<[sched]>;
// Zero-masking variant: masked-off destination elements become zero.
9804 def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9805 (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9806 OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
9807 [(set DestInfo.RC:$dst,
9808 (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9809 DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
9810 EVEX, EVEX_KZ, Sched<[sched]>;
// Truncating-store forms; patterns are supplied by avx512_trunc_mr_lowering.
9813 let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
9814 def mr : AVX512XS8I<opc, MRMDestMem, (outs),
9815 (ins x86memop:$dst, SrcInfo.RC:$src),
9816 OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
9817 EVEX, Sched<[sched.Folded]>;
9819 def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
9820 (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9821 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
9822 EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
9823 }//mayStore = 1, hasSideEffects = 0
// Lowering patterns that select the mr/mrk store forms for (masked)
// truncating-store fragments. The trailing "string Name" parameter appears
// to have been dropped from this chunk (it is referenced below as Name).
9826 multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9827 X86VectorVTInfo DestInfo,
9828 PatFrag truncFrag, PatFrag mtruncFrag,
9831 def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9832 (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
9833 addr:$dst, SrcInfo.RC:$src)>;
9835 def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
9836 SrcInfo.KRCWM:$mask),
9837 (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
9838 addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
// Top-level driver: instantiates Z128/Z256/Z variants. Separate Op/Mask
// nodes per width because narrow forms use "in-vector" nodes (result is
// padded to 128 bits) while wider forms use the plain truncate nodes.
9841 multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9842 SDNode OpNode256, SDNode OpNode512,
9843 SDPatternOperator MaskNode128,
9844 SDPatternOperator MaskNode256,
9845 SDPatternOperator MaskNode512,
9846 X86FoldableSchedWrite sched,
9847 AVX512VLVectorVTInfo VTSrcInfo,
9848 X86VectorVTInfo DestInfoZ128,
9849 X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9850 X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9851 X86MemOperand x86memopZ, PatFrag truncFrag,
9852 PatFrag mtruncFrag, Predicate prd = HasAVX512>{
// 128/256-bit forms additionally require VLX.
9854 let Predicates = [HasVLX, prd] in {
9855 defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
9856 VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9857 avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
9858 truncFrag, mtruncFrag, NAME>, EVEX_V128;
9860 defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
9861 VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9862 avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
9863 truncFrag, mtruncFrag, NAME>, EVEX_V256;
9865 let Predicates = [prd] in
9866 defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
9867 VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9868 avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
9869 truncFrag, mtruncFrag, NAME>, EVEX_V512;
// Per-truncation-shape wrappers. Naming: q=i64, d=i32, w=i16, b=i8 source/
// dest element types. The memory operand sizes match the number of result
// bits actually stored by each vector-length variant, and the EVEX_CD8
// tuple encodes the compressed-displacement scaling.
//
// i64 -> i8: result is at most 8 bytes even at 512 bits, so all three
// widths use the in-vector node and store 2/4/8 bytes.
9872 multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9873 SDPatternOperator MaskNode,
9874 X86FoldableSchedWrite sched, PatFrag StoreNode,
9875 PatFrag MaskedStoreNode, SDNode InVecNode,
9876 SDPatternOperator InVecMaskNode> {
9877 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
9878 InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
9879 avx512vl_i64_info, v16i8x_info, v16i8x_info,
9880 v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9881 MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
// i64 -> i16: only the 512-bit form fills a full 128-bit result.
9884 multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9885 SDPatternOperator MaskNode,
9886 X86FoldableSchedWrite sched, PatFrag StoreNode,
9887 PatFrag MaskedStoreNode, SDNode InVecNode,
9888 SDPatternOperator InVecMaskNode> {
9889 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9890 InVecMaskNode, InVecMaskNode, MaskNode, sched,
9891 avx512vl_i64_info, v8i16x_info, v8i16x_info,
9892 v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9893 MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
// i64 -> i32: 256- and 512-bit forms use the plain node.
9896 multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9897 SDPatternOperator MaskNode,
9898 X86FoldableSchedWrite sched, PatFrag StoreNode,
9899 PatFrag MaskedStoreNode, SDNode InVecNode,
9900 SDPatternOperator InVecMaskNode> {
9901 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9902 InVecMaskNode, MaskNode, MaskNode, sched,
9903 avx512vl_i64_info, v4i32x_info, v4i32x_info,
9904 v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9905 MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
// i32 -> i8.
9908 multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
9909 SDPatternOperator MaskNode,
9910 X86FoldableSchedWrite sched, PatFrag StoreNode,
9911 PatFrag MaskedStoreNode, SDNode InVecNode,
9912 SDPatternOperator InVecMaskNode> {
9913 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9914 InVecMaskNode, InVecMaskNode, MaskNode, sched,
9915 avx512vl_i32_info, v16i8x_info, v16i8x_info,
9916 v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
9917 MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
// i32 -> i16.
9920 multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9921 SDPatternOperator MaskNode,
9922 X86FoldableSchedWrite sched, PatFrag StoreNode,
9923 PatFrag MaskedStoreNode, SDNode InVecNode,
9924 SDPatternOperator InVecMaskNode> {
9925 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9926 InVecMaskNode, MaskNode, MaskNode, sched,
9927 avx512vl_i32_info, v8i16x_info, v8i16x_info,
9928 v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
9929 MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
// i16 -> i8: VPMOVWB requires BWI (note HasBWI passed as predicate).
9932 multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9933 SDPatternOperator MaskNode,
9934 X86FoldableSchedWrite sched, PatFrag StoreNode,
9935 PatFrag MaskedStoreNode, SDNode InVecNode,
9936 SDPatternOperator InVecMaskNode> {
9937 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9938 InVecMaskNode, MaskNode, MaskNode, sched,
9939 avx512vl_i16_info, v16i8x_info, v16i8x_info,
9940 v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
9941 MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
// Instantiations: for each shape, a plain truncate (VPMOV*), a signed-
// saturating one (VPMOVS*), and an unsigned-saturating one (VPMOVUS*).
// NOTE(review): several defms below are missing their final operand line
// in this chunk (gaps at original lines 9950, 9962, 9974, 9986, 9998,
// 10007, 10011) — presumably the trailing X86vm* node argument.
9944 defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, select_trunc,
9945 WriteShuffle256, truncstorevi8,
9946 masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9947 defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, select_truncs,
9948 WriteShuffle256, truncstore_s_vi8,
9949 masked_truncstore_s_vi8, X86vtruncs,
9951 defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
9952 select_truncus, WriteShuffle256,
9953 truncstore_us_vi8, masked_truncstore_us_vi8,
9954 X86vtruncus, X86vmtruncus>;
9956 defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
9957 WriteShuffle256, truncstorevi16,
9958 masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9959 defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs,
9960 WriteShuffle256, truncstore_s_vi16,
9961 masked_truncstore_s_vi16, X86vtruncs,
9963 defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
9964 select_truncus, WriteShuffle256,
9965 truncstore_us_vi16, masked_truncstore_us_vi16,
9966 X86vtruncus, X86vmtruncus>;
9968 defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
9969 WriteShuffle256, truncstorevi32,
9970 masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
9971 defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs,
9972 WriteShuffle256, truncstore_s_vi32,
9973 masked_truncstore_s_vi32, X86vtruncs,
9975 defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
9976 select_truncus, WriteShuffle256,
9977 truncstore_us_vi32, masked_truncstore_us_vi32,
9978 X86vtruncus, X86vmtruncus>;
9980 defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
9981 WriteShuffle256, truncstorevi8,
9982 masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9983 defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
9984 WriteShuffle256, truncstore_s_vi8,
9985 masked_truncstore_s_vi8, X86vtruncs,
9987 defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
9988 select_truncus, WriteShuffle256,
9989 truncstore_us_vi8, masked_truncstore_us_vi8,
9990 X86vtruncus, X86vmtruncus>;
9992 defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
9993 WriteShuffle256, truncstorevi16,
9994 masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9995 defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
9996 WriteShuffle256, truncstore_s_vi16,
9997 masked_truncstore_s_vi16, X86vtruncs,
9999 defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
10000 select_truncus, WriteShuffle256,
10001 truncstore_us_vi16, masked_truncstore_us_vi16,
10002 X86vtruncus, X86vmtruncus>;
10004 defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
10005 WriteShuffle256, truncstorevi8,
10006 masked_truncstorevi8, X86vtrunc,
10008 defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
10009 WriteShuffle256, truncstore_s_vi8,
10010 masked_truncstore_s_vi8, X86vtruncs,
10012 defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
10013 select_truncus, WriteShuffle256,
10014 truncstore_us_vi8, masked_truncstore_us_vi8,
10015 X86vtruncus, X86vmtruncus>;
// Without VLX there are no 256-bit VPMOV* forms; widen the source into a
// 512-bit register (via INSERT_SUBREG of an IMPLICIT_DEF), use the Z-form
// truncate, then extract the low xmm of the result.
10017 let Predicates = [HasAVX512, NoVLX] in {
10018 def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
10019 (v8i16 (EXTRACT_SUBREG
10020 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
10021 VR256X:$src, sub_ymm)))), sub_xmm))>;
10022 def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
10023 (v4i32 (EXTRACT_SUBREG
10024 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
10025 VR256X:$src, sub_ymm)))), sub_xmm))>;
// Same trick for the BWI-only word->byte truncate when VLX is absent.
10028 let Predicates = [HasBWI, NoVLX] in {
10029 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
10030 (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
10031 VR256X:$src, sub_ymm))), sub_xmm))>;
10034 // Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
// Select the rrk/rrkz register forms directly from X86vmtrunc* nodes
// (merge-masked and zero-masked respectively). NOTE(review): the result
// operand lines of both Pats (original lines 10039, 10043-10044, 10049-10050)
// appear to have been dropped from this chunk.
10035 multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
10036 X86VectorVTInfo DestInfo,
10037 X86VectorVTInfo SrcInfo> {
10038 def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
10040 SrcInfo.KRCWM:$mask)),
10041 (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
10042 SrcInfo.KRCWM:$mask,
10045 def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
10046 DestInfo.ImmAllZerosV,
10047 SrcInfo.KRCWM:$mask)),
10048 (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
// 256-bit d->w lowering requires VLX.
10052 let Predicates = [HasVLX] in {
10053 defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
10054 defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
10055 defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
// 512-bit d->w, d->b and q->w lowerings.
10058 let Predicates = [HasAVX512] in {
10059 defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
10060 defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
10061 defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
10063 defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
10064 defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
10065 defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
10067 defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
10068 defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
10069 defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
// Common register/memory forms for the VPMOVSX/VPMOVZX sign/zero-extend
// instructions at one vector width; AVX512_maskable adds the k/kz variants.
10072 multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
10073 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
10074 X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
10075 let ExeDomain = DestInfo.ExeDomain in {
10076 defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10077 (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
10078 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
10079 EVEX, Sched<[sched]>;
// Memory form matches the extending-load fragment directly.
10081 defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10082 (ins x86memop:$src), OpcodeStr ,"$src", "$src",
10083 (DestInfo.VT (LdFrag addr:$src))>,
10084 EVEX, Sched<[sched.Folded]>;
// Per-extension-shape wrappers (BW = i8->i16, BD = i8->i32, etc.). Widths
// whose source fills a whole 128-bit register use OpNode (plain s/zext);
// widths that consume only part of the low 128 bits use InVecNode
// ({s,z}ext_invec). Memory operand size equals the bytes actually read.
10088 multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
10089 SDNode OpNode, SDNode InVecNode, string ExtTy,
10090 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10091 let Predicates = [HasVLX, HasBWI] in {
10092 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
10093 v16i8x_info, i64mem, LdFrag, InVecNode>,
10094 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
10096 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
10097 v16i8x_info, i128mem, LdFrag, OpNode>,
10098 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
10100 let Predicates = [HasBWI] in {
10101 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
10102 v32i8x_info, i256mem, LdFrag, OpNode>,
10103 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
// i8 -> i32.
10107 multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
10108 SDNode OpNode, SDNode InVecNode, string ExtTy,
10109 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10110 let Predicates = [HasVLX, HasAVX512] in {
10111 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
10112 v16i8x_info, i32mem, LdFrag, InVecNode>,
10113 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
10115 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
10116 v16i8x_info, i64mem, LdFrag, InVecNode>,
10117 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
10119 let Predicates = [HasAVX512] in {
10120 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
10121 v16i8x_info, i128mem, LdFrag, OpNode>,
10122 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
// i8 -> i64: even the 512-bit form reads only 8 bytes, so all use InVecNode.
10126 multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
10127 SDNode OpNode, SDNode InVecNode, string ExtTy,
10128 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10129 let Predicates = [HasVLX, HasAVX512] in {
10130 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
10131 v16i8x_info, i16mem, LdFrag, InVecNode>,
10132 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
10134 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
10135 v16i8x_info, i32mem, LdFrag, InVecNode>,
10136 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
10138 let Predicates = [HasAVX512] in {
10139 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
10140 v16i8x_info, i64mem, LdFrag, InVecNode>,
10141 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
// i16 -> i32.
10145 multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
10146 SDNode OpNode, SDNode InVecNode, string ExtTy,
10147 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
10148 let Predicates = [HasVLX, HasAVX512] in {
10149 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
10150 v8i16x_info, i64mem, LdFrag, InVecNode>,
10151 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
10153 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
10154 v8i16x_info, i128mem, LdFrag, OpNode>,
10155 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
10157 let Predicates = [HasAVX512] in {
10158 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
10159 v16i16x_info, i256mem, LdFrag, OpNode>,
10160 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
// i16 -> i64.
10164 multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
10165 SDNode OpNode, SDNode InVecNode, string ExtTy,
10166 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
10167 let Predicates = [HasVLX, HasAVX512] in {
10168 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
10169 v8i16x_info, i32mem, LdFrag, InVecNode>,
10170 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
10172 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
10173 v8i16x_info, i64mem, LdFrag, InVecNode>,
10174 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
10176 let Predicates = [HasAVX512] in {
10177 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
10178 v8i16x_info, i128mem, LdFrag, OpNode>,
10179 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
// i32 -> i64 (no VEX_WIG: these use the W bit).
10183 multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
10184 SDNode OpNode, SDNode InVecNode, string ExtTy,
10185 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
10187 let Predicates = [HasVLX, HasAVX512] in {
10188 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
10189 v4i32x_info, i64mem, LdFrag, InVecNode>,
10190 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
10192 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
10193 v4i32x_info, i128mem, LdFrag, OpNode>,
10194 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
10196 let Predicates = [HasAVX512] in {
10197 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
10198 v8i32x_info, i256mem, LdFrag, OpNode>,
10199 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
// Zero-extend ("z" prefix selects zextloadvi* fragments) and sign-extend
// ("s" prefix) instantiations for every element-size combination.
10203 defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
10204 defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
10205 defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>;
10206 defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
10207 defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
10208 defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;
10210 defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
10211 defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
10212 defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>;
10213 defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
10214 defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
10215 defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;
10218 // Patterns that we also need any extend versions of. aext_vector_inreg
10219 // is currently legalized to zext_vector_inreg.
// Load-folding patterns for full-width (non-invec) extends: a plain
// ExtOp of a full vector load selects the rm form of the extend.
10220 multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
10221 // 256-bit patterns
10222 let Predicates = [HasVLX, HasBWI] in {
10223 def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
10224 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
10227 let Predicates = [HasVLX] in {
10228 def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
10229 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
10231 def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
10232 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
10235 // 512-bit patterns
10236 let Predicates = [HasBWI] in {
10237 def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
10238 (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
10240 let Predicates = [HasAVX512] in {
10241 def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
10242 (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
10243 def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
10244 (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
10246 def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
10247 (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
10249 def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
10250 (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
// Partial-width load folding for the in-vector extend nodes: narrow
// scalar loads are placed in a vector via scalar_to_vector (or X86vzload*)
// and bitcast to the extend's source element type. NOTE(review): the
// second template-parameter line (original line 10255, presumably
// "SDNode InVecOp> :") appears to have been dropped from this chunk.
10254 multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
10256 AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
10257 // 128-bit patterns
10258 let Predicates = [HasVLX, HasBWI] in {
10259 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10260 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10261 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10262 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10263 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10264 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10266 let Predicates = [HasVLX] in {
10267 def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10268 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10269 def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
10270 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10272 def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
10273 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
10275 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10276 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10277 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10278 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10279 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
10280 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10282 def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10283 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10284 def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
10285 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10287 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10288 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10289 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10290 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10291 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
10292 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
// 256-bit partial-load patterns.
10294 let Predicates = [HasVLX] in {
10295 def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10296 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
// Fold an f64 scalar load into VPMOV?XBDZ256rm. The f64 load must be
// wrapped in a v2f64 scalar_to_vector (loadf64 yields f64, so v2i64 here
// was a type-inference contradiction) and then bitcast to v16i8 — matching
// the 128-bit (10261/10277/10289) and 512-bit (10318) loadf64 patterns.
10297 def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10298 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
// Zero-extending 64-bit load feeding the byte->dword 256-bit extend.
10299 def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10300 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
// byte -> qword, 256-bit: only 4 source bytes are consumed (i32 load).
10302 def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10303 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10304 def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
10305 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
// word -> qword, 256-bit: 8 source bytes (i64/f64 load).
10307 def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10308 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
// Fold an f64 scalar load into VPMOV?XWQZ256rm. As with the other loadf64
// patterns in this multiclass (10261/10277/10289/10318), the f64 load must
// be placed in a v2f64 scalar_to_vector before the bitcast to v8i16;
// v2i64 here was a type-inference contradiction.
10309 def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10310 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
// Remaining 256-bit word->qword vzload pattern.
10311 def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
10312 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10314 // 512-bit patterns
10315 let Predicates = [HasAVX512] in {
10316 def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10317 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10318 def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10319 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10320 def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10321 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
// Instantiate the partial-load folding patterns for sign and zero extend.
10325 defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
10326 defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
10328 // Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
10329 // ext+trunc aggressively making it impossible to legalize the DAG to this
10330 // pattern directly.
10331 let Predicates = [HasAVX512, NoBWI] in {
10332 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
10333 (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
10334 def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
10335 (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
10338 //===----------------------------------------------------------------------===//
10339 // GATHER - SCATTER Operations
10341 // FIXME: Improve scheduling of gather/scatter instructions.
// Base gather form: reads $dst (merge), the writemask (cleared per element
// as it completes — hence the $mask_wb tied output), and a vector-index
// memory operand. No ISel pattern; selected elsewhere (intrinsics/MGATHER).
10342 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10343 X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
// @earlyclobber: $dst may not alias the index register per the ISA.
10344 let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
10345 ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
10346 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
10347 (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
10348 !strconcat(OpcodeStr#_.Suffix,
10349 "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
10350 []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10351 Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
// 64-bit-element gathers: dword-indexed (D) forms use a half-width index
// vector; qword-indexed (Q) forms use a full-width one.
10354 multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
10355 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10356 defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
10357 vy512xmem>, EVEX_V512, VEX_W;
10358 defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
10359 vz512mem>, EVEX_V512, VEX_W;
10360 let Predicates = [HasVLX] in {
10361 defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
10362 vx256xmem>, EVEX_V256, VEX_W;
10363 defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
10364 vy256xmem>, EVEX_V256, VEX_W;
10365 defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
10366 vx128xmem>, EVEX_V128, VEX_W;
10367 defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10368 vx128xmem>, EVEX_V128, VEX_W;
// 32-bit-element gathers: Q forms produce half-width data vectors, and the
// 128-bit Q form uses only 2 mask bits (VK2WM).
10372 multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
10373 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10374 defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
10376 defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
10378 let Predicates = [HasVLX] in {
10379 defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
10380 vy256xmem>, EVEX_V256;
10381 defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10382 vy128xmem>, EVEX_V256;
10383 defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
10384 vx128xmem>, EVEX_V128;
10385 defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10386 vx64xmem, VK2WM>, EVEX_V128;
// FP and integer gather instantiations.
10391 defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
10392 avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
10394 defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
10395 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
// Base scatter form: mirrors avx512_gather but stores; the writemask is
// likewise tied to a $mask_wb output (bits cleared as elements complete).
10397 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10398 X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
10400 let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
10401 hasSideEffects = 0 in
10403 def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
10404 (ins memop:$dst, MaskRC:$mask, _.RC:$src),
10405 !strconcat(OpcodeStr#_.Suffix,
10406 "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
10407 []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10408 Sched<[WriteStore]>;
// 64-bit-element scatters; index operand widths mirror the gather case.
10411 multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
10412 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10413 defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
10414 vy512xmem>, EVEX_V512, VEX_W;
10415 defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
10416 vz512mem>, EVEX_V512, VEX_W;
10417 let Predicates = [HasVLX] in {
10418 defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
10419 vx256xmem>, EVEX_V256, VEX_W;
10420 defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
10421 vy256xmem>, EVEX_V256, VEX_W;
10422 defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
10423 vx128xmem>, EVEX_V128, VEX_W;
10424 defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10425 vx128xmem>, EVEX_V128, VEX_W;
// 32-bit-element scatters; 128-bit Q form uses 2 mask bits (VK2WM).
10429 multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
10430 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10431 defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
10433 defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
10435 let Predicates = [HasVLX] in {
10436 defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
10437 vy256xmem>, EVEX_V256;
10438 defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10439 vy128xmem>, EVEX_V256;
10440 defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
10441 vx128xmem>, EVEX_V128;
10442 defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10443 vx64xmem, VK2WM>, EVEX_V128;
// FP and integer scatter instantiations.
10447 defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
10448 avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
10450 defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
10451 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
// AVX-512 PF (prefetch) gather/scatter hints: masked, no destination,
// modeled as both mayLoad and mayStore since they touch the cache only.
10454 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
10455 RegisterClass KRC, X86MemOperand memop> {
10456 let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
10457 def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
10458 !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
10459 EVEX, EVEX_K, Sched<[WriteLoad]>;
// Hint-0 (T0) and hint-1 (T1) variants; the /r Format field (MRM1m..MRM6m)
// selects gather-0/1 vs scatter-0/1 per the PREFETCH opcode map.
10462 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
10463 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10465 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
10466 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10468 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
10469 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
10471 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
10472 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
10474 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
10475 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10477 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
10478 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10480 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
10481 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
10483 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
10484 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
10486 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
10487 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10489 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
10490 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10492 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
10493 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
10495 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
10496 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
10498 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
10499 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10501 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
10502 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10504 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
10505 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
10507 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
10508 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
// Mask-to-vector conversion (VPMOVM2*): sign-extend a k-register into a
// vector whose lanes are all-ones (mask bit set) or all-zeros.
// cvt_by_vec_width emits one register-to-register form for a single vector
// width; the pattern matches (sext KRC) directly.
10510 multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
10511 def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
10512 !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
10513 [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
10514 EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
// cvt_mask_by_elt_width instantiates the 512-bit form under `prd` and the
// 128/256-bit forms additionally under HasVLX.
10517 multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
10518 string OpcodeStr, Predicate prd> {
10519 let Predicates = [prd] in
10520 defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
10522 let Predicates = [prd, HasVLX] in {
10523 defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
10524 defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
// Byte/word forms need BWI; dword/qword forms need DQI. Opcode 0x28 covers
// b/w, 0x38 covers d/q; VEX_W distinguishes w from b and q from d.
10528 defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
10529 defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
10530 defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
10531 defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
// Vector-to-mask conversion (VPMOV*2M): move the sign bit of each element
// into a k-register. Matched as (X86pcmpgtm 0, src), i.e. "0 > src" which
// is true exactly when the element's sign bit is set.
10533 multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
10534 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
10535 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
10536 [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
10537 EVEX, Sched<[WriteMove]>;
10540 // Use 512bit version to implement 128/256 bit in case NoVLX.
// Widens the narrow source into a 512-bit register (INSERT_SUBREG into
// IMPLICIT_DEF), runs the Zrr instruction, then copies the result mask back
// to the narrow mask class.
10541 multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
10545 def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
10546 (_.KVT (COPY_TO_REGCLASS
10547 (!cast<Instruction>(Name#"Zrr")
10548 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
10549 _.RC:$src, _.SubRegIdx)),
// Top-level driver: native 512-bit under `prd`, native 128/256 with VLX,
// and the widening fallback patterns when VLX is absent.
10553 multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
10554 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10555 let Predicates = [prd] in
10556 defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
10559 let Predicates = [prd, HasVLX] in {
10560 defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
10562 defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
10565 let Predicates = [prd, NoVLX] in {
10566 defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
10567 defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
// b/w forms require BWI (opcode 0x29); d/q forms require DQI (0x39).
10571 defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
10572 avx512vl_i8_info, HasBWI>;
10573 defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
10574 avx512vl_i16_info, HasBWI>, VEX_W;
10575 defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
10576 avx512vl_i32_info, HasDQI>;
10577 defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
10578 avx512vl_i64_info, HasDQI>, VEX_W;
10580 // Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
10581 // is available, but BWI is not. We can't handle this in lowering because
10582 // a target independent DAG combine likes to combine sext and trunc.
// Implementation: widen the mask to v16i32 with VPMOVM2D (DQI), then
// truncate the elements down to bytes/words with VPMOVDB/VPMOVDW.
10583 let Predicates = [HasDQI, NoBWI] in {
10584 def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
10585 (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
10586 def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
10587 (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
// With VLX the v8i1 case can use the 256-bit forms directly.
10590 let Predicates = [HasDQI, NoBWI, HasVLX] in {
10591 def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
10592 (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
10595 //===----------------------------------------------------------------------===//
10596 // AVX-512 - COMPRESS and EXPAND
// compress_by_vec_width_common: one vector width of VCOMPRESS/VPCOMPRESS.
// The rr form carries no pattern (null_frag) — selection is done via the
// lowering patterns below. mr/mrk are the (masked) compressing stores.
10599 multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
10600 string OpcodeStr, X86FoldableSchedWrite sched> {
10601 defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
10602 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10603 (null_frag)>, AVX5128IBase,
10606 let mayStore = 1, hasSideEffects = 0 in
10607 def mr : AVX5128I<opc, MRMDestMem, (outs),
10608 (ins _.MemOp:$dst, _.RC:$src),
10609 OpcodeStr # "\t{$src, $dst|$dst, $src}",
10610 []>, EVEX_CD8<_.EltSize, CD8VT1>,
10611 Sched<[sched.Folded]>;
10613 def mrk : AVX5128I<opc, MRMDestMem, (outs),
10614 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
10615 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
10617 EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10618 Sched<[sched.Folded]>;
// Selection patterns: masked compressing store -> mrk; X86compress with a
// register passthru -> rrk; with a zero passthru -> rrkz.
10621 multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10622 def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
10623 (!cast<Instruction>(Name#_.ZSuffix#mrk)
10624 addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
10626 def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10627 (!cast<Instruction>(Name#_.ZSuffix#rrk)
10628 _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10629 def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10630 (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10631 _.KRCWM:$mask, _.RC:$src)>;
// VL driver: 512-bit under Pred, 128/256 additionally under HasVLX.
10634 multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
10635 X86FoldableSchedWrite sched,
10636 AVX512VLVectorVTInfo VTInfo,
10637 Predicate Pred = HasAVX512> {
10638 let Predicates = [Pred] in
10639 defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
10640 compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10642 let Predicates = [Pred, HasVLX] in {
10643 defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
10644 compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10645 defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
10646 compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10650 // FIXME: Is there a better scheduler class for VPCOMPRESS?
10651 defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
10652 avx512vl_i32_info>, EVEX, NotMemoryFoldable;
10653 defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
10654 avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
10655 defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
10656 avx512vl_f32_info>, EVEX, NotMemoryFoldable;
10657 defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
10658 avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
// expand_by_vec_width: one vector width of VEXPAND/VPEXPAND. rr carries no
// pattern (null_frag); rm is the expanding load form. Selection happens via
// the lowering patterns in expand_by_vec_width_lowering.
10661 multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
10662 string OpcodeStr, X86FoldableSchedWrite sched> {
10663 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10664 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10665 (null_frag)>, AVX5128IBase,
10668 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10669 (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
10671 AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
10672 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Selection patterns. Note both undef and all-zeros passthru map to the
// zero-masked load form (rmkz): undef may legally be zeroed.
10675 multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10677 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
10678 (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10679 _.KRCWM:$mask, addr:$src)>;
10681 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
10682 (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10683 _.KRCWM:$mask, addr:$src)>;
10685 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
10686 (_.VT _.RC:$src0))),
10687 (!cast<Instruction>(Name#_.ZSuffix#rmk)
10688 _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
10690 def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10691 (!cast<Instruction>(Name#_.ZSuffix#rrk)
10692 _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10693 def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10694 (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10695 _.KRCWM:$mask, _.RC:$src)>;
// VL driver mirroring compress_by_elt_width above.
10698 multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
10699 X86FoldableSchedWrite sched,
10700 AVX512VLVectorVTInfo VTInfo,
10701 Predicate Pred = HasAVX512> {
10702 let Predicates = [Pred] in
10703 defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
10704 expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10706 let Predicates = [Pred, HasVLX] in {
10707 defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
10708 expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10709 defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
10710 expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10714 // FIXME: Is there a better scheduler class for VPEXPAND?
10715 defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
10716 avx512vl_i32_info>, EVEX;
10717 defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
10718 avx512vl_i64_info>, EVEX, VEX_W;
10719 defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
10720 avx512vl_f32_info>, EVEX;
10721 defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
10722 avx512vl_f64_info>, EVEX, VEX_W;
10724 //handle instruction reg_vec1 = op(reg_vec,imm)
10726 // op(broadcast(eltVt),imm)
10727 //all instruction created with FROUND_CURRENT
// Unary FP op with an 8-bit immediate (e.g. vrndscale/vreduce/vgetmant).
// Three addressing forms: rri (reg), rmi (full-vector load), rmbi
// (element broadcast, EVEX_B). OpNode/MaskOpNode are split so the masked
// and unmasked forms can use different DAG nodes. MXCSR is read and FP
// exceptions may be raised (Uses/mayRaiseFPException).
10728 multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
10729 SDPatternOperator OpNode,
10730 SDPatternOperator MaskOpNode,
10731 X86FoldableSchedWrite sched,
10732 X86VectorVTInfo _> {
10733 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10734 defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
10735 (ins _.RC:$src1, i32u8imm:$src2),
10736 OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10737 (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
10738 (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
10740 defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10741 (ins _.MemOp:$src1, i32u8imm:$src2),
10742 OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10743 (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10745 (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10746 (i32 timm:$src2))>,
10747 Sched<[sched.Folded, sched.ReadAfterFold]>;
10748 defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10749 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
10750 OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
10751 "${src1}"#_.BroadcastStr#", $src2",
10752 (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10754 (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10755 (i32 timm:$src2))>, EVEX_B,
10756 Sched<[sched.Folded, sched.ReadAfterFold]>;
10760 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// SAE ({sae}) variant of the unary imm form: register-only (rrib), EVEX_B
// encodes suppress-all-exceptions, so MXCSR is read but not raised.
10761 multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10762 SDNode OpNode, X86FoldableSchedWrite sched,
10763 X86VectorVTInfo _> {
10764 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10765 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10766 (ins _.RC:$src1, i32u8imm:$src2),
10767 OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
10768 "$src1, {sae}, $src2",
10769 (OpNode (_.VT _.RC:$src1),
10770 (i32 timm:$src2))>,
10771 EVEX_B, Sched<[sched]>;
// VL driver: Z gets both the current-rounding and SAE forms; Z128/Z256
// (VLX) get only the current-rounding forms (SAE is 512-bit only).
10774 multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
10775 AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
10776 SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
10778 let Predicates = [prd] in {
10779 defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10780 sched.ZMM, _.info512>,
10781 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
10782 sched.ZMM, _.info512>, EVEX_V512;
10784 let Predicates = [prd, HasVLX] in {
10785 defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10786 sched.XMM, _.info128>, EVEX_V128;
10787 defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10788 sched.YMM, _.info256>, EVEX_V256;
10792 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10793 // op(reg_vec2,mem_vec,imm)
10794 // op(reg_vec2,broadcast(eltVt),imm)
10795 //all instruction created with FROUND_CURRENT
// Binary FP op with an 8-bit immediate (e.g. vrange): rri/rmi/rmbi forms.
10796 multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10797 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10798 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10799 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10800 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10801 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10802 (OpNode (_.VT _.RC:$src1),
10804 (i32 timm:$src3))>,
10806 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10807 (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
10808 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10809 (OpNode (_.VT _.RC:$src1),
10810 (_.VT (bitconvert (_.LdFrag addr:$src2))),
10811 (i32 timm:$src3))>,
10812 Sched<[sched.Folded, sched.ReadAfterFold]>;
10813 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10814 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10815 OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10816 "$src1, ${src2}"#_.BroadcastStr#", $src3",
10817 (OpNode (_.VT _.RC:$src1),
10818 (_.VT (_.BroadcastLdFrag addr:$src2)),
10819 (i32 timm:$src3))>, EVEX_B,
10820 Sched<[sched.Folded, sched.ReadAfterFold]>;
10824 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10825 // op(reg_vec2,mem_vec,imm)
// Generic three-operand + i8 imm form where source and destination vector
// types can differ (DestInfo vs SrcInfo), e.g. vdbpsadbw. No broadcast form.
10826 multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10827 X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
10828 X86VectorVTInfo SrcInfo>{
10829 let ExeDomain = DestInfo.ExeDomain in {
10830 defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10831 (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
10832 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10833 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10834 (SrcInfo.VT SrcInfo.RC:$src2),
10835 (i8 timm:$src3)))>,
10837 defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10838 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
10839 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10840 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10841 (SrcInfo.VT (bitconvert
10842 (SrcInfo.LdFrag addr:$src2))),
10843 (i8 timm:$src3)))>,
10844 Sched<[sched.Folded, sched.ReadAfterFold]>;
10848 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10849 // op(reg_vec2,mem_vec,imm)
10850 // op(reg_vec2,broadcast(eltVt),imm)
// Same-type specialization of avx512_3Op_rm_imm8 that additionally adds the
// element-broadcast (rmbi) form.
10851 multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10852 X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10853 avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
10855 let ExeDomain = _.ExeDomain in
10856 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10857 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10858 OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10859 "$src1, ${src2}"#_.BroadcastStr#", $src3",
10860 (OpNode (_.VT _.RC:$src1),
10861 (_.VT (_.BroadcastLdFrag addr:$src2)),
10862 (i8 timm:$src3))>, EVEX_B,
10863 Sched<[sched.Folded, sched.ReadAfterFold]>;
10866 //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10867 // op(reg_vec2,mem_scalar,imm)
// Scalar (xmm, intrinsic-style) variant: rri and rmi only.
10868 multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10869 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10870 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10871 defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10872 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10873 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10874 (OpNode (_.VT _.RC:$src1),
10876 (i32 timm:$src3))>,
10878 defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10879 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
10880 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10881 (OpNode (_.VT _.RC:$src1),
10882 (_.ScalarIntMemFrags addr:$src2),
10883 (i32 timm:$src3))>,
10884 Sched<[sched.Folded, sched.ReadAfterFold]>;
10888 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// SAE ({sae}) packed variant: register-only rrib form, EVEX_B marks
// suppress-all-exceptions, so MXCSR is read but exceptions are not raised.
10889 multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10890 SDNode OpNode, X86FoldableSchedWrite sched,
10891 X86VectorVTInfo _> {
10892 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10893 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10894 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10895 OpcodeStr, "$src3, {sae}, $src2, $src1",
10896 "$src1, $src2, {sae}, $src3",
10897 (OpNode (_.VT _.RC:$src1),
10899 (i32 timm:$src3))>,
10900 EVEX_B, Sched<[sched]>;
10903 //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Scalar SAE variant; NAME# prefix keeps the instruction name consistent
// with the sibling scalar defs when mixed into the same defm.
10904 multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10905 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10906 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10907 defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10908 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10909 OpcodeStr, "$src3, {sae}, $src2, $src1",
10910 "$src1, $src2, {sae}, $src3",
10911 (OpNode (_.VT _.RC:$src1),
10913 (i32 timm:$src3))>,
10914 EVEX_B, Sched<[sched]>;
// VL driver for binary imm ops: Z gets both current-rounding and SAE forms;
// Z128/Z256 (VLX) get only current-rounding forms (SAE is 512-bit only).
10917 multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
10918 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10919 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10920 let Predicates = [prd] in {
10921 defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10922 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
10926 let Predicates = [prd, HasVLX] in {
10927 defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10929 defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
// VL driver for the mixed-type 3Op+imm8 form (default predicate HasBWI —
// the primary user is vdbpsadbw/vpalignr-style byte ops).
10934 multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
10935 X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
10936 AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
10937 let Predicates = [Pred] in {
10938 defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
10939 SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
10941 let Predicates = [Pred, HasVLX] in {
10942 defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
10943 SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
10944 defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
10945 SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
// VL driver for the same-type 3Op+imm8 form.
10949 multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
10950 bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
10951 Predicate Pred = HasAVX512> {
10952 let Predicates = [Pred] in {
10953 defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10956 let Predicates = [Pred, HasVLX] in {
10957 defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10959 defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
// Scalar driver: one XMM-width form combining the normal and SAE variants.
10964 multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
10965 X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
10966 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
10967 let Predicates = [prd] in {
10968 defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
10969 avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
// PH/PS/PD instantiation helper. PH reuses opcPs (same opcode map position,
// different prefix via AVX512PSIi8Base+TA) and is gated on HasFP16.
10973 multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
10974 bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
10975 SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
10976 X86SchedWriteWidths sched, Predicate prd>{
10977 defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info,
10978 opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>,
10979 AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
10980 defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
10981 opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10982 AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
10983 defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
10984 opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10985 AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, VEX_W;
// Packed instantiations: vreduce/vgetmant use the same opcode for PS and PD;
// vrndscale uses 0x08 (ps) / 0x09 (pd).
10988 defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
10989 X86VReduce, X86VReduce, X86VReduceSAE,
10990 SchedWriteFRnd, HasDQI>;
10991 defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
10992 X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
10993 SchedWriteFRnd, HasAVX512>;
10994 defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
10995 X86VGetMant, X86VGetMant, X86VGetMantSAE,
10996 SchedWriteFRnd, HasAVX512>;
// Binary range ops (DQI).
10998 defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
10999 0x50, X86VRange, X86VRangeSAE,
11000 SchedWriteFAdd, HasDQI>,
11001 AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
11002 defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
11003 0x50, X86VRange, X86VRangeSAE,
11004 SchedWriteFAdd, HasDQI>,
11005 AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
// Scalar instantiations (VEX_LIG: 128-bit L-ignored encodings).
11007 defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
11008 f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
11009 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
11010 defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
11011 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
11012 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11014 defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
11015 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
11016 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
11017 defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
11018 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
11019 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11020 defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
11021 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>,
11022 AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;
11024 defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
11025 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
11026 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
11027 defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
11028 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
11029 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11030 defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info,
11031 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>,
11032 AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;
// VSHUFF/VSHUFI 128-bit-lane shuffles. Patterns are written in terms of
// CastInfo's type (the shuffle node operates on 128-bit lanes) and then
// bitcast to the instruction's own element type. EVEX2VEXOvrd names the
// VEX instruction (VPERM2F128/VPERM2I128) used when compressing EVEX->VEX.
11034 multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
11035 X86FoldableSchedWrite sched,
11037 X86VectorVTInfo CastInfo,
11038 string EVEX2VEXOvrd> {
11039 let ExeDomain = _.ExeDomain in {
11040 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11041 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
11042 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11044 (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
11045 (i8 timm:$src3)))))>,
11046 Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
11047 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11048 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
11049 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11052 (CastInfo.VT (X86Shuf128 _.RC:$src1,
11053 (CastInfo.LdFrag addr:$src2),
11054 (i8 timm:$src3)))))>,
11055 Sched<[sched.Folded, sched.ReadAfterFold]>,
11056 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
11057 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11058 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
11059 OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
11060 "$src1, ${src2}"#_.BroadcastStr#", $src3",
11064 (X86Shuf128 _.RC:$src1,
11065 (_.BroadcastLdFrag addr:$src2),
11066 (i8 timm:$src3)))))>, EVEX_B,
11067 Sched<[sched.Folded, sched.ReadAfterFold]>;
// VL driver: Z (no VEX override needed for 512-bit) and Z256 only — there
// is no 128-bit form of a cross-128-bit-lane shuffle.
11071 multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
11072 AVX512VLVectorVTInfo _,
11073 AVX512VLVectorVTInfo CastInfo, bits<8> opc,
11074 string EVEX2VEXOvrd>{
11075 let Predicates = [HasAVX512] in
11076 defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
11077 _.info512, CastInfo.info512, "">, EVEX_V512;
11079 let Predicates = [HasAVX512, HasVLX] in
11080 defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
11081 _.info256, CastInfo.info256,
11082 EVEX2VEXOvrd>, EVEX_V256;
// Instantiations: f32/i32 variants still use 64-bit CastInfo — the shuffle
// is lane-based, so patterns are shared via bitcast.
11085 defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
11086 avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11087 defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
11088 avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
11089 defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
11090 avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11091 defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
11092 avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
// VALIGND/VALIGNQ: concatenate-and-shift-right by imm elements (X86VAlign).
// rri/rmi map to VPALIGNR for EVEX->VEX compression; rmbi has no VEX
// equivalent.
11094 multiclass avx512_valign<bits<8> opc, string OpcodeStr,
11095 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11096 // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
11097 // instantiation of this class.
11098 let ExeDomain = _.ExeDomain in {
11099 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11100 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
11101 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11102 (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
11103 Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
11104 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11105 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
11106 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11107 (_.VT (X86VAlign _.RC:$src1,
11108 (bitconvert (_.LdFrag addr:$src2)),
11109 (i8 timm:$src3)))>,
11110 Sched<[sched.Folded, sched.ReadAfterFold]>,
11111 EVEX2VEXOverride<"VPALIGNRrmi">;
11113 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11114 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
11115 OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
11116 "$src1, ${src2}"#_.BroadcastStr#", $src3",
11117 (X86VAlign _.RC:$src1,
11118 (_.VT (_.BroadcastLdFrag addr:$src2)),
11119 (i8 timm:$src3))>, EVEX_B,
11120 Sched<[sched.Folded, sched.ReadAfterFold]>;
// VL driver; the Z256 EVEX2VEXOverride is reset since the 128-bit VPALIGNR
// override written in avx512_valign would be wrong for the 256-bit forms.
11124 multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
11125 AVX512VLVectorVTInfo _> {
11126 let Predicates = [HasAVX512] in {
11127 defm Z : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
11128 AVX512AIi8Base, EVEX_4V, EVEX_V512;
11130 let Predicates = [HasAVX512, HasVLX] in {
11131 defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
11132 AVX512AIi8Base, EVEX_4V, EVEX_V128;
11133 // We can't really override the 256-bit version so change it back to unset.
11134 let EVEX2VEXOverride = ? in
11135 defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
11136 AVX512AIi8Base, EVEX_4V, EVEX_V256;
11140 defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
11141 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11142 defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
11143 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
// VPALIGNR uses the generic mixed-type 3Op+imm8 driver (byte granularity).
11146 defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
11147 SchedWriteShuffle, avx512vl_i8_info,
11148 avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
11150 // Fragments to help convert valignq into masked valignd. Or valignq/valignd
// Each XForm rescales the element-shift immediate to the target element
// size: qword->dword multiplies by 2 (2 dwords per qword), qword->byte by 8,
// dword->byte by 4.
11152 def ValignqImm32XForm : SDNodeXForm<timm, [{
11153 return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
11155 def ValignqImm8XForm : SDNodeXForm<timm, [{
11156 return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
11158 def ValigndImm8XForm : SDNodeXForm<timm, [{
11159 return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
// Rewrites a masked VALIGN on "From" elements as the equivalent instruction
// on "To" elements (mask type of the select decides the element size the
// hardware masking must use); ImmXForm rescales the immediate accordingly.
// Covers reg-reg and load forms, each with merge (k) and zero (kz) masking.
11162 multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
11163 X86VectorVTInfo From, X86VectorVTInfo To,
11164 SDNodeXForm ImmXForm> {
11165 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11167 (From.VT (OpNode From.RC:$src1, From.RC:$src2,
11170 (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
11171 To.RC:$src1, To.RC:$src2,
11172 (ImmXForm timm:$src3))>;
11174 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11176 (From.VT (OpNode From.RC:$src1, From.RC:$src2,
11179 (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
11180 To.RC:$src1, To.RC:$src2,
11181 (ImmXForm timm:$src3))>;
11183 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11185 (From.VT (OpNode From.RC:$src1,
11186 (From.LdFrag addr:$src2),
11189 (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
11190 To.RC:$src1, addr:$src2,
11191 (ImmXForm timm:$src3))>;
11193 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11195 (From.VT (OpNode From.RC:$src1,
11196 (From.LdFrag addr:$src2),
11199 (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
11200 To.RC:$src1, addr:$src2,
11201 (ImmXForm timm:$src3))>;
// Extension that also handles broadcast-load operands (unmasked rmbi plus
// merge/zero masked rmbik/rmbikz).
11204 multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
11205 X86VectorVTInfo From,
11206 X86VectorVTInfo To,
11207 SDNodeXForm ImmXForm> :
11208 avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
11209 def : Pat<(From.VT (OpNode From.RC:$src1,
11210 (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
11212 (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
11213 (ImmXForm timm:$src3))>;
11215 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11217 (From.VT (OpNode From.RC:$src1,
11219 (To.VT (To.BroadcastLdFrag addr:$src2))),
11222 (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
11223 To.RC:$src1, addr:$src2,
11224 (ImmXForm timm:$src3))>;
11226 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11228 (From.VT (OpNode From.RC:$src1,
11230 (To.VT (To.BroadcastLdFrag addr:$src2))),
11233 (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
11234 To.RC:$src1, addr:$src2,
11235 (ImmXForm timm:$src3))>;
11238 let Predicates = [HasAVX512] in {
11239 // For 512-bit we lower to the widest element type we can. So we only need
11240 // to handle converting valignq to valignd.
11241 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
11242 v16i32_info, ValignqImm32XForm>;
11245 let Predicates = [HasVLX] in {
11246 // For 128-bit we lower to the widest element type we can. So we only need
11247 // to handle converting valignq to valignd.
11248 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
11249 v4i32x_info, ValignqImm32XForm>;
11250 // For 256-bit we lower to the widest element type we can. So we only need
11251 // to handle converting valignq to valignd.
11252 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
11253 v8i32x_info, ValignqImm32XForm>;
11256 let Predicates = [HasVLX, HasBWI] in {
11257 // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
// Byte-granularity rewrite via VPALIGNR; immediates rescaled by the 8x/4x
// XForms defined above.
11258 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
11259 v16i8x_info, ValignqImm8XForm>;
11260 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
11261 v16i8x_info, ValigndImm8XForm>;
// VDBPSADBW: word results from byte sources, hence the mixed-type driver.
// NotEVEX2VEXConvertible: no VEX equivalent exists.
11264 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
11265 SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
11266 EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
// Unary vector op in two forms: rr (reg-reg) and rm (reg-mem via _.LdFrag),
// both maskable EVEX encodings driven by the OpNode pattern.
11268 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
11269 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11270 let ExeDomain = _.ExeDomain in {
11271 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11272 (ins _.RC:$src1), OpcodeStr,
11274 (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
// Memory form: full-vector load, folded scheduling class.
11277 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11278 (ins _.MemOp:$src1), OpcodeStr,
11280 (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
11281 EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
11282 Sched<[sched.Folded]>;
// Extends avx512_unary_rm with an rmb (broadcast-from-scalar-memory) form,
// tagged EVEX_B and using _.BroadcastLdFrag / _.BroadcastStr.
11286 multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
11287 X86FoldableSchedWrite sched, X86VectorVTInfo _> :
11288 avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
11289 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11290 (ins _.ScalarMemOp:$src1), OpcodeStr,
// AT&T and Intel assembly source strings are identical for this form.
11291 "${src1}"#_.BroadcastStr,
11292 "${src1}"#_.BroadcastStr,
11293 (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
11294 EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
11295 Sched<[sched.Folded]>;
// Instantiates avx512_unary_rm at all three vector lengths: Z (512-bit,
// gated on prd) plus Z256/Z128 (additionally gated on HasVLX).
11298 multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11299 X86SchedWriteWidths sched,
11300 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
11301 let Predicates = [prd] in
11302 defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11305 let Predicates = [prd, HasVLX] in {
11306 defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11308 defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
// Same VL expansion as avx512_unary_rm_vl, but using the broadcast-capable
// avx512_unary_rmb base so each length also gets an rmb form.
11313 multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11314 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
11316 let Predicates = [prd] in
11317 defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11320 let Predicates = [prd, HasVLX] in {
11321 defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11323 defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
// D/Q element-size pair: the "q" variant carries VEX_W, the "d" variant does
// not; both use the broadcast-capable VL multiclass.
11328 multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
11329 SDNode OpNode, X86SchedWriteWidths sched,
11331 defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
11332 avx512vl_i64_info, prd>, VEX_W;
11333 defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
11334 avx512vl_i32_info, prd>;
// B/W element-size pair; no broadcast form (byte/word ops have no embedded
// broadcast), hence the plain avx512_unary_rm_vl base. VEX_WIG: W is ignored.
11337 multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
11338 SDNode OpNode, X86SchedWriteWidths sched,
11340 defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
11341 avx512vl_i16_info, prd>, VEX_WIG;
11342 defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
11343 avx512vl_i8_info, prd>, VEX_WIG;
// Convenience wrapper covering all four element sizes (b/w/d/q) of a unary op
// by chaining the _dq and _bw multiclasses under one NAME.
11346 multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
11347 bits<8> opc_d, bits<8> opc_q,
11348 string OpcodeStr, SDNode OpNode,
11349 X86SchedWriteWidths sched> {
11350 defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
11352 avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
// VPABS{B,W,D,Q}: packed absolute value via the generic `abs` node.
11356 defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
11359 // VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
11360 let Predicates = [HasAVX512, NoVLX] in {
// Widen the 128/256-bit value into a ZMM register (via INSERT_SUBREG of an
// IMPLICIT_DEF), run the 512-bit instruction, then extract the subregister.
11361 def : Pat<(v4i64 (abs VR256X:$src)),
11364 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
11366 def : Pat<(v2i64 (abs VR128X:$src)),
11369 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
11373 // Use 512bit version to implement 128/256 bit.
// Generic NoVLX lowering: widen a 128/256-bit unary op to the 512-bit Zrr
// instruction and extract the original-width subregister from the result.
11374 multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
11375 AVX512VLVectorVTInfo _, Predicate prd> {
11376 let Predicates = [prd, NoVLX] in {
11377 def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
11379 (!cast<Instruction>(InstrStr # "Zrr")
11380 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11381 _.info256.RC:$src1,
11382 _.info256.SubRegIdx)),
11383 _.info256.SubRegIdx)>;
11385 def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
11387 (!cast<Instruction>(InstrStr # "Zrr")
11388 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11389 _.info128.RC:$src1,
11390 _.info128.SubRegIdx)),
11391 _.info128.SubRegIdx)>;
// CDI instructions: leading-zero count and conflict detection (d/q only),
// plus their NoVLX lowerings through the 512-bit forms.
11395 defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
11396 SchedWriteVecIMul, HasCDI>;
11398 // FIXME: Is there a better scheduler class for VPCONFLICT?
11399 defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
11400 SchedWriteVecALU, HasCDI>;
11402 // VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
11403 defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
11404 defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
11406 //===---------------------------------------------------------------------===//
11407 // Counts number of ones - VPOPCNTD and VPOPCNTQ
11408 //===---------------------------------------------------------------------===//
11410 // FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
11411 defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
11412 SchedWriteVecALU, HasVPOPCNTDQ>;
11414 defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
11415 defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
11417 //===---------------------------------------------------------------------===//
11418 // Replicate Single FP - MOVSHDUP and MOVSLDUP
11419 //===---------------------------------------------------------------------===//
// Replicate odd/even f32 lanes; simple unary shuffle over avx512vl_f32_info,
// XS prefix, available with plain AVX512.
11421 multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
11422 X86SchedWriteWidths sched> {
11423 defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
11424 avx512vl_f32_info, HasAVX512>, XS;
11427 defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
11428 SchedWriteFShuffle>;
11429 defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
11430 SchedWriteFShuffle>;
11432 //===----------------------------------------------------------------------===//
11433 // AVX-512 - MOVDDUP
11434 //===----------------------------------------------------------------------===//
// 128-bit MOVDDUP is special-cased: the rr form is modeled as an
// X86VBroadcast, and the rm form loads a single scalar and broadcasts it
// (CD8VH: half-vector memory tuple).
11436 multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
11437 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11438 let ExeDomain = _.ExeDomain in {
11439 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11440 (ins _.RC:$src), OpcodeStr, "$src", "$src",
11441 (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
11443 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11444 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
11445 (_.VT (_.BroadcastLdFrag addr:$src))>,
11446 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
11447 Sched<[sched.Folded]>;
// VL expansion for MOVDDUP: 512/256-bit lengths use the generic unary
// multiclass with X86Movddup, while the 128-bit length uses the
// broadcast-based avx512_movddup_128 above.
11451 multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
11452 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
11453 defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
11454 VTInfo.info512>, EVEX_V512;
11456 let Predicates = [HasAVX512, HasVLX] in {
11457 defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
11458 VTInfo.info256>, EVEX_V256;
11459 defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
11460 VTInfo.info128>, EVEX_V128;
// Top-level MOVDDUP wrapper (f64 element info, XD prefix, VEX_W) plus VLX
// patterns that select the 128-bit forms for broadcasts of a scalar f64,
// including merge-masked (rrk) and zero-masked (rrkz) variants.
11464 multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
11465 X86SchedWriteWidths sched> {
11466 defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
11467 avx512vl_f64_info>, XD, VEX_W;
11470 defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;
11472 let Predicates = [HasVLX] in {
11473 def : Pat<(v2f64 (X86VBroadcast f64:$src)),
11474 (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11476 def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11477 (v2f64 VR128X:$src0)),
11478 (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
11479 (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11480 def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11482 (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11485 //===----------------------------------------------------------------------===//
11486 // AVX-512 - Unpack Instructions
11487 //===----------------------------------------------------------------------===//
// FP unpacks reuse the FP binop multiclass; they never raise FP exceptions,
// hence the empty Uses list and mayRaiseFPException = 0.
11489 let Uses = []<Register>, mayRaiseFPException = 0 in {
11490 defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
11491 SchedWriteFShuffleSizes, 0, 1>;
11492 defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
11493 SchedWriteFShuffleSizes>;
// Integer unpacks: byte/word forms gate on HasBWI, dword/qword on HasAVX512.
11496 defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
11497 SchedWriteShuffle, HasBWI>;
11498 defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
11499 SchedWriteShuffle, HasBWI>;
11500 defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
11501 SchedWriteShuffle, HasBWI>;
11502 defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
11503 SchedWriteShuffle, HasBWI>;
11505 defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
11506 SchedWriteShuffle, HasAVX512>;
11507 defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
11508 SchedWriteShuffle, HasAVX512>;
11509 defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
11510 SchedWriteShuffle, HasAVX512>;
11511 defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
11512 SchedWriteShuffle, HasAVX512>;
11514 //===----------------------------------------------------------------------===//
11515 // AVX-512 - Extract & Insert Integer Instructions
11516 //===----------------------------------------------------------------------===//
// Memory-destination form shared by PEXTRB/PEXTRW: extract an element,
// truncate to the element width, and store it (CD8VT1: single-element tuple).
11518 multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11519 X86VectorVTInfo _> {
11520 def mr : AVX512Ii8<opc, MRMDestMem, (outs),
11521 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11522 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11523 [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
11525 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
// VPEXTRB: register form zero-extends into GR32/GR64 via X86pextrb; the
// store form comes from avx512_extract_elt_bw_m. Gated on HasBWI.
11528 multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
11529 let Predicates = [HasBWI] in {
11530 def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
11531 (ins _.RC:$src1, u8imm:$src2),
11532 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11533 [(set GR32orGR64:$dst,
11534 (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
11535 EVEX, TAPD, Sched<[WriteVecExtract]>;
11537 defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
// VPEXTRW: the canonical rr encoding is 0xC5 (PD map); 0x15 (TAPD) is kept as
// a codegen-only rr_REV alias for disassembly/unfolding parity, plus the
// memory-destination form. Gated on HasBWI.
11541 multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
11542 let Predicates = [HasBWI] in {
11543 def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
11544 (ins _.RC:$src1, u8imm:$src2),
11545 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11546 [(set GR32orGR64:$dst,
11547 (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
11548 EVEX, PD, Sched<[WriteVecExtract]>;
11550 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
11551 def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
11552 (ins _.RC:$src1, u8imm:$src2),
11553 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
11554 EVEX, TAPD, FoldGenData<NAME#rr>,
11555 Sched<[WriteVecExtract]>;
11557 defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
// VPEXTRD/VPEXTRQ: dword/qword extract to a GPR (GRC selects GR32 vs GR64)
// or straight to memory; modeled with the generic extractelt node and gated
// on HasDQI.
11561 multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
11562 RegisterClass GRC> {
11563 let Predicates = [HasDQI] in {
11564 def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
11565 (ins _.RC:$src1, u8imm:$src2),
11566 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11568 (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
11569 EVEX, TAPD, Sched<[WriteVecExtract]>;
11571 def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
11572 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11573 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11574 [(store (extractelt (_.VT _.RC:$src1),
11575 imm:$src2),addr:$dst)]>,
11576 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
11577 Sched<[WriteVecExtractSt]>;
// Instantiations for all four element widths (q additionally takes VEX_W).
11581 defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
11582 defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
11583 defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
11584 defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
// Shared memory-source insert form: load a scalar with LdFrag and insert it
// at the immediate index. immoperator is timm for b/w and imm for d/q so the
// pattern matches how each caller models the index operand.
11586 multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11587 X86VectorVTInfo _, PatFrag LdFrag,
11588 SDPatternOperator immoperator> {
11589 def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
11590 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
11591 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11593 (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
11594 EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
// VPINSRB/VPINSRW: GPR-source insert (via X86pinsrb/X86pinsrw) plus the
// memory-source form; gated on HasBWI.
11597 multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
11598 X86VectorVTInfo _, PatFrag LdFrag> {
11599 let Predicates = [HasBWI] in {
11600 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11601 (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
11602 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11604 (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX_4V,
11605 Sched<[WriteVecInsert]>;
11607 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
// VPINSRD/VPINSRQ: dword/qword insert from a GPR (generic insertelt node) or
// from memory; gated on HasDQI.
11611 multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
11612 X86VectorVTInfo _, RegisterClass GRC> {
11613 let Predicates = [HasDQI] in {
11614 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11615 (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
11616 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11618 (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
11619 EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
11621 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
11622 _.ScalarLdFrag, imm>, TAPD;
// Instantiations; VPINSRD and VPINSRQ share opcode 0x22 and are
// distinguished by VEX_W.
11626 defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
11627 extloadi8>, TAPD, VEX_WIG;
11628 defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
11629 extloadi16>, PD, VEX_WIG;
11630 defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
11631 defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
11633 //===----------------------------------------------------------------------===//
11634 // VSHUFPS - VSHUFPD Operations
11635 //===----------------------------------------------------------------------===//
// VSHUFP{S,D}: imm8-controlled FP shuffle. VTInfo_I supplies the matching
// integer element info (used for the CD8 memory tuple), VTInfo_FP the FP info.
11637 multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
11638 AVX512VLVectorVTInfo VTInfo_FP>{
11639 defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
11640 SchedWriteFShuffle>,
11641 EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
11642 AVX512AIi8Base, EVEX_4V;
11645 defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
11646 defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
11648 //===----------------------------------------------------------------------===//
11649 // AVX-512 - Byte shift Left/Right
11650 //===----------------------------------------------------------------------===//
// Whole-register byte shift by imm8: ri (register) and mi (memory) forms.
// MRMr/MRMm carry the opcode-extension encodings (e.g. MRM7r for VPSLLDQ).
11652 multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
11653 Format MRMm, string OpcodeStr,
11654 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11655 def ri : AVX512<opc, MRMr,
11656 (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
11657 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11658 [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
11660 def mi : AVX512<opc, MRMm,
11661 (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
11662 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11663 [(set _.RC:$dst,(_.VT (OpNode
11664 (_.VT (bitconvert (_.LdFrag addr:$src1))),
11665 (i8 timm:$src2))))]>,
11666 Sched<[sched.Folded, sched.ReadAfterFold]>;
// VL expansion of the byte shifts over the i8 element infos, then the
// VPSLLDQ/VPSRLDQ instantiations (same opcode 0x73, distinguished by the
// /7 vs /3 ModRM extension via MRM7r/MRM3r).
11669 multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
11670 Format MRMm, string OpcodeStr,
11671 X86SchedWriteWidths sched, Predicate prd>{
11672 let Predicates = [prd] in
11673 defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11674 sched.ZMM, v64i8_info>, EVEX_V512;
11675 let Predicates = [prd, HasVLX] in {
11676 defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11677 sched.YMM, v32i8x_info>, EVEX_V256;
11678 defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11679 sched.XMM, v16i8x_info>, EVEX_V128;
11682 defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
11683 SchedWriteShuffle, HasBWI>,
11684 AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11685 defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
11686 SchedWriteShuffle, HasBWI>,
11687 AVX512PDIi8Base, EVEX_4V, VEX_WIG;
// VPSADBW: sum of absolute byte differences; note the destination uses a
// wider element info (_dst, i64) than the byte sources (_src). The rr form
// is commutable.
11689 multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
11690 string OpcodeStr, X86FoldableSchedWrite sched,
11691 X86VectorVTInfo _dst, X86VectorVTInfo _src> {
11692 let isCommutable = 1 in
11693 def rr : AVX512BI<opc, MRMSrcReg,
11694 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
11695 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11696 [(set _dst.RC:$dst,(_dst.VT
11697 (OpNode (_src.VT _src.RC:$src1),
11698 (_src.VT _src.RC:$src2))))]>,
11700 def rm : AVX512BI<opc, MRMSrcMem,
11701 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
11702 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11703 [(set _dst.RC:$dst,(_dst.VT
11704 (OpNode (_src.VT _src.RC:$src1),
11705 (_src.VT (bitconvert
11706 (_src.LdFrag addr:$src2))))))]>,
11707 Sched<[sched.Folded, sched.ReadAfterFold]>;
// VL expansion (i64 destination / i8 source pairs per length) and the single
// VPSADBW instantiation, gated on HasBWI.
11710 multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
11711 string OpcodeStr, X86SchedWriteWidths sched,
11713 let Predicates = [prd] in
11714 defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
11715 v8i64_info, v64i8_info>, EVEX_V512;
11716 let Predicates = [prd, HasVLX] in {
11717 defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
11718 v4i64x_info, v32i8x_info>, EVEX_V256;
11719 defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
11720 v2i64x_info, v16i8x_info>, EVEX_V128;
11724 defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
11725 SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
11727 // Transforms to swizzle an immediate to enable better matching when
11728 // memory operand isn't in the right place.
// The imm8 is a truth table indexed by (op0<<2)|(op1<<1)|op2; permuting the
// operands therefore permutes the table's bit positions.
11729 def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
11730 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
11731 uint8_t Imm = N->getZExtValue();
11732 // Swap bits 1/4 and 3/6.
11733 uint8_t NewImm = Imm & 0xa5;
11734 if (Imm & 0x02) NewImm |= 0x10;
11735 if (Imm & 0x10) NewImm |= 0x02;
11736 if (Imm & 0x08) NewImm |= 0x40;
11737 if (Imm & 0x40) NewImm |= 0x08;
11738 return getI8Imm(NewImm, SDLoc(N));
11740 def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
// Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
// (Bit index = (op0<<2)|(op1<<1)|op2, so exchanging op0/op1 swaps bit
// positions 0b010<->0b100 and 0b011<->0b101, i.e. 2/4 and 3/5 below.
// The previous comment said "operand 1 and operand 2", which duplicated the
// 132 transform's description and did not match the bit swap performed.)
11742 uint8_t Imm = N->getZExtValue();
11743 // Swap bits 2/4 and 3/5.
11744 uint8_t NewImm = Imm & 0xc3;
11745 if (Imm & 0x04) NewImm |= 0x10;
11746 if (Imm & 0x10) NewImm |= 0x04;
11747 if (Imm & 0x08) NewImm |= 0x20;
11748 if (Imm & 0x20) NewImm |= 0x08;
11749 return getI8Imm(NewImm, SDLoc(N));
11751 def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
11752 // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
// (Exchanging op1/op2 swaps bit positions 0b001<->0b010 and 0b101<->0b110,
// i.e. 1/2 and 5/6 below.)
11753 uint8_t Imm = N->getZExtValue();
11754 // Swap bits 1/2 and 5/6.
11755 uint8_t NewImm = Imm & 0x99;
11756 if (Imm & 0x02) NewImm |= 0x04;
11757 if (Imm & 0x04) NewImm |= 0x02;
11758 if (Imm & 0x20) NewImm |= 0x40;
11759 if (Imm & 0x40) NewImm |= 0x20;
11760 return getI8Imm(NewImm, SDLoc(N));
11762 def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
11763 // Convert a VPTERNLOG immediate by moving operand 1 to the end.
// (Cyclic rotation (op0,op1,op2)->(op1,op2,op0); each truth-table bit index
// abc maps to bca, giving the bit moves listed below.)
11764 uint8_t Imm = N->getZExtValue();
11765 // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
11766 uint8_t NewImm = Imm & 0x81;
11767 if (Imm & 0x02) NewImm |= 0x04;
11768 if (Imm & 0x04) NewImm |= 0x10;
11769 if (Imm & 0x08) NewImm |= 0x40;
11770 if (Imm & 0x10) NewImm |= 0x02;
11771 if (Imm & 0x20) NewImm |= 0x08;
11772 if (Imm & 0x40) NewImm |= 0x20;
11773 return getI8Imm(NewImm, SDLoc(N));
11775 def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
11776 // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
// (Inverse rotation of the 231 transform: (op0,op1,op2)->(op2,op0,op1).)
11777 uint8_t Imm = N->getZExtValue();
11778 // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
11779 uint8_t NewImm = Imm & 0x81;
11780 if (Imm & 0x02) NewImm |= 0x10;
11781 if (Imm & 0x04) NewImm |= 0x02;
11782 if (Imm & 0x08) NewImm |= 0x20;
11783 if (Imm & 0x10) NewImm |= 0x04;
11784 if (Imm & 0x20) NewImm |= 0x40;
11785 if (Imm & 0x40) NewImm |= 0x08;
11786 return getI8Imm(NewImm, SDLoc(N));
// VPTERNLOG: three-source ternary logic with an imm8 truth table. The
// rri/rmi/rmbi forms tie $src1 to $dst. The trailing Pat<> list re-commutes
// the matched operands so the tied/passthru register, full load, or
// broadcast load can appear in any source position, rewriting the immediate
// with the VPTERNLOGxxx_imm8 transforms defined above.
11789 multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
11790 X86FoldableSchedWrite sched, X86VectorVTInfo _,
11792 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11793 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11794 (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
11795 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11796 (OpNode (_.VT _.RC:$src1),
11799 (i8 timm:$src4)), 1, 1>,
11800 AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
11801 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11802 (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
11803 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11804 (OpNode (_.VT _.RC:$src1),
11806 (_.VT (bitconvert (_.LdFrag addr:$src3))),
11807 (i8 timm:$src4)), 1, 0>,
11808 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11809 Sched<[sched.Folded, sched.ReadAfterFold]>;
11810 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11811 (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
11812 OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
11813 "$src2, ${src3}"#_.BroadcastStr#", $src4",
11814 (OpNode (_.VT _.RC:$src1),
11816 (_.VT (_.BroadcastLdFrag addr:$src3)),
11817 (i8 timm:$src4)), 1, 0>, EVEX_B,
11818 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11819 Sched<[sched.Folded, sched.ReadAfterFold]>;
11820 }// Constraints = "$src1 = $dst"
11822 // Additional patterns for matching passthru operand in other positions.
11823 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11824 (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11826 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11827 _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11828 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11829 (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
11831 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11832 _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11834 // Additional patterns for matching zero masking with loads in other
11836 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11837 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11838 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11840 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11841 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11842 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11843 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11844 _.RC:$src2, (i8 timm:$src4)),
11846 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11847 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11849 // Additional patterns for matching masked loads with different
11851 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11852 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11853 _.RC:$src2, (i8 timm:$src4)),
11855 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11856 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11857 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11858 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11859 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11861 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11862 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11863 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11864 (OpNode _.RC:$src2, _.RC:$src1,
11865 (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
11867 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11868 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11869 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11870 (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
11871 _.RC:$src1, (i8 timm:$src4)),
11873 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11874 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11875 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11876 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11877 _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11879 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11880 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11882 // Additional patterns for matching zero masking with broadcasts in other
11884 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11885 (OpNode (_.BroadcastLdFrag addr:$src3),
11886 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11888 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11889 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11890 (VPTERNLOG321_imm8 timm:$src4))>;
11891 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11892 (OpNode _.RC:$src1,
11893 (_.BroadcastLdFrag addr:$src3),
11894 _.RC:$src2, (i8 timm:$src4)),
11896 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11897 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11898 (VPTERNLOG132_imm8 timm:$src4))>;
11900 // Additional patterns for matching masked broadcasts with different
11902 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11903 (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
11904 _.RC:$src2, (i8 timm:$src4)),
11906 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11907 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11908 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11909 (OpNode (_.BroadcastLdFrag addr:$src3),
11910 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11912 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11913 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11914 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11915 (OpNode _.RC:$src2, _.RC:$src1,
11916 (_.BroadcastLdFrag addr:$src3),
11917 (i8 timm:$src4)), _.RC:$src1)),
11918 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11919 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11920 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11921 (OpNode _.RC:$src2,
11922 (_.BroadcastLdFrag addr:$src3),
11923 _.RC:$src1, (i8 timm:$src4)),
11925 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11926 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11927 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11928 (OpNode (_.BroadcastLdFrag addr:$src3),
11929 _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11931 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11932 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
// VL expansion of avx512_ternlog (passes NAME through so the commuting
// patterns can !cast the generated instruction names), then the VPTERNLOGD /
// VPTERNLOGQ instantiations (same opcode 0x25, distinguished by VEX_W).
11935 multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
11936 AVX512VLVectorVTInfo _> {
11937 let Predicates = [HasAVX512] in
11938 defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
11939 _.info512, NAME>, EVEX_V512;
11940 let Predicates = [HasAVX512, HasVLX] in {
11941 defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
11942 _.info128, NAME>, EVEX_V128;
11943 defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
11944 _.info256, NAME>, EVEX_V256;
11948 defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
11949 avx512vl_i32_info>;
11950 defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
11951 avx512vl_i64_info>, VEX_W;
11953 // Patterns to implement vnot using vpternlog instead of creating all ones
11954 // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
11955 // so that the result is only dependent on src0. But we use the same source
11956 // for all operands to prevent a false dependency.
11957 // TODO: We should maybe have a more generalized algorithm for folding to
// imm 15 = 0x0F is the truth table "NOT op0" (result bit = !op0).
11959 let Predicates = [HasAVX512] in {
11960 def : Pat<(v64i8 (vnot VR512:$src)),
11961 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11962 def : Pat<(v32i16 (vnot VR512:$src)),
11963 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11964 def : Pat<(v16i32 (vnot VR512:$src)),
11965 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11966 def : Pat<(v8i64 (vnot VR512:$src)),
11967 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
// NoVLX fallback: widen the 128/256-bit source into ZMM (INSERT_SUBREG of an
// IMPLICIT_DEF, repeated for all three ternlog sources to avoid a false
// dependency), apply the 512-bit VPTERNLOGQ with imm 15, extract the
// original-width subregister.
11970 let Predicates = [HasAVX512, NoVLX] in {
11971 def : Pat<(v16i8 (vnot VR128X:$src)),
11974 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11975 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11976 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11977 (i8 15)), sub_xmm)>;
11978 def : Pat<(v8i16 (vnot VR128X:$src)),
11981 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11982 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11983 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11984 (i8 15)), sub_xmm)>;
11985 def : Pat<(v4i32 (vnot VR128X:$src)),
11988 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11989 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11990 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11991 (i8 15)), sub_xmm)>;
11992 def : Pat<(v2i64 (vnot VR128X:$src)),
11995 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11996 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11997 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11998 (i8 15)), sub_xmm)>;
12000 def : Pat<(v32i8 (vnot VR256X:$src)),
12003 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12004 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12005 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12006 (i8 15)), sub_ymm)>;
12007 def : Pat<(v16i16 (vnot VR256X:$src)),
12010 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12011 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12012 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12013 (i8 15)), sub_ymm)>;
12014 def : Pat<(v8i32 (vnot VR256X:$src)),
12017 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12018 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12019 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12020 (i8 15)), sub_ymm)>;
12021 def : Pat<(v4i64 (vnot VR256X:$src)),
12024 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12025 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12026 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12027 (i8 15)), sub_ymm)>;
// With VLX the native 128/256-bit VPTERNLOGQ forms are available directly.
12030 let Predicates = [HasVLX] in {
12031 def : Pat<(v16i8 (vnot VR128X:$src)),
12032 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12033 def : Pat<(v8i16 (vnot VR128X:$src)),
12034 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12035 def : Pat<(v4i32 (vnot VR128X:$src)),
12036 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12037 def : Pat<(v2i64 (vnot VR128X:$src)),
12038 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12040 def : Pat<(v32i8 (vnot VR256X:$src)),
12041 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12042 def : Pat<(v16i16 (vnot VR256X:$src)),
12043 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12044 def : Pat<(v8i32 (vnot VR256X:$src)),
12045 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12046 def : Pat<(v4i64 (vnot VR256X:$src)),
12047 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12050 //===----------------------------------------------------------------------===//
12051 // AVX-512 - FixupImm
12052 //===----------------------------------------------------------------------===//
// Packed VFIXUPIMM forms: register-register (rri), register-memory (rmi)
// and broadcast-memory (rmbi). $src1 is tied to $dst (see Constraints), the
// table operand $src3 uses TblVT (an integer VT paired with the fp VT by the
// instantiation below), and all forms read MXCSR / may raise FP exceptions.
12054 multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
12055 X86FoldableSchedWrite sched, X86VectorVTInfo _,
12056 X86VectorVTInfo TblVT>{
12057 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
12058 Uses = [MXCSR], mayRaiseFPException = 1 in {
12059 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12060 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12061 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12062 (X86VFixupimm (_.VT _.RC:$src1),
12064 (TblVT.VT _.RC:$src3),
12065 (i32 timm:$src4))>, Sched<[sched]>;
// Memory form: full-width load of the table operand, bitcast to TblVT.
12066 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12067 (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
12068 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12069 (X86VFixupimm (_.VT _.RC:$src1),
12071 (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
12072 (i32 timm:$src4))>,
12073 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast form (EVEX_B): scalar load of $src3 splatted to all elements.
12074 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12075 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
12076 OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
12077 "$src2, ${src3}"#_.BroadcastStr#", $src4",
12078 (X86VFixupimm (_.VT _.RC:$src1),
12080 (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
12081 (i32 timm:$src4))>,
12082 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
12083 } // Constraints = "$src1 = $dst"
// Extends avx512_fixupimm_packed (via inheritance) with the {sae}
// (suppress-all-exceptions) register form, used only by the 512-bit
// instantiation. Still lists MXCSR as used, but drops mayRaiseFPException.
12086 multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
12087 X86FoldableSchedWrite sched,
12088 X86VectorVTInfo _, X86VectorVTInfo TblVT>
12089 : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
12090 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
12091 defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12092 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12093 OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
12094 "$src2, $src3, {sae}, $src4",
12095 (X86VFixupimmSAE (_.VT _.RC:$src1),
12097 (TblVT.VT _.RC:$src3),
12098 (i32 timm:$src4))>,
12099 EVEX_B, Sched<[sched]>;
// Scalar VFIXUPIMM (SS/SD) forms: register (rri), register with {sae}
// (rrib), and scalar-memory (rmi). $src1 is tied to $dst; the table operand
// uses the integer vector type _src3VT supplied by the instantiation.
// SIMD_EXC marks the forms that may raise FP exceptions; the {sae} form
// instead only lists MXCSR as used.
12103 multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
12104 X86FoldableSchedWrite sched, X86VectorVTInfo _,
12105 X86VectorVTInfo _src3VT> {
12106 let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
12107 ExeDomain = _.ExeDomain in {
12108 defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
12109 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12110 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12111 (X86VFixupimms (_.VT _.RC:$src1),
12113 (_src3VT.VT _src3VT.RC:$src3),
12114 (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
12115 let Uses = [MXCSR] in
12116 defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
12117 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12118 OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
12119 "$src2, $src3, {sae}, $src4",
12120 (X86VFixupimmSAEs (_.VT _.RC:$src1),
12122 (_src3VT.VT _src3VT.RC:$src3),
12123 (i32 timm:$src4))>,
12124 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Memory form: scalar load widened with scalar_to_vector for the pattern.
12125 defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
12126 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
12127 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12128 (X86VFixupimms (_.VT _.RC:$src1),
12130 (_src3VT.VT (scalar_to_vector
12131 (_src3VT.ScalarLdFrag addr:$src3))),
12132 (i32 timm:$src4))>,
12133 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
// Instantiates the packed fixupimm forms across vector widths: the 512-bit
// version gets the SAE variant (HasAVX512), while 128/256-bit versions
// additionally require VLX and have no SAE form.
12137 multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
12138 AVX512VLVectorVTInfo _Vec,
12139 AVX512VLVectorVTInfo _Tbl> {
12140 let Predicates = [HasAVX512] in
12141 defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
12142 _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
12143 EVEX_4V, EVEX_V512;
12144 let Predicates = [HasAVX512, HasVLX] in {
12145 defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
12146 _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
12147 EVEX_4V, EVEX_V128;
12148 defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
12149 _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
12150 EVEX_4V, EVEX_V256;
// Concrete VFIXUPIMM instructions. Scalar SS/SD use opcode 0x55; packed
// PS/PD use 0x54 (inside avx512_fixupimm_packed_all). The fp vector info is
// paired with the matching-width integer info for the table operand.
// FIXME-style note upstream applies here too: SchedWriteFAdd is a stand-in
// scheduling class.
12154 defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
12155 SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
12156 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
12157 defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
12158 SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
12159 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
12160 defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
12161 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12162 defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
12163 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
12165 // Patterns used to select SSE scalar fp arithmetic instructions from
12168 // (1) a scalar fp operation followed by a blend
12170 // The effect is that the backend no longer emits unnecessary vector
12171 // insert instructions immediately after SSE scalar fp instructions
12172 // like addss or mulss.
12174 // For example, given the following code:
12175 // __m128 foo(__m128 A, __m128 B) {
12180 // Previously we generated:
12181 // addss %xmm0, %xmm1
12182 // movss %xmm1, %xmm0
12184 // We now generate:
12185 // addss %xmm1, %xmm0
12187 // (2) a vector packed single/double fp operation followed by a vector insert
12189 // The effect is that the backend converts the packed fp instruction
12190 // followed by a vector insert into a single SSE scalar fp instruction.
12192 // For example, given the following code:
12193 // __m128 foo(__m128 A, __m128 B) {
12194 // __m128 C = A + B;
12195 //     return (__m128) {C[0], A[1], A[2], A[3]};
12198 // Previously we generated:
12199 // addps %xmm0, %xmm1
12200 // movss %xmm1, %xmm0
12202 // We now generate:
12203 // addss %xmm1, %xmm0
12205 // TODO: Some canonicalization in lowering would simplify the number of
12206 // patterns we have to try to match.
// Selects the _Int form of a scalar fp arithmetic instruction when the IR
// does "extract element 0, scalar op, re-insert via a Movss/Movsd-style
// blend". Op is the plain SDNode (e.g. any_fadd); MaskedOp is the
// non-strict node used inside X86selects_mask for the masked patterns.
12207 multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
12208 string OpcPrefix, SDNode MoveNode,
12209 X86VectorVTInfo _, PatLeaf ZeroFP> {
12210 let Predicates = [HasAVX512] in {
12211 // extracted scalar math op with insert via movss
12212 def : Pat<(MoveNode
12213 (_.VT VR128X:$dst),
12214 (_.VT (scalar_to_vector
12215 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12217 (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
12218 (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
// Same, but with the second operand loaded from memory.
12219 def : Pat<(MoveNode
12220 (_.VT VR128X:$dst),
12221 (_.VT (scalar_to_vector
12222 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12223 (_.ScalarLdFrag addr:$src))))),
12224 (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;
12226 // extracted masked scalar math op with insert via movss
// Merge-masked variants: the X86selects_mask falls back to $src0, which is
// copied into VR128X as the pass-through operand of the _Intk instruction.
12227 def : Pat<(MoveNode (_.VT VR128X:$src1),
12229 (X86selects_mask VK1WM:$mask,
12231 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12234 (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
12235 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12236 VK1WM:$mask, _.VT:$src1,
12237 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12238 def : Pat<(MoveNode (_.VT VR128X:$src1),
12240 (X86selects_mask VK1WM:$mask,
12242 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12243 (_.ScalarLdFrag addr:$src2)),
12245 (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
12246 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12247 VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12249 // extracted masked scalar math op with insert via movss
// Zero-masked variants: the select's false value is ZeroFP, so these map to
// the _Intkz instructions (no pass-through operand).
// Fixed: use !cast<Instruction> like every sibling pattern in this
// multiclass (lines 12217/12224/12234/12245) instead of !cast<I> — the
// output operand of a Pat is an Instruction, and the file's convention is
// uniform !cast<Instruction>.
12250 def : Pat<(MoveNode (_.VT VR128X:$src1),
12252 (X86selects_mask VK1WM:$mask,
12254 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12255 _.FRC:$src2), (_.EltVT ZeroFP)))),
12256 (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intkz")
12257 VK1WM:$mask, _.VT:$src1,
12258 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
// Same, with the second operand loaded from memory.
12259 def : Pat<(MoveNode (_.VT VR128X:$src1),
12261 (X86selects_mask VK1WM:$mask,
12263 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12264 (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
12265 (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
// Instantiate the scalar-math selection patterns for fp32 (SS), fp64 (SD)
// and fp16 (SH), each with its blend node and the matching all-zeros
// PatLeaf for the zero-masked forms.
12269 defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
12270 defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
12271 defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
12272 defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
12274 defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
12275 defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
12276 defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
12277 defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
12279 defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>;
12280 defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>;
12281 defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>;
12282 defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>;
// Unary analogue of the scalar-math patterns: "extract elt 0, unary op,
// re-insert via Move" selects the _Int form of the instruction.
// Instantiated below for sqrt over fp32/fp64/fp16.
12284 multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix,
12285 SDNode Move, X86VectorVTInfo _> {
12286 let Predicates = [HasAVX512] in {
12287 def : Pat<(_.VT (Move _.VT:$dst,
12288 (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
12289 (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
12293 defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
12294 defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
12295 defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>;
12297 //===----------------------------------------------------------------------===//
12298 // AES instructions
12299 //===----------------------------------------------------------------------===//
// EVEX-encoded VAES instructions. Reuses the AESI_binop_rm_int multiclass
// with the 128/256/512-bit intrinsics looked up by name (IntPrefix, plus
// "_256"/"_512" suffixes). 128/256-bit need VLX+VAES; 512-bit needs
// AVX512F+VAES.
12301 multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
12302 let Predicates = [HasVLX, HasVAES] in {
12303 defm Z128 : AESI_binop_rm_int<Op, OpStr,
12304 !cast<Intrinsic>(IntPrefix),
12305 loadv2i64, 0, VR128X, i128mem>,
12306 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
12307 defm Z256 : AESI_binop_rm_int<Op, OpStr,
12308 !cast<Intrinsic>(IntPrefix#"_256"),
12309 loadv4i64, 0, VR256X, i256mem>,
12310 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
12312 let Predicates = [HasAVX512, HasVAES] in
12313 defm Z : AESI_binop_rm_int<Op, OpStr,
12314 !cast<Intrinsic>(IntPrefix#"_512"),
12315 loadv8i64, 0, VR512, i512mem>,
12316 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
// The four AES round instructions share the multiclass; only the opcode,
// mnemonic, and intrinsic prefix differ.
12319 defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
12320 defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
12321 defm VAESDEC : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
12322 defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
12324 //===----------------------------------------------------------------------===//
12325 // PCLMUL instructions - Carry less multiplication
12326 //===----------------------------------------------------------------------===//
// EVEX-encoded carry-less multiply. Reuses the SSE vpclmulqdq multiclass;
// 512-bit requires AVX512F+VPCLMULQDQ, 128/256-bit require VLX+VPCLMULQDQ.
// The *_aliases multiclass adds the pclmul{lq,hq}{lq,hq}dq mnemonic aliases
// for each width.
12328 let Predicates = [HasAVX512, HasVPCLMULQDQ] in
12329 defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
12330 EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;
12332 let Predicates = [HasVLX, HasVPCLMULQDQ] in {
12333 defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
12334 EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;
12336 defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
12337 int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
12338 EVEX_CD8<64, CD8VF>, VEX_WIG;
12342 defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
12343 defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
12344 defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
12346 //===----------------------------------------------------------------------===//
12348 //===----------------------------------------------------------------------===//
12350 multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
12351 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12352 let Constraints = "$src1 = $dst",
12353 ExeDomain = VTI.ExeDomain in {
12354 defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12355 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12356 "$src3, $src2", "$src2, $src3",
12357 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
12358 AVX512FMA3Base, Sched<[sched]>;
12359 defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12360 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12361 "$src3, $src2", "$src2, $src3",
12362 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12363 (VTI.VT (VTI.LdFrag addr:$src3))))>,
12365 Sched<[sched.Folded, sched.ReadAfterFold]>;
12369 multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12370 X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
12371 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
12372 let Constraints = "$src1 = $dst",
12373 ExeDomain = VTI.ExeDomain in
12374 defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12375 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
12376 "${src3}"#VTI.BroadcastStr#", $src2",
12377 "$src2, ${src3}"#VTI.BroadcastStr,
12378 (OpNode VTI.RC:$src1, VTI.RC:$src2,
12379 (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12380 AVX512FMA3Base, EVEX_B,
12381 Sched<[sched.Folded, sched.ReadAfterFold]>;
12384 multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
12385 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12386 let Predicates = [HasVBMI2] in
12387 defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12389 let Predicates = [HasVBMI2, HasVLX] in {
12390 defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12392 defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12397 multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
12398 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12399 let Predicates = [HasVBMI2] in
12400 defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12402 let Predicates = [HasVBMI2, HasVLX] in {
12403 defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12405 defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12409 multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
12410 SDNode OpNode, X86SchedWriteWidths sched> {
12411 defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
12412 avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
12413 defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
12414 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12415 defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
12416 avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
12419 multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
12420 SDNode OpNode, X86SchedWriteWidths sched> {
12421 defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
12422 avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
12423 VEX_W, EVEX_CD8<16, CD8VF>;
12424 defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
12425 OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
12426 defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
12427 sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
// VBMI2 concatenate-and-shift instructions (variable-count and
// immediate-count forms), plus byte/word compress and expand.
12431 defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
12432 defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
12433 defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
12434 defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
// Compress/expand over byte and word elements (AVX512F only has dword/qword).
12437 defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
12438 avx512vl_i8_info, HasVBMI2>, EVEX,
12440 defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
12441 avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
12444 defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
12445 avx512vl_i8_info, HasVBMI2>, EVEX;
12446 defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
12447 avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
12449 //===----------------------------------------------------------------------===//
12451 //===----------------------------------------------------------------------===//
// VNNI dot-product accumulate forms for one vector width: reg-reg (r),
// reg-mem (m), and dword-broadcast (mb). The accumulator $src1 is tied to
// $dst; IsCommutable is forwarded for the reg-reg form only.
12453 let Constraints = "$src1 = $dst" in
12454 multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12455 X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12456 bit IsCommutable> {
12457 let ExeDomain = VTI.ExeDomain in {
12458 defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12459 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12460 "$src3, $src2", "$src2, $src3",
12461 (VTI.VT (OpNode VTI.RC:$src1,
12462 VTI.RC:$src2, VTI.RC:$src3)),
12463 IsCommutable, IsCommutable>,
12464 EVEX_4V, T8PD, Sched<[sched]>;
12465 defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12466 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12467 "$src3, $src2", "$src2, $src3",
12468 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12469 (VTI.VT (VTI.LdFrag addr:$src3))))>,
12470 EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
12471 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast form (EVEX_B): $src3 is a 32-bit element splat from memory.
12472 defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12473 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
12474 OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
12475 "$src2, ${src3}"#VTI.BroadcastStr,
12476 (OpNode VTI.RC:$src1, VTI.RC:$src2,
12477 (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12478 EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
12479 T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates VNNI_rmb across the three vector widths (512-bit under
// HasVNNI; 128/256-bit also need VLX), then defines the four VPDP*
// instructions. The *BUSD* forms are not commutable (operands have
// different signedness); the *WSSD* forms are.
12483 multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
12484 X86SchedWriteWidths sched, bit IsCommutable> {
12485 let Predicates = [HasVNNI] in
12486 defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
12487 IsCommutable>, EVEX_V512;
12488 let Predicates = [HasVNNI, HasVLX] in {
12489 defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
12490 IsCommutable>, EVEX_V256;
12491 defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
12492 IsCommutable>, EVEX_V128;
12496 // FIXME: Is there a better scheduler class for VPDP?
12497 defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
12498 defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
12499 defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
12500 defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
12502 // Patterns to match VPDPWSSD from existing instructions/intrinsics.
// Fuse "add(acc, vpmaddwd(a, b))" into a single VPDPWSSD when the
// vpmaddwd result has a single use (enforced by the _su PatFrag).
12503 let Predicates = [HasVNNI] in {
12504 def : Pat<(v16i32 (add VR512:$src1,
12505 (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
12506 (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
12507 def : Pat<(v16i32 (add VR512:$src1,
12508 (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
12509 (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
// Narrower widths additionally require VLX.
12511 let Predicates = [HasVNNI,HasVLX] in {
12512 def : Pat<(v8i32 (add VR256X:$src1,
12513 (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
12514 (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
12515 def : Pat<(v8i32 (add VR256X:$src1,
12516 (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
12517 (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
12518 def : Pat<(v4i32 (add VR128X:$src1,
12519 (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
12520 (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
12521 def : Pat<(v4i32 (add VR128X:$src1,
12522 (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
12523 (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
12526 //===----------------------------------------------------------------------===//
12528 //===----------------------------------------------------------------------===//
12530 // FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
// BITALG byte/word population count, plus the generic lowering patterns
// for ctpop on those element widths.
12531 defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
12532 avx512vl_i8_info, HasBITALG>;
12533 defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
12534 avx512vl_i16_info, HasBITALG>, VEX_W;
12536 defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
12537 defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
// Single-use guard for VPSHUFBITQMB masked-pattern selection below.
12539 def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
12540 (X86Vpshufbitqmb node:$src1, node:$src2), [{
12541 return N->hasOneUse();
12544 multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12545 defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
12546 (ins VTI.RC:$src1, VTI.RC:$src2),
12548 "$src2, $src1", "$src1, $src2",
12549 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12550 (VTI.VT VTI.RC:$src2)),
12551 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12552 (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
12554 defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
12555 (ins VTI.RC:$src1, VTI.MemOp:$src2),
12557 "$src2, $src1", "$src1, $src2",
12558 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12559 (VTI.VT (VTI.LdFrag addr:$src2))),
12560 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12561 (VTI.VT (VTI.LdFrag addr:$src2)))>,
12562 EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
12563 Sched<[sched.Folded, sched.ReadAfterFold]>;
12566 multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12567 let Predicates = [HasBITALG] in
12568 defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
12569 let Predicates = [HasBITALG, HasVLX] in {
12570 defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
12571 defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
12575 // FIXME: Is there a better scheduler class for VPSHUFBITQMB?
12576 defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
12578 //===----------------------------------------------------------------------===//
12580 //===----------------------------------------------------------------------===//
12582 multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12583 X86SchedWriteWidths sched> {
12584 let Predicates = [HasGFNI, HasAVX512, HasBWI] in
12585 defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
12587 let Predicates = [HasGFNI, HasVLX, HasBWI] in {
12588 defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
12590 defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
12595 defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
12597 EVEX_CD8<8, CD8VF>, T8PD;
12599 multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
12600 X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12601 X86VectorVTInfo BcstVTI>
12602 : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
12603 let ExeDomain = VTI.ExeDomain in
12604 defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12605 (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
12606 OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
12607 "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
12608 (OpNode (VTI.VT VTI.RC:$src1),
12609 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
12610 (i8 timm:$src3))>, EVEX_B,
12611 Sched<[sched.Folded, sched.ReadAfterFold]>;
12614 multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12615 X86SchedWriteWidths sched> {
12616 let Predicates = [HasGFNI, HasAVX512, HasBWI] in
12617 defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
12618 v64i8_info, v8i64_info>, EVEX_V512;
12619 let Predicates = [HasGFNI, HasVLX, HasBWI] in {
12620 defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
12621 v32i8x_info, v4i64x_info>, EVEX_V256;
12622 defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
12623 v16i8x_info, v2i64x_info>, EVEX_V128;
12627 defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
12628 X86GF2P8affineinvqb, SchedWriteVecIMul>,
12629 EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12630 defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
12631 X86GF2P8affineqb, SchedWriteVecIMul>,
12632 EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12635 //===----------------------------------------------------------------------===//
12637 //===----------------------------------------------------------------------===//
12639 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
12640 Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
12641 defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
12642 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12643 "v4fmaddps", "$src3, $src2", "$src2, $src3",
12644 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12645 Sched<[SchedWriteFMA.ZMM.Folded]>;
12647 defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
12648 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12649 "v4fnmaddps", "$src3, $src2", "$src2, $src3",
12650 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12651 Sched<[SchedWriteFMA.ZMM.Folded]>;
12653 defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
12654 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12655 "v4fmaddss", "$src3, $src2", "$src2, $src3",
12656 []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12657 Sched<[SchedWriteFMA.Scl.Folded]>;
12659 defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
12660 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12661 "v4fnmaddss", "$src3, $src2", "$src2, $src3",
12662 []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12663 Sched<[SchedWriteFMA.Scl.Folded]>;
12666 //===----------------------------------------------------------------------===//
12668 //===----------------------------------------------------------------------===//
12670 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
12671 Constraints = "$src1 = $dst" in {
12672 defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
12673 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12674 "vp4dpwssd", "$src3, $src2", "$src2, $src3",
12675 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12676 Sched<[SchedWriteFMA.ZMM.Folded]>;
12678 defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
12679 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12680 "vp4dpwssds", "$src3, $src2", "$src2, $src3",
12681 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12682 Sched<[SchedWriteFMA.ZMM.Folded]>;
// Pseudo spill/reload instructions for a pair of 16-bit mask registers
// (VK16PAIR), as used by the vp2intersect result pairs. Expanded after
// register allocation; no patterns, hence the empty [] lists.
12685 let hasSideEffects = 0 in {
12686 let mayStore = 1, SchedRW = [WriteFStoreX] in
12687 def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
12688 let mayLoad = 1, SchedRW = [WriteFLoadX] in
12689 def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
12692 //===----------------------------------------------------------------------===//
12694 //===----------------------------------------------------------------------===//
// VP2INTERSECT forms for one vector width: reg-reg (rr), full memory (rm),
// and element broadcast (rmb). The destination is a mask-register PAIR
// (_.KRPC) — the instruction writes two mask registers.
12696 multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
12697 def rr : I<0x68, MRMSrcReg,
12698 (outs _.KRPC:$dst),
12699 (ins _.RC:$src1, _.RC:$src2),
12700 !strconcat("vp2intersect", _.Suffix,
12701 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12702 [(set _.KRPC:$dst, (X86vp2intersect
12703 _.RC:$src1, (_.VT _.RC:$src2)))]>,
12704 EVEX_4V, T8XD, Sched<[sched]>;
12706 def rm : I<0x68, MRMSrcMem,
12707 (outs _.KRPC:$dst),
12708 (ins _.RC:$src1, _.MemOp:$src2),
12709 !strconcat("vp2intersect", _.Suffix,
12710 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12711 [(set _.KRPC:$dst, (X86vp2intersect
12712 _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12713 EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>,
12714 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast form (EVEX_B): second operand splat from a scalar load.
12716 def rmb : I<0x68, MRMSrcMem,
12717 (outs _.KRPC:$dst),
12718 (ins _.RC:$src1, _.ScalarMemOp:$src2),
12719 !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12720 ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12721 [(set _.KRPC:$dst, (X86vp2intersect
12722 _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
12723 EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
12724 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Width-dispatching wrapper for vp2intersect (128/256-bit additionally
// require VLX), then the dword and qword instruction families.
12727 multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
12728 let Predicates = [HasAVX512, HasVP2INTERSECT] in
12729 defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;
12731 let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
12732 defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
12733 defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
12737 defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
12738 defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, VEX_W;
12740 multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
12741 X86SchedWriteWidths sched,
12742 AVX512VLVectorVTInfo _SrcVTInfo,
12743 AVX512VLVectorVTInfo _DstVTInfo,
12744 SDNode OpNode, Predicate prd,
12745 bit IsCommutable = 0> {
12746 let Predicates = [prd] in
12747 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
12748 _SrcVTInfo.info512, _DstVTInfo.info512,
12749 _SrcVTInfo.info512, IsCommutable>,
12750 EVEX_V512, EVEX_CD8<32, CD8VF>;
12751 let Predicates = [HasVLX, prd] in {
12752 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
12753 _SrcVTInfo.info256, _DstVTInfo.info256,
12754 _SrcVTInfo.info256, IsCommutable>,
12755 EVEX_V256, EVEX_CD8<32, CD8VF>;
12756 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
12757 _SrcVTInfo.info128, _DstVTInfo.info128,
12758 _SrcVTInfo.info128, IsCommutable>,
12759 EVEX_V128, EVEX_CD8<32, CD8VF>;
12763 let ExeDomain = SSEPackedSingle in
12764 defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
12765 SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
12766 avx512vl_f32_info, avx512vl_i16_info,
12767 X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
12769 // Truncate Float to BFloat16
// Multiclass for the single-source vcvtneps2bf16 forms. The 128-bit variant
// uses null_frag for its patterns so that the dedicated masking patterns below
// (under [HasBF16, HasVLX]) can match X86mcvtneps2bf16 instead. The empty
// `Uses` list and mayRaiseFPException = 0 reflect that this conversion neither
// reads MXCSR nor raises FP exceptions. The trailing InstAliases provide the
// "x"/"y" mnemonic suffixes needed to disambiguate the 128/256-bit memory
// forms in the assembler (both produce a v8i16 destination).
// NOTE(review): several closing braces and alias operand lines of this
// multiclass are elided from this excerpt.
12770 multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
12771                              X86SchedWriteWidths sched> {
12772   let ExeDomain = SSEPackedSingle in {
12773   let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
12774   defm Z    : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
12775                              X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
12777   let Predicates = [HasBF16, HasVLX] in {
12778     let Uses = []<Register>, mayRaiseFPException = 0 in {
// Z128 patterns intentionally disabled (null_frag); see masking patterns below.
12779     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
12780                                null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
12782     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
12783                                X86cvtneps2bf16, X86cvtneps2bf16,
12784                                sched.YMM, "{1to8}", "{y}">, EVEX_V256;
12786   } // Predicates = [HasBF16, HasVLX]
12787   } // ExeDomain = SSEPackedSingle
// Assembler aliases: explicit "x"/"y" width suffixes for the Intel syntax.
12789   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12790                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
12792   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12793                   (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
12794                   f128mem:$src), 0, "intel">;
12795   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12796                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
12798   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12799                   (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
12800                   f256mem:$src), 0, "intel">;
// VCVTNEPS2BF16: single-source packed f32 -> BF16 truncating conversion.
12803 defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
12804                                        SchedWriteCvtPD2PS>, T8XS,
12805                                        EVEX_CD8<32, CD8VF>;
12807 let Predicates = [HasBF16, HasVLX] in {
12808   // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
12809   // patterns have been disabled with null_frag.
// Each reg/mem/broadcast form gets three patterns: unmasked, merge-masked
// (explicit $src0 passthru) and zero-masked (ImmAllZerosV passthru).
// NOTE(review): the VK4WM:$mask operand lines of some patterns are elided
// from this excerpt.
12810   def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
12811             (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12812   def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
12814             (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
12815   def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
12817             (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
// Memory (full-vector load) forms.
12819   def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
12820             (VCVTNEPS2BF16Z128rm addr:$src)>;
12821   def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
12823             (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12824   def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
12826             (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
// Broadcast-load forms (scalar f32 broadcast to all 4 lanes).
12828   def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
12829                                      (X86VBroadcastld32 addr:$src)))),
12830             (VCVTNEPS2BF16Z128rmb addr:$src)>;
12831   def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12832                               (v8i16 VR128X:$src0), VK4WM:$mask),
12833             (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12834   def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12835                               v8i16x_info.ImmAllZerosV, VK4WM:$mask),
12836             (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
// BF16 dot-product accumulate (vdpbf16ps): $src1 is tied to $dst, so the
// destination accumulates on top of its previous contents.
12839 let Constraints = "$src1 = $dst" in {
// One width of vdpbf16ps: register, memory, and broadcast-memory forms, each
// with the standard AVX512 masking variants via AVX512_maskable_3src.
// `_` describes the f32 accumulator type, `src_v` the i32-typed BF16 sources.
12840 multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
12841                               X86FoldableSchedWrite sched,
12842                               X86VectorVTInfo _, X86VectorVTInfo src_v> {
12843   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12844                            (ins src_v.RC:$src2, src_v.RC:$src3),
12845                            OpcodeStr, "$src3, $src2", "$src2, $src3",
12846                            (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
12847                            EVEX_4V, Sched<[sched]>;
// Memory form: $src3 folded from a full-vector load.
12849   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12850                            (ins src_v.RC:$src2, src_v.MemOp:$src3),
12851                            OpcodeStr, "$src3, $src2", "$src2, $src3",
12852                            (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12853                            (src_v.LdFrag addr:$src3)))>, EVEX_4V,
12854                            Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast form: scalar $src3 broadcast to the full vector (EVEX.b).
12856   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12857                   (ins src_v.RC:$src2, src_v.ScalarMemOp:$src3),
12859                   !strconcat("${src3}", _.BroadcastStr,", $src2"),
12860                   !strconcat("$src2, ${src3}", _.BroadcastStr),
12861                   (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12862                   (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
12863                   EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
12866 } // Constraints = "$src1 = $dst"
// Instantiate avx512_dpbf16ps_rm at 512/256/128-bit widths; 256/128 require
// HasVLX in addition to `prd`.
// NOTE(review): the closing braces of this multiclass are elided from this
// excerpt.
12868 multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
12869                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
12870                                  AVX512VLVectorVTInfo src_v, Predicate prd> {
12871   let Predicates = [prd] in {
12872     defm Z    : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
12873                                    src_v.info512>, EVEX_V512;
12875   let Predicates = [HasVLX, prd] in {
12876     defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
12877                                    src_v.info256>, EVEX_V256;
12878     defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
12879                                    src_v.info128>, EVEX_V128;
// VDPBF16PS: dot product of BF16 pairs, accumulated into packed f32.
12883 let ExeDomain = SSEPackedSingle in
12884 defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
12885                                        avx512vl_f32_info, avx512vl_i32_info,
12886                                        HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
12888 //===----------------------------------------------------------------------===//
12890 //===----------------------------------------------------------------------===//
// AVX512-FP16 scalar moves: vmovw transfers a 16-bit value between a GPR or
// 16-bit memory location and the low word of an XMM register, zeroing the
// upper elements. The register form uses GR32 because there is no 16-bit GPR
// encoding for this instruction; patterns below adapt GR16/i16 values via
// INSERT_SUBREG / EXTRACT_SUBREG.
// NOTE(review): several pattern operand/continuation lines and the closing
// brace of this `let` block are elided from this excerpt.
12892 let Predicates = [HasFP16] in {
12893   // Move word ( r/m16) to Packed word
12894   def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
12895                    "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveFromGpr]>;
12896   def VMOVWrm    : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src),
12897                    "vmovw\t{$src, $dst|$dst, $src}",
12899                            (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>,
12900                    T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>;
// f16 <-> GR16 bitcast via the GR32-based move.
12902   def : Pat<(f16 (bitconvert GR16:$src)),
12903             (f16 (COPY_TO_REGCLASS
12905                     (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
12907   def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))),
12908             (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
// A GR32 masked to its low 16 bits moved into a zeroed vector is just vmovw.
12909   def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))),
12910             (VMOVW2SHrr GR32:$src)>;
12911   // FIXME: We should really find a way to improve these patterns.
// 256/512-bit zero-extended insert of the same value: vmovw into the low xmm.
12912   def : Pat<(v8i32 (X86vzmovl
12913                     (insert_subvector undef,
12914                                       (v4i32 (scalar_to_vector
12915                                               (and GR32:$src, 0xffff))),
12917             (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
12918   def : Pat<(v16i32 (X86vzmovl
12919                      (insert_subvector undef,
12920                                        (v4i32 (scalar_to_vector
12921                                                (and GR32:$src, 0xffff))),
12923             (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
12925   def : Pat<(v8i16 (X86vzmovl (v8i16 (scalar_to_vector (i16 (trunc GR32:$src)))))),
12926             (VMOVW2SHrr GR32:$src)>;
12928   // AVX 128-bit movw instruction write zeros in the high 128-bit part.
12929   def : Pat<(v8i16 (X86vzload16 addr:$src)),
12930             (VMOVWrm addr:$src)>;
12931   def : Pat<(v16i16 (X86vzload16 addr:$src)),
12932             (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
12934   // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
12935   def : Pat<(v32i16 (X86vzload16 addr:$src)),
12936             (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
// 16-bit extending loads into an i32 vector lane fold into vmovw.
12938   def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))),
12939             (VMOVWrm addr:$src)>;
12940   def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (i32 (zextloadi16 addr:$src))))),
12941             (VMOVWrm addr:$src)>;
12942   def : Pat<(v8i32 (X86vzmovl
12943                     (insert_subvector undef,
12944                                       (v4i32 (scalar_to_vector
12945                                               (i32 (zextloadi16 addr:$src)))),
12947             (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
12948   def : Pat<(v16i32 (X86vzmovl
12949                      (insert_subvector undef,
12950                                        (v4i32 (scalar_to_vector
12951                                                (i32 (zextloadi16 addr:$src)))),
12953             (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
12955   // Move word from xmm register to r/m16
12956   def VMOVSH2Wrr : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
12957                    "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveToGpr]>;
12958   def VMOVWmr    : AVX512<0x7E, MRMDestMem, (outs),
12959                    (ins i16mem:$dst, VR128X:$src),
12960                    "vmovw\t{$src, $dst|$dst, $src}",
12961                    [(store (i16 (extractelt (v8i16 VR128X:$src),
12962                                 (iPTR 0))), addr:$dst)]>,
12963                    T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>;
// Extract the low 16 bits of an xmm into a GR16 via the GR32 move.
12965   def : Pat<(i16 (bitconvert FR16X:$src)),
12966             (i16 (EXTRACT_SUBREG
12967                   (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)),
12969   def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
12970             (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>;
12973 // Allow "vmovw" to use GR64
// Assembler-only encodings (no patterns, hasSideEffects = 0) so vmovw can be
// written with a 64-bit GPR operand; VEX_W selects the REX.W encoding.
12974 let hasSideEffects = 0 in {
12975   def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
12976                      "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
12977   def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
12978                       "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>;
12981 // Convert 16-bit float to i16/u16
// Multiclass for same-element-count f16 <-> 16-bit integer conversions.
// The 512-bit form also gets a static-rounding (EVEX.b) variant via
// avx512_vcvt_fp_rc; 128/256-bit forms require HasVLX.
// NOTE(review): the closing braces of this multiclass are elided from this
// excerpt.
12982 multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12983                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12984                           AVX512VLVectorVTInfo _Dst,
12985                           AVX512VLVectorVTInfo _Src,
12986                           X86SchedWriteWidths sched> {
12987   let Predicates = [HasFP16] in {
12988     defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
12989                             OpNode, MaskOpNode, sched.ZMM>,
12990             avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512,
12991                               OpNodeRnd, sched.ZMM>, EVEX_V512;
12993   let Predicates = [HasFP16, HasVLX] in {
12994     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
12995                                OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
12996     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
12997                                OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
13001 // Convert 16-bit float to i16/u16 truncate
// Same as avx512_cvtph2w, but the 512-bit extra variant is a
// suppress-all-exceptions (SAE) form (avx512_vcvt_fp_sae) instead of a
// rounding-control form, as truncating conversions take {sae} not a rounding
// mode.
// NOTE(review): the closing braces of this multiclass are elided from this
// excerpt.
13002 multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13003                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13004                            AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src,
13005                            X86SchedWriteWidths sched> {
13006   let Predicates = [HasFP16] in {
13007     defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
13008                             OpNode, MaskOpNode, sched.ZMM>,
13009             avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512,
13010                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13012   let Predicates = [HasFP16, HasVLX] in {
13013     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
13014                                OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
13015     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
13016                                OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
// Instantiations of the f16 <-> 16-bit integer conversions:
//   vcvtph2w/vcvtph2uw   : f16 -> signed/unsigned i16 (round to current mode)
//   vcvttph2w/vcvttph2uw : f16 -> signed/unsigned i16 (truncate)
//   vcvtw2ph/vcvtuw2ph   : signed/unsigned i16 -> f16
// Opcode map (T_MAP5) prefix distinguishes the variants sharing opcodes
// 0x7D/0x7C. Scheduling reuses SchedWriteCvtPD2DQ (presumably a placeholder;
// no FP16-specific sched class is used here).
13020 defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt,
13021                                 X86cvtp2UIntRnd, avx512vl_i16_info,
13022                                 avx512vl_f16_info, SchedWriteCvtPD2DQ>,
13023                                 T_MAP5PS, EVEX_CD8<16, CD8VF>;
13024 defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp,
13025                                 X86VUintToFpRnd, avx512vl_f16_info,
13026                                 avx512vl_i16_info, SchedWriteCvtPD2DQ>,
13027                                 T_MAP5XD, EVEX_CD8<16, CD8VF>;
13028 defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si,
13029                                  X86cvttp2si, X86cvttp2siSAE,
13030                                  avx512vl_i16_info, avx512vl_f16_info,
13031                                  SchedWriteCvtPD2DQ>, T_MAP5PD, EVEX_CD8<16, CD8VF>;
13032 defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui,
13033                                   X86cvttp2ui, X86cvttp2uiSAE,
13034                                   avx512vl_i16_info, avx512vl_f16_info,
13035                                   SchedWriteCvtPD2DQ>, T_MAP5PS, EVEX_CD8<16, CD8VF>;
13036 defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int,
13037                                X86cvtp2IntRnd, avx512vl_i16_info,
13038                                avx512vl_f16_info, SchedWriteCvtPD2DQ>,
13039                                T_MAP5PD, EVEX_CD8<16, CD8VF>;
13040 defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp,
13041                                X86VSintToFpRnd, avx512vl_f16_info,
13042                                avx512vl_i16_info, SchedWriteCvtPD2DQ>,
13043                                T_MAP5XS, EVEX_CD8<16, CD8VF>;
13045 // Convert Half to Signed/Unsigned Doubleword
// f16 -> i32 widens element size, so the 128-bit form consumes only the low
// 4 halves of a v8f16 source (hence the f64mem operand and "{1to4}"
// broadcast string). 512-bit form adds a rounding-control variant.
// NOTE(review): the closing braces of this multiclass are elided from this
// excerpt.
13046 multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13047                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13048                            X86SchedWriteWidths sched> {
13049   let Predicates = [HasFP16] in {
13050     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
13051                             MaskOpNode, sched.ZMM>,
13052             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info,
13053                               OpNodeRnd, sched.ZMM>, EVEX_V512;
13055   let Predicates = [HasFP16, HasVLX] in {
13056     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
13057                                MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
13058     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
13059                                MaskOpNode, sched.YMM>, EVEX_V256;
13063 // Convert Half to Signed/Unsigned Doubleword with truncation
// Truncating counterpart of avx512_cvtph2dq: SAE variant at 512 bits instead
// of rounding control; same partial-source handling at 128 bits.
// NOTE(review): the closing braces of this multiclass are elided from this
// excerpt.
13064 multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13065                             SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13066                             X86SchedWriteWidths sched> {
13067   let Predicates = [HasFP16] in {
13068     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
13069                             MaskOpNode, sched.ZMM>,
13070             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info,
13071                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13073   let Predicates = [HasFP16, HasVLX] in {
13074     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
13075                                MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
13076     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
13077                                MaskOpNode, sched.YMM>, EVEX_V256;
// Instantiations of f16 -> i32 conversions (signed/unsigned, rounding and
// truncating). CD8VH: memory tuple is half a vector, since elements widen 2x.
13082 defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
13083                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
13084                                  EVEX_CD8<16, CD8VH>;
13085 defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt,
13086                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PS,
13087                                  EVEX_CD8<16, CD8VH>;
13089 defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si,
13090                                    X86cvttp2si, X86cvttp2siSAE,
13091                                    SchedWriteCvtPS2DQ>, T_MAP5XS,
13092                                    EVEX_CD8<16, CD8VH>;
13094 defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
13095                                     X86cvttp2ui, X86cvttp2uiSAE,
13096                                     SchedWriteCvtPS2DQ>, T_MAP5PS,
13097                                     EVEX_CD8<16, CD8VH>;
13099 // Convert Half to Signed/Unsigned Quardword
// f16 -> i64 widens element size 4x: the 128/256-bit forms consume only the
// low 2/4 halves of a v8f16 source, so broadcast strings and memory operand
// sizes are given explicitly. 512-bit form adds a rounding-control variant.
// NOTE(review): EVEX_V128 suffix line for Z128 and the closing braces of this
// multiclass are elided from this excerpt.
13100 multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13101                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13102                            X86SchedWriteWidths sched> {
13103   let Predicates = [HasFP16] in {
13104     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
13105                             MaskOpNode, sched.ZMM>,
13106             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
13107                               OpNodeRnd, sched.ZMM>, EVEX_V512;
13109   let Predicates = [HasFP16, HasVLX] in {
13110     // Explicitly specified broadcast string, since we take only 2 elements
13111     // from v8f16x_info source
13112     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
13113                                MaskOpNode, sched.XMM, "{1to2}", "", f32mem>,
13115     // Explicitly specified broadcast string, since we take only 4 elements
13116     // from v8f16x_info source
13117     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
13118                                MaskOpNode, sched.YMM, "{1to4}", "", f64mem>,
13123 // Convert Half to Signed/Unsigned Quardword with truncation
// Truncating counterpart of avx512_cvtph2qq: SAE variant at 512 bits instead
// of rounding control; same partial-source handling at 128/256 bits.
// NOTE(review): the closing braces of this multiclass are elided from this
// excerpt.
13124 multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13125                             SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13126                             X86SchedWriteWidths sched> {
13127   let Predicates = [HasFP16] in {
13128     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
13129                             MaskOpNode, sched.ZMM>,
13130             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
13131                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13133   let Predicates = [HasFP16, HasVLX] in {
13134     // Explicitly specified broadcast string, since we take only 2 elements
13135     // from v8f16x_info source
13136     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
13137                                MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128;
13138     // Explicitly specified broadcast string, since we take only 4 elements
13139     // from v8f16x_info source
13140     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
13141                                MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256;
// Instantiations of f16 -> i64 conversions (signed/unsigned, rounding and
// truncating). CD8VQ: memory tuple is a quarter vector (elements widen 4x).
13145 defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
13146                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
13147                                  EVEX_CD8<16, CD8VQ>;
13149 defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
13150                                   X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
13151                                   EVEX_CD8<16, CD8VQ>;
13153 defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
13154                                    X86cvttp2si, X86cvttp2siSAE,
13155                                    SchedWriteCvtPS2DQ>, T_MAP5PD,
13156                                    EVEX_CD8<16, CD8VQ>;
13158 defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
13159                                     X86cvttp2ui, X86cvttp2uiSAE,
13160                                     SchedWriteCvtPS2DQ>, T_MAP5PD,
13161                                     EVEX_CD8<16, CD8VQ>;
13163 // Convert Signed/Unsigned Quardword to Half
// i64 -> f16 narrows elements 4x: all widths produce a v8f16x_info result, so
// the 128/256-bit instruction patterns are disabled (null_frag) — dedicated
// masking patterns elsewhere select them — and every memory form needs an
// explicit "x"/"y"/"z" mnemonic suffix to disambiguate the operand width.
// The large block of InstAliases below supplies those suffixed AT&T-syntax
// spellings for plain, merge-masked ({k}), zero-masked ({k}{z}) and
// broadcast ({1toN}) forms at each width.
// NOTE(review): some alias operand lines and the closing braces of this
// multiclass are elided from this excerpt.
13164 multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13165                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13166                            X86SchedWriteWidths sched> {
13167   // we need "x"/"y"/"z" suffixes in order to distinguish between 128, 256 and
13168   // 512 memory forms of these instructions in Asm Parcer. They have the same
13169   // dest type - 'v8f16x_info'. We also specify the broadcast string explicitly
13170   // due to the same reason.
13171   let Predicates = [HasFP16] in {
13172     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode,
13173                             MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
13174             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
13175                               OpNodeRnd, sched.ZMM>, EVEX_V512;
13177   let Predicates = [HasFP16, HasVLX] in {
// VL forms keep EVEX encodings (NotEVEX2VEXConvertible) and use null_frag.
13178     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
13179                                null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
13181                                EVEX_V128, NotEVEX2VEXConvertible;
13182     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
13183                                null_frag, null_frag, sched.YMM, "{1to4}", "{y}",
13185                                EVEX_V256, NotEVEX2VEXConvertible;
// --- 128-bit ("x" suffix) aliases ---
13188   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
13189                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
13190                    VR128X:$src), 0, "att">;
13191   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
13192                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
13193                    VK2WM:$mask, VR128X:$src), 0, "att">;
13194   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
13195                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
13196                    VK2WM:$mask, VR128X:$src), 0, "att">;
13197   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
13198                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
13199                    i64mem:$src), 0, "att">;
13200   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
13201                   "$dst {${mask}}, ${src}{1to2}}",
13202                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
13203                    VK2WM:$mask, i64mem:$src), 0, "att">;
13204   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
13205                   "$dst {${mask}} {z}, ${src}{1to2}}",
13206                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
13207                    VK2WM:$mask, i64mem:$src), 0, "att">;
// --- 256-bit ("y" suffix) aliases ---
13209   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
13210                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
13211                    VR256X:$src), 0, "att">;
13212   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
13213                   "$dst {${mask}}, $src}",
13214                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
13215                    VK4WM:$mask, VR256X:$src), 0, "att">;
13216   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
13217                   "$dst {${mask}} {z}, $src}",
13218                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
13219                    VK4WM:$mask, VR256X:$src), 0, "att">;
13220   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
13221                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
13222                    i64mem:$src), 0, "att">;
13223   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
13224                   "$dst {${mask}}, ${src}{1to4}}",
13225                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
13226                    VK4WM:$mask, i64mem:$src), 0, "att">;
13227   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
13228                   "$dst {${mask}} {z}, ${src}{1to4}}",
13229                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
13230                    VK4WM:$mask, i64mem:$src), 0, "att">;
// --- 512-bit ("z" suffix) aliases ---
13232   def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
13233                   (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
13234                    VR512:$src), 0, "att">;
13235   def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
13236                   "$dst {${mask}}, $src}",
13237                   (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
13238                    VK8WM:$mask, VR512:$src), 0, "att">;
13239   def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
13240                   "$dst {${mask}} {z}, $src}",
13241                   (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
13242                    VK8WM:$mask, VR512:$src), 0, "att">;
13243   def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
13244                   (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
13245                    i64mem:$src), 0, "att">;
13246   def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
13247                   "$dst {${mask}}, ${src}{1to8}}",
13248                   (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
13249                    VK8WM:$mask, i64mem:$src), 0, "att">;
13250   def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
13251                   "$dst {${mask}} {z}, ${src}{1to8}}",
13252                   (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
13253                    VK8WM:$mask, i64mem:$src), 0, "att">;
// Instantiations of signed/unsigned i64 -> f16 conversions. VEX_W is required
// for the 64-bit element source; the T_MAP5 prefix distinguishes the two.
13256 defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
13257                                  X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5PS,
13258                                  EVEX_CD8<64, CD8VF>;
13260 defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
13261                                   X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5XD,
13262                                   EVEX_CD8<64, CD8VF>;
13264 // Convert half to signed/unsigned int 32/64
// Scalar f16 -> GPR conversions: vcvtsh2si/vcvtsh2usi (current rounding mode,
// with {er} variants) and vcvttsh2si/vcvttsh2usi (truncating, with {sae}
// variants). "{l}"/"{q}" pick the 32-/64-bit GPR mnemonic suffix; VEX_W
// selects the 64-bit destination encoding.
13265 defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
13266                                    X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>,
13267                                    T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13268 defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
13269                                      X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>,
13270                                      T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
13271 defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
13272                                     X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>,
13273                                     T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13274 defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
13275                                       X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>,
13276                                       T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
13278 defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
13279                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13280                         "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13281 defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
13282                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13283                         "{q}", HasFP16>, VEX_W, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13284 defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
13285                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13286                         "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13287 defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
13288                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13289                         "{q}", HasFP16>, T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
// Scalar GPR -> f16 conversions (vcvtsi2sh / vcvtusi2sh, 32- and 64-bit
// sources), plus selection patterns for plain loads/registers and for the
// vmovsh-merging idiom emitted by clang's intrinsic lowering.
// NOTE(review): some alias/pattern continuation lines are elided from this
// excerpt.
13291 let Predicates = [HasFP16] in {
13292   defm VCVTSI2SHZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
13293                                     v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
13294                                     T_MAP5XS, EVEX_CD8<32, CD8VT1>;
13295   defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
13296                                       v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
13297                                       T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
13298   defm VCVTUSI2SHZ: avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
13299                                     v8f16x_info, i32mem, loadi32,
13300                                     "cvtusi2sh","l">, T_MAP5XS, EVEX_CD8<32, CD8VT1>;
13301   defm VCVTUSI642SHZ: avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
13302                                        v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
13303                                        T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
// Memory-form aliases without an explicit size suffix default to 32-bit.
13304   def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13305               (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
13307   def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13308               (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
// Plain int -> f16 conversions from memory or register; the tied first
// operand is don't-care, so IMPLICIT_DEF is passed.
13311   def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
13312             (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13313   def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
13314             (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13316   def : Pat<(f16 (any_sint_to_fp GR32:$src)),
13317             (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13318   def : Pat<(f16 (any_sint_to_fp GR64:$src)),
13319             (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13321   def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
13322             (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13323   def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
13324             (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13326   def : Pat<(f16 (any_uint_to_fp GR32:$src)),
13327             (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13328   def : Pat<(f16 (any_uint_to_fp GR64:$src)),
13329             (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13331   // Patterns used for matching vcvtsi2sh intrinsic sequences from clang
13332   // which produce unnecessary vmovsh instructions
// X86Movsh(dst, scalar_to_vector(cvt(x))) collapses into the _Int form,
// which merges the converted scalar into $dst directly.
13333   def : Pat<(v8f16 (X86Movsh
13334                     (v8f16 VR128X:$dst),
13335                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
13336             (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13338   def : Pat<(v8f16 (X86Movsh
13339                     (v8f16 VR128X:$dst),
13340                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
13341             (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13343   def : Pat<(v8f16 (X86Movsh
13344                     (v8f16 VR128X:$dst),
13345                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
13346             (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13348   def : Pat<(v8f16 (X86Movsh
13349                     (v8f16 VR128X:$dst),
13350                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
13351             (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13353   def : Pat<(v8f16 (X86Movsh
13354                     (v8f16 VR128X:$dst),
13355                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
13356             (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13358   def : Pat<(v8f16 (X86Movsh
13359                     (v8f16 VR128X:$dst),
13360                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
13361             (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13363   def : Pat<(v8f16 (X86Movsh
13364                     (v8f16 VR128X:$dst),
13365                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
13366             (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13368   def : Pat<(v8f16 (X86Movsh
13369                     (v8f16 VR128X:$dst),
13370                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
13371             (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13372 } // Predicates = [HasFP16]
13374 let Predicates = [HasFP16, HasVLX] in {
13375 // Special patterns to allow use of X86VMSintToFP for masking. Instruction
13376 // patterns have been disabled with null_frag.
13377 def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
13378 (VCVTQQ2PHZ256rr VR256X:$src)>;
13379 def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13381 (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13382 def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13384 (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13386 def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
13387 (VCVTQQ2PHZ256rm addr:$src)>;
13388 def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13390 (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13391 def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13393 (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13395 def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13396 (VCVTQQ2PHZ256rmb addr:$src)>;
13397 def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13398 (v8f16 VR128X:$src0), VK4WM:$mask),
13399 (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13400 def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13401 v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13402 (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13404 def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
13405 (VCVTQQ2PHZ128rr VR128X:$src)>;
13406 def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13408 (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13409 def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13411 (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13413 def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
13414 (VCVTQQ2PHZ128rm addr:$src)>;
13415 def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13417 (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13418 def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13420 (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13422 def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13423 (VCVTQQ2PHZ128rmb addr:$src)>;
13424 def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13425 (v8f16 VR128X:$src0), VK2WM:$mask),
13426 (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13427 def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13428 v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13429 (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
13431 // Special patterns to allow use of X86VMUintToFP for masking. Instruction
13432 // patterns have been disabled with null_frag.
13433 def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
13434 (VCVTUQQ2PHZ256rr VR256X:$src)>;
13435 def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13437 (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13438 def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13440 (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13442 def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
13443 (VCVTUQQ2PHZ256rm addr:$src)>;
13444 def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13446 (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13447 def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13449 (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13451 def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13452 (VCVTUQQ2PHZ256rmb addr:$src)>;
13453 def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13454 (v8f16 VR128X:$src0), VK4WM:$mask),
13455 (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13456 def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13457 v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13458 (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13460 def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
13461 (VCVTUQQ2PHZ128rr VR128X:$src)>;
13462 def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13464 (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13465 def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13467 (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13469 def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
13470 (VCVTUQQ2PHZ128rm addr:$src)>;
13471 def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13473 (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13474 def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13476 (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13478 def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13479 (VCVTUQQ2PHZ128rmb addr:$src)>;
13480 def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13481 (v8f16 VR128X:$src0), VK2WM:$mask),
13482 (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13483 def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13484 v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13485 (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;