1 //===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file describes the X86 AVX512 instruction set, defining the
10 // instructions, and properties of the instructions which are needed for code
11 // generation, machine code emission, and analysis.
13 //===----------------------------------------------------------------------===//
15 // This multiclass generates the masking variants from the non-masking
16 // variant. It only provides the assembly pieces for the masking variants.
17 // It assumes custom ISel patterns for masking which can be provided as
18 // template arguments.
// NOTE(review): this listing appears to have dropped several source lines
// (e.g. the "dag Outs" / "string OpcodeStr" / pattern-list parameters and the
// bodies after each asm string); the stray leading integers are residual line
// numbers from the original file. Verify against upstream before editing.
19 multiclass AVX512_maskable_custom<bits<8> O, Format F,
21 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
23 string AttSrcAsm, string IntelSrcAsm,
25 list<dag> MaskingPattern,
26 list<dag> ZeroMaskingPattern,
27 string MaskingConstraint = "",
29 bit IsKCommutable = 0,
30 bit IsKZCommutable = IsCommutable,
31 string ClobberConstraint = ""> {
// Unmasked variant: plain "$dst, src" assembly with no writemask decoration.
32 let isCommutable = IsCommutable, Constraints = ClobberConstraint in
33 def NAME: AVX512<O, F, Outs, Ins,
34 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
35 "$dst, "#IntelSrcAsm#"}",
// Merge-masking variant (suffix "k"): asm string adds "{${mask}}" after $dst.
38 // Prefer over VMOV*rrk Pat<>
39 let isCommutable = IsKCommutable in
40 def NAME#k: AVX512<O, F, Outs, MaskingIns,
41 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
42 "$dst {${mask}}, "#IntelSrcAsm#"}",
// Merge the clobber and masking constraints into one string; either may be
// empty, in which case the other is used alone.
45 // In case of the 3src subclass this is overridden with a let.
46 string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint,
47 !if(!eq(MaskingConstraint, ""), ClobberConstraint,
48 !strconcat(ClobberConstraint, ", ", MaskingConstraint)));
// Zero-masking variant (suffix "kz"): asm string adds "{${mask}} {z}".
51 // Zero mask does not add any restrictions to commute operands transformation.
52 // So, it is Ok to use IsCommutable instead of IsKCommutable.
53 let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<>
54 Constraints = ClobberConstraint in
55 def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
56 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
57 "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
63 // Common base class of AVX512_maskable and AVX512_maskable_3src.
64 multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
66 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
68 string AttSrcAsm, string IntelSrcAsm,
69 dag RHS, dag MaskingRHS,
70 SDPatternOperator Select = vselect_mask,
71 string MaskingConstraint = "",
73 bit IsKCommutable = 0,
74 bit IsKZCommutable = IsCommutable,
75 string ClobberConstraint = ""> :
76 AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
77 AttSrcAsm, IntelSrcAsm,
78 [(set _.RC:$dst, RHS)],
79 [(set _.RC:$dst, MaskingRHS)],
81 (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
82 MaskingConstraint, IsCommutable,
83 IsKCommutable, IsKZCommutable, ClobberConstraint>;
85 // This multiclass generates the unconditional/non-masking, the masking and
86 // the zero-masking variant of the vector instruction. In the masking case, the
87 // preserved vector elements come from a new dummy input operand tied to $dst.
88 // This version uses a separate dag for non-masking and masking.
89 multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
90 dag Outs, dag Ins, string OpcodeStr,
91 string AttSrcAsm, string IntelSrcAsm,
93 string ClobberConstraint = "",
94 bit IsCommutable = 0, bit IsKCommutable = 0,
95 bit IsKZCommutable = IsCommutable> :
96 AVX512_maskable_custom<O, F, Outs, Ins,
97 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
98 !con((ins _.KRCWM:$mask), Ins),
99 OpcodeStr, AttSrcAsm, IntelSrcAsm,
100 [(set _.RC:$dst, RHS)],
102 (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
104 (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
105 "$src0 = $dst", IsCommutable, IsKCommutable,
106 IsKZCommutable, ClobberConstraint>;
108 // This multiclass generates the unconditional/non-masking, the masking and
109 // the zero-masking variant of the vector instruction. In the masking case, the
110 // preserved vector elements come from a new dummy input operand tied to $dst.
// The merge-masking ins list prepends ($src0, $mask) to Ins and ties $src0 to
// $dst via the "$src0 = $dst" constraint; the zero-masking ins list prepends
// only $mask. The masked pattern selects between RHS and $src0 under $mask.
111 multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
112 dag Outs, dag Ins, string OpcodeStr,
113 string AttSrcAsm, string IntelSrcAsm,
115 bit IsCommutable = 0, bit IsKCommutable = 0,
116 bit IsKZCommutable = IsCommutable,
117 SDPatternOperator Select = vselect_mask,
118 string ClobberConstraint = ""> :
119 AVX512_maskable_common<O, F, _, Outs, Ins,
120 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
121 !con((ins _.KRCWM:$mask), Ins),
122 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
123 (Select _.KRCWM:$mask, RHS, _.RC:$src0),
124 Select, "$src0 = $dst", IsCommutable, IsKCommutable,
125 IsKZCommutable, ClobberConstraint>;
127 // This multiclass generates the unconditional/non-masking, the masking and
128 // the zero-masking variant of the scalar instruction.
129 multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
130 dag Outs, dag Ins, string OpcodeStr,
131 string AttSrcAsm, string IntelSrcAsm,
133 AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
134 RHS, 0, 0, 0, X86selects_mask>;
136 // Similar to AVX512_maskable but in this case one of the source operands
137 // ($src1) is already tied to $dst so we just use that for the preserved
138 // vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
// $src1. [NOTE(review): the remainder of this sentence was lost in this
// listing — confirm against the upstream file.]
// All three ins lists start with $src1; the masked variants insert $mask after
// it. When MaskOnly is set the unmasked pattern is suppressed via null_frag.
140 multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
141 dag Outs, dag NonTiedIns, string OpcodeStr,
142 string AttSrcAsm, string IntelSrcAsm,
144 bit IsCommutable = 0,
145 bit IsKCommutable = 0,
146 SDPatternOperator Select = vselect_mask,
148 AVX512_maskable_common<O, F, _, Outs,
149 !con((ins _.RC:$src1), NonTiedIns),
150 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
151 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
152 OpcodeStr, AttSrcAsm, IntelSrcAsm,
153 !if(MaskOnly, (null_frag), RHS),
154 (Select _.KRCWM:$mask, RHS, _.RC:$src1),
155 Select, "", IsCommutable, IsKCommutable>;
157 // Similar to AVX512_maskable_3src but in this case the input VT for the tied
158 // operand differs from the output VT. This requires a bitconvert on
159 // the preserved vector going into the vselect.
160 // NOTE: The unmasked pattern is disabled.
161 multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
162 X86VectorVTInfo InVT,
163 dag Outs, dag NonTiedIns, string OpcodeStr,
164 string AttSrcAsm, string IntelSrcAsm,
165 dag RHS, bit IsCommutable = 0> :
166 AVX512_maskable_common<O, F, OutVT, Outs,
167 !con((ins InVT.RC:$src1), NonTiedIns),
168 !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
169 !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
170 OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
171 (vselect_mask InVT.KRCWM:$mask, RHS,
172 (bitconvert InVT.RC:$src1)),
173 vselect_mask, "", IsCommutable>;
175 multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
176 dag Outs, dag NonTiedIns, string OpcodeStr,
177 string AttSrcAsm, string IntelSrcAsm,
179 bit IsCommutable = 0,
180 bit IsKCommutable = 0,
182 AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
183 IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
184 X86selects_mask, MaskOnly>;
186 multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
189 string AttSrcAsm, string IntelSrcAsm,
191 AVX512_maskable_custom<O, F, Outs, Ins,
192 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
193 !con((ins _.KRCWM:$mask), Ins),
194 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
197 multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
198 dag Outs, dag NonTiedIns,
200 string AttSrcAsm, string IntelSrcAsm,
202 AVX512_maskable_custom<O, F, Outs,
203 !con((ins _.RC:$src1), NonTiedIns),
204 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
205 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
206 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
209 // Instruction with mask that puts result in mask register,
210 // like "compare" and "vptest"
211 multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
213 dag Ins, dag MaskingIns,
215 string AttSrcAsm, string IntelSrcAsm,
217 list<dag> MaskingPattern,
218 bit IsCommutable = 0> {
219 let isCommutable = IsCommutable in {
220 def NAME: AVX512<O, F, Outs, Ins,
221 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
222 "$dst, "#IntelSrcAsm#"}",
225 def NAME#k: AVX512<O, F, Outs, MaskingIns,
226 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
227 "$dst {${mask}}, "#IntelSrcAsm#"}",
228 MaskingPattern>, EVEX_K;
232 multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
234 dag Ins, dag MaskingIns,
236 string AttSrcAsm, string IntelSrcAsm,
237 dag RHS, dag MaskingRHS,
238 bit IsCommutable = 0> :
239 AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
240 AttSrcAsm, IntelSrcAsm,
241 [(set _.KRC:$dst, RHS)],
242 [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
// Compare-style masking: the result lands in a mask register, so the masked
// form ANDs the incoming writemask with RHS_su rather than selecting elements.
244 multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
245 dag Outs, dag Ins, string OpcodeStr,
246 string AttSrcAsm, string IntelSrcAsm,
247 dag RHS, dag RHS_su, bit IsCommutable = 0> :
248 AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
249 !con((ins _.KRCWM:$mask), Ins),
250 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
251 (and _.KRCWM:$mask, RHS_su), IsCommutable>;
253 // Used by conversion instructions.
254 multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
256 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
258 string AttSrcAsm, string IntelSrcAsm,
259 dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
260 AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
261 AttSrcAsm, IntelSrcAsm,
262 [(set _.RC:$dst, RHS)],
263 [(set _.RC:$dst, MaskingRHS)],
264 [(set _.RC:$dst, ZeroMaskingRHS)],
267 multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
268 dag Outs, dag NonTiedIns, string OpcodeStr,
269 string AttSrcAsm, string IntelSrcAsm,
270 dag RHS, dag MaskingRHS, bit IsCommutable,
272 AVX512_maskable_custom<O, F, Outs,
273 !con((ins _.RC:$src1), NonTiedIns),
274 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
275 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
276 OpcodeStr, AttSrcAsm, IntelSrcAsm,
277 [(set _.RC:$dst, RHS)],
279 (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
281 (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
282 "", IsCommutable, IsKCommutable>;
284 // Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
285 // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
286 // swizzled by ExecutionDomainFix to pxor.
287 // We set canFoldAsLoad because this can be converted to a constant-pool
288 // load of an all-zeros value if folding it would be beneficial.
289 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
290 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
291 def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
292 [(set VR512:$dst, (v16i32 immAllZerosV))]>;
293 def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
294 [(set VR512:$dst, (v16i32 immAllOnesV))]>;
// Reuse the v16i32 zero pseudo for every other 512-bit all-zeros vector type.
297 let Predicates = [HasAVX512] in {
298 def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
299 def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
300 def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
301 def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
302 def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
303 def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
306 // Alias instructions that allow VPTERNLOG to be used with a mask to create
307 // a mix of all ones and all zeros elements. This is done this way to force
308 // the same register to be used as input for all three sources.
309 let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
310 def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
311 (ins VK16WM:$mask), "",
312 [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
313 (v16i32 immAllOnesV),
314 (v16i32 immAllZerosV)))]>;
315 def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
316 (ins VK8WM:$mask), "",
317 [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
319 (v8i64 immAllZerosV)))]>;
322 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
323 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
324 def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
325 [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
326 def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
327 [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
330 let Predicates = [HasAVX512] in {
331 def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
332 def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
333 def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
334 def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
335 def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
336 def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
337 def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
338 def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
339 def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
340 def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
341 def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
342 def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
345 // Alias instructions that map fld0 to xorps for sse or vxorps for avx.
346 // This is expanded by ExpandPostRAPseudos.
347 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
348 isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
// Scalar f16 zero.
349 def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
350 [(set FR16X:$dst, fp16imm0)]>;
// Scalar f32 zero.
351 def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
352 [(set FR32X:$dst, fp32imm0)]>;
// Scalar f64 zero.
353 def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
354 [(set FR64X:$dst, fp64imm0)]>;
// f128 zero, materialized in a 128-bit vector register.
355 def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
356 [(set VR128X:$dst, fp128imm0)]>;
359 //===----------------------------------------------------------------------===//
360 // AVX-512 - VECTOR INSERT
363 // Supports two different pattern operators for mask and unmasked ops. Allows
364 // null_frag to be passed for one.
365 multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
367 SDPatternOperator vinsert_insert,
368 SDPatternOperator vinsert_for_mask,
369 X86FoldableSchedWrite sched> {
370 let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
371 defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
372 (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
373 "vinsert" # From.EltTypeName # "x" # From.NumElts,
374 "$src3, $src2, $src1", "$src1, $src2, $src3",
375 (vinsert_insert:$src3 (To.VT To.RC:$src1),
376 (From.VT From.RC:$src2),
378 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
379 (From.VT From.RC:$src2),
381 AVX512AIi8Base, EVEX, VVVV, Sched<[sched]>;
383 defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
384 (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
385 "vinsert" # From.EltTypeName # "x" # From.NumElts,
386 "$src3, $src2, $src1", "$src1, $src2, $src3",
387 (vinsert_insert:$src3 (To.VT To.RC:$src1),
388 (From.VT (From.LdFrag addr:$src2)),
390 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
391 (From.VT (From.LdFrag addr:$src2)),
392 (iPTR imm))>, AVX512AIi8Base, EVEX, VVVV,
393 EVEX_CD8<From.EltSize, From.CD8TupleForm>,
394 Sched<[sched.Folded, sched.ReadAfterFold]>;
398 // Passes the same pattern operator for masked and unmasked ops.
399 multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
401 SDPatternOperator vinsert_insert,
402 X86FoldableSchedWrite sched> :
403 vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;
405 multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
406 X86VectorVTInfo To, PatFrag vinsert_insert,
407 SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
408 let Predicates = p in {
409 def : Pat<(vinsert_insert:$ins
410 (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
411 (To.VT (!cast<Instruction>(InstrStr#"rr")
412 To.RC:$src1, From.RC:$src2,
413 (INSERT_get_vinsert_imm To.RC:$ins)))>;
415 def : Pat<(vinsert_insert:$ins
417 (From.VT (From.LdFrag addr:$src2)),
419 (To.VT (!cast<Instruction>(InstrStr#"rm")
420 To.RC:$src1, addr:$src2,
421 (INSERT_get_vinsert_imm To.RC:$ins)))>;
425 multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
426 ValueType EltVT64, int Opcode256,
427 X86FoldableSchedWrite sched> {
429 let Predicates = [HasVLX] in
430 defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
431 X86VectorVTInfo< 4, EltVT32, VR128X>,
432 X86VectorVTInfo< 8, EltVT32, VR256X>,
433 vinsert128_insert, sched>, EVEX_V256;
435 defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
436 X86VectorVTInfo< 4, EltVT32, VR128X>,
437 X86VectorVTInfo<16, EltVT32, VR512>,
438 vinsert128_insert, sched>, EVEX_V512;
440 defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
441 X86VectorVTInfo< 4, EltVT64, VR256X>,
442 X86VectorVTInfo< 8, EltVT64, VR512>,
443 vinsert256_insert, sched>, REX_W, EVEX_V512;
445 // Even with DQI we'd like to only use these instructions for masking.
446 let Predicates = [HasVLX, HasDQI] in
447 defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
448 X86VectorVTInfo< 2, EltVT64, VR128X>,
449 X86VectorVTInfo< 4, EltVT64, VR256X>,
450 null_frag, vinsert128_insert, sched>,
453 // Even with DQI we'd like to only use these instructions for masking.
454 let Predicates = [HasDQI] in {
455 defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
456 X86VectorVTInfo< 2, EltVT64, VR128X>,
457 X86VectorVTInfo< 8, EltVT64, VR512>,
458 null_frag, vinsert128_insert, sched>,
461 defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
462 X86VectorVTInfo< 8, EltVT32, VR256X>,
463 X86VectorVTInfo<16, EltVT32, VR512>,
464 null_frag, vinsert256_insert, sched>,
469 // FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
// FP inserts use opcodes 0x18/0x1a, integer inserts 0x38/0x3a; the element
// types (f32/f64 vs. i32/i64) select the x4/x2/x8 forms inside
// vinsert_for_type.
470 defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
471 defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
473 // Codegen pattern with the alternative types,
474 // Even with AVX512DQ we'll still use these for unmasked operations.
// Each pair of lines below maps an insert_subvector of a type the instruction
// was not natively defined for onto an existing VINSERT* instruction.
475 defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
476 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
477 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
478 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
480 defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
481 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
482 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
483 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
485 defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
486 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
487 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
488 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
490 // Codegen pattern with the alternative types insert VEC128 into VEC256
491 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
492 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
493 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
494 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
495 defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info,
496 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
497 defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8bf16x_info, v16bf16x_info,
498 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
499 // Codegen pattern with the alternative types insert VEC128 into VEC512
500 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
501 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
502 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
503 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
504 defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info,
505 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
506 defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8bf16x_info, v32bf16_info,
507 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
508 // Codegen pattern with the alternative types insert VEC256 into VEC512
509 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
510 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
511 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
512 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
513 defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info,
514 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
515 defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16bf16x_info, v32bf16_info,
516 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
519 multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
520 X86VectorVTInfo To, X86VectorVTInfo Cast,
521 PatFrag vinsert_insert,
522 SDNodeXForm INSERT_get_vinsert_imm,
524 let Predicates = p in {
526 (vselect_mask Cast.KRCWM:$mask,
528 (vinsert_insert:$ins (To.VT To.RC:$src1),
529 (From.VT From.RC:$src2),
532 (!cast<Instruction>(InstrStr#"rrk")
533 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
534 (INSERT_get_vinsert_imm To.RC:$ins))>;
536 (vselect_mask Cast.KRCWM:$mask,
538 (vinsert_insert:$ins (To.VT To.RC:$src1),
541 (From.LdFrag addr:$src2))),
544 (!cast<Instruction>(InstrStr#"rmk")
545 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
546 (INSERT_get_vinsert_imm To.RC:$ins))>;
549 (vselect_mask Cast.KRCWM:$mask,
551 (vinsert_insert:$ins (To.VT To.RC:$src1),
552 (From.VT From.RC:$src2),
555 (!cast<Instruction>(InstrStr#"rrkz")
556 Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
557 (INSERT_get_vinsert_imm To.RC:$ins))>;
559 (vselect_mask Cast.KRCWM:$mask,
561 (vinsert_insert:$ins (To.VT To.RC:$src1),
562 (From.VT (From.LdFrag addr:$src2)),
565 (!cast<Instruction>(InstrStr#"rmkz")
566 Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
567 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Masked insert patterns where the mask's element type (the Cast VT, third
// type argument) differs from the instruction's native From/To types; the
// multiclass inserts a bitconvert so the vselect_mask operates on the Cast VT.
// Predicates follow the instruction used: x2/x8 forms require DQI, Z256 forms
// require VLX.
571 defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
572 v8f32x_info, vinsert128_insert,
573 INSERT_get_vinsert128_imm, [HasVLX]>;
574 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
575 v4f64x_info, vinsert128_insert,
576 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
578 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
579 v8i32x_info, vinsert128_insert,
580 INSERT_get_vinsert128_imm, [HasVLX]>;
581 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
582 v8i32x_info, vinsert128_insert,
583 INSERT_get_vinsert128_imm, [HasVLX]>;
584 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
585 v8i32x_info, vinsert128_insert,
586 INSERT_get_vinsert128_imm, [HasVLX]>;
587 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
588 v4i64x_info, vinsert128_insert,
589 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
590 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
591 v4i64x_info, vinsert128_insert,
592 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
593 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
594 v4i64x_info, vinsert128_insert,
595 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
597 defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
598 v16f32_info, vinsert128_insert,
599 INSERT_get_vinsert128_imm, [HasAVX512]>;
600 defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
601 v8f64_info, vinsert128_insert,
602 INSERT_get_vinsert128_imm, [HasDQI]>;
604 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
605 v16i32_info, vinsert128_insert,
606 INSERT_get_vinsert128_imm, [HasAVX512]>;
607 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
608 v16i32_info, vinsert128_insert,
609 INSERT_get_vinsert128_imm, [HasAVX512]>;
610 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
611 v16i32_info, vinsert128_insert,
612 INSERT_get_vinsert128_imm, [HasAVX512]>;
613 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
614 v8i64_info, vinsert128_insert,
615 INSERT_get_vinsert128_imm, [HasDQI]>;
616 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
617 v8i64_info, vinsert128_insert,
618 INSERT_get_vinsert128_imm, [HasDQI]>;
619 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
620 v8i64_info, vinsert128_insert,
621 INSERT_get_vinsert128_imm, [HasDQI]>;
623 defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
624 v16f32_info, vinsert256_insert,
625 INSERT_get_vinsert256_imm, [HasDQI]>;
626 defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
627 v8f64_info, vinsert256_insert,
628 INSERT_get_vinsert256_imm, [HasAVX512]>;
630 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
631 v16i32_info, vinsert256_insert,
632 INSERT_get_vinsert256_imm, [HasDQI]>;
633 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
634 v16i32_info, vinsert256_insert,
635 INSERT_get_vinsert256_imm, [HasDQI]>;
636 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
637 v16i32_info, vinsert256_insert,
638 INSERT_get_vinsert256_imm, [HasDQI]>;
639 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
640 v8i64_info, vinsert256_insert,
641 INSERT_get_vinsert256_imm, [HasAVX512]>;
642 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
643 v8i64_info, vinsert256_insert,
644 INSERT_get_vinsert256_imm, [HasAVX512]>;
645 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
646 v8i64_info, vinsert256_insert,
647 INSERT_get_vinsert256_imm, [HasAVX512]>;
649 // vinsertps - insert f32 to XMM
650 let ExeDomain = SSEPackedSingle in {
// Register-register form; $src3 is the 8-bit insertps control immediate.
651 let isCommutable = 1 in
652 def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
653 (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
654 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
655 [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
656 EVEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>;
657 def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
658 (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
659 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
660 [(set VR128X:$dst, (X86insertps VR128X:$src1,
661 (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
663 EVEX, VVVV, EVEX_CD8<32, CD8VT1>,
664 Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
667 //===----------------------------------------------------------------------===//
668 // AVX-512 VECTOR EXTRACT
671 // Supports two different pattern operators for mask and unmasked ops. Allows
672 // null_frag to be passed for one.
673 multiclass vextract_for_size_split<int Opcode,
674 X86VectorVTInfo From, X86VectorVTInfo To,
675 SDPatternOperator vextract_extract,
676 SDPatternOperator vextract_for_mask,
677 SchedWrite SchedRR, SchedWrite SchedMR> {
679 let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
680 defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
681 (ins From.RC:$src1, u8imm:$idx),
682 "vextract" # To.EltTypeName # "x" # To.NumElts,
683 "$idx, $src1", "$src1, $idx",
684 (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
685 (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
686 AVX512AIi8Base, EVEX, Sched<[SchedRR]>;
688 def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
689 (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
690 "vextract" # To.EltTypeName # "x" # To.NumElts #
691 "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
692 [(store (To.VT (vextract_extract:$idx
693 (From.VT From.RC:$src1), (iPTR imm))),
697 let mayStore = 1, hasSideEffects = 0 in
698 def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
699 (ins To.MemOp:$dst, To.KRCWM:$mask,
700 From.RC:$src1, u8imm:$idx),
701 "vextract" # To.EltTypeName # "x" # To.NumElts #
702 "\t{$idx, $src1, $dst {${mask}}|"
703 "$dst {${mask}}, $src1, $idx}", []>,
704 EVEX_K, EVEX, Sched<[SchedMR]>;
708 // Passes the same pattern operator for masked and unmasked ops.
709 multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
711 SDPatternOperator vextract_extract,
712 SchedWrite SchedRR, SchedWrite SchedMR> :
713 vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;
715 // Codegen pattern for the alternative types
716 multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
717 X86VectorVTInfo To, PatFrag vextract_extract,
718 SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
719 let Predicates = p in {
720 def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
721 (To.VT (!cast<Instruction>(InstrStr#"rr")
723 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
724 def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
725 (iPTR imm))), addr:$dst),
726 (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
727 (EXTRACT_get_vextract_imm To.RC:$ext))>;
731 multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
732 ValueType EltVT64, int Opcode256,
733 SchedWrite SchedRR, SchedWrite SchedMR> {
734 let Predicates = [HasAVX512] in {
735 defm NAME # "32x4Z" : vextract_for_size<Opcode128,
736 X86VectorVTInfo<16, EltVT32, VR512>,
737 X86VectorVTInfo< 4, EltVT32, VR128X>,
738 vextract128_extract, SchedRR, SchedMR>,
739 EVEX_V512, EVEX_CD8<32, CD8VT4>;
740 defm NAME # "64x4Z" : vextract_for_size<Opcode256,
741 X86VectorVTInfo< 8, EltVT64, VR512>,
742 X86VectorVTInfo< 4, EltVT64, VR256X>,
743 vextract256_extract, SchedRR, SchedMR>,
744 REX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
746 let Predicates = [HasVLX] in
747 defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
748 X86VectorVTInfo< 8, EltVT32, VR256X>,
749 X86VectorVTInfo< 4, EltVT32, VR128X>,
750 vextract128_extract, SchedRR, SchedMR>,
751 EVEX_V256, EVEX_CD8<32, CD8VT4>;
753 // Even with DQI we'd like to only use these instructions for masking.
754 let Predicates = [HasVLX, HasDQI] in
755 defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
756 X86VectorVTInfo< 4, EltVT64, VR256X>,
757 X86VectorVTInfo< 2, EltVT64, VR128X>,
758 null_frag, vextract128_extract, SchedRR, SchedMR>,
759 EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
761 // Even with DQI we'd like to only use these instructions for masking.
762 let Predicates = [HasDQI] in {
763 defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
764 X86VectorVTInfo< 8, EltVT64, VR512>,
765 X86VectorVTInfo< 2, EltVT64, VR128X>,
766 null_frag, vextract128_extract, SchedRR, SchedMR>,
767 REX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
768 defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
769 X86VectorVTInfo<16, EltVT32, VR512>,
770 X86VectorVTInfo< 8, EltVT32, VR256X>,
771 null_frag, vextract256_extract, SchedRR, SchedMR>,
772 EVEX_V512, EVEX_CD8<32, CD8VT8>;
776 // TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
// FP extracts use opcodes 0x19/0x1b, integer extracts 0x39/0x3b; the last two
// arguments are the register-form and store-form scheduling classes.
777 defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
778 defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
780 // extract_subvector codegen patterns with the alternative types.
781 // Even with AVX512DQ we'll still use these for unmasked operations.
// Each pair of lines maps an extract_subvector of a non-native type onto an
// existing VEXTRACT* instruction (register and store forms).
782 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
783 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
784 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
785 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
787 defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
788 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
789 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
790 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
792 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
793 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
794 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
795 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
797 // Codegen pattern with the alternative types extract VEC128 from VEC256
798 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
799 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
800 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
801 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
802 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16f16x_info, v8f16x_info,
803 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
804 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16bf16x_info, v8bf16x_info,
805 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
807 // Codegen pattern with the alternative types extract VEC128 from VEC512
808 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
809 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
810 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
811 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
812 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32f16_info, v8f16x_info,
813 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
814 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32bf16_info, v8bf16x_info,
815 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
816 // Codegen pattern with the alternative types extract VEC256 from VEC512
817 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
818 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
819 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
820 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
821 defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info,
822 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
823 defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32bf16_info, v16bf16x_info,
824 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
827 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
828 // smaller extract to enable EVEX->VEX.
// Each element index below (2 of v8i64, 4 of v16i32, 8 of v32i16, 16 of
// v64i8) corresponds to bit offset 128, so the extract is done by taking the
// low YMM subregister and then using the VEX-encoded VEXTRACT*128.
829 let Predicates = [NoVLX, HasEVEX512] in {
830 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
831 (v2i64 (VEXTRACTI128rr
832 (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
834 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
835 (v2f64 (VEXTRACTF128rr
836 (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
838 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
839 (v4i32 (VEXTRACTI128rr
840 (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
842 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
843 (v4f32 (VEXTRACTF128rr
844 (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
846 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
847 (v8i16 (VEXTRACTI128rr
848 (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
850 def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
851 (v8f16 (VEXTRACTF128rr
852 (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
854 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
855 (v16i8 (VEXTRACTI128rr
856 (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
860 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
861 // smaller extract to enable EVEX->VEX.
// Same indices as the NoVLX block above, but with VLX the VEXTRACT*32x4Z256
// forms are used on the low YMM subregister instead of the VEX instructions.
862 let Predicates = [HasVLX] in {
863 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
864 (v2i64 (VEXTRACTI32x4Z256rr
865 (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
867 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
868 (v2f64 (VEXTRACTF32x4Z256rr
869 (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
871 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
872 (v4i32 (VEXTRACTI32x4Z256rr
873 (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
875 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
876 (v4f32 (VEXTRACTF32x4Z256rr
877 (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
879 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
880 (v8i16 (VEXTRACTI32x4Z256rr
881 (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
883 def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
884 (v8f16 (VEXTRACTF32x4Z256rr
885 (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
887 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
888 (v16i8 (VEXTRACTI32x4Z256rr
889 (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
894 // Additional patterns for handling a bitcast between the vselect and the
895 // extract_subvector.
// Matches a masked extract where a bitcast sits between the vselect_mask and
// the extract_subvector: the select operates on the Cast type while the
// extract itself produces the To type.  Emits the merge-masking ("rrk") and
// zero-masking ("rrkz") forms of the instruction named by InstrStr.
896 multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
897 X86VectorVTInfo To, X86VectorVTInfo Cast,
898 PatFrag vextract_extract,
899 SDNodeXForm EXTRACT_get_vextract_imm,
901 let Predicates = p in {
// Merge-masking: unselected lanes come from Cast.RC:$src0.
902 def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
904 (To.VT (vextract_extract:$ext
905 (From.VT From.RC:$src), (iPTR imm)))),
907 (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
908 Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
909 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
// Zero-masking: unselected lanes are zeroed.
911 def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
913 (To.VT (vextract_extract:$ext
914 (From.VT From.RC:$src), (iPTR imm)))),
916 (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
917 Cast.KRCWM:$mask, From.RC:$src,
918 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
// Masked 128-bit extracts from 256-bit vectors with a bitcast to the
// instruction's native element type.  32x4 forms only need VLX; 64x2 forms
// additionally need DQI.
922 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
923 v4f32x_info, vextract128_extract,
924 EXTRACT_get_vextract128_imm, [HasVLX]>;
925 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
926 v2f64x_info, vextract128_extract,
927 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
929 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
930 v4i32x_info, vextract128_extract,
931 EXTRACT_get_vextract128_imm, [HasVLX]>;
932 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
933 v4i32x_info, vextract128_extract,
934 EXTRACT_get_vextract128_imm, [HasVLX]>;
935 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
936 v4i32x_info, vextract128_extract,
937 EXTRACT_get_vextract128_imm, [HasVLX]>;
938 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
939 v2i64x_info, vextract128_extract,
940 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
941 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
942 v2i64x_info, vextract128_extract,
943 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
944 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
945 v2i64x_info, vextract128_extract,
946 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
// Masked 128-bit extracts from 512-bit vectors.
948 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
949 v4f32x_info, vextract128_extract,
950 EXTRACT_get_vextract128_imm, [HasAVX512]>;
951 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
952 v2f64x_info, vextract128_extract,
953 EXTRACT_get_vextract128_imm, [HasDQI]>;
955 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
956 v4i32x_info, vextract128_extract,
957 EXTRACT_get_vextract128_imm, [HasAVX512]>;
958 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
959 v4i32x_info, vextract128_extract,
960 EXTRACT_get_vextract128_imm, [HasAVX512]>;
961 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
962 v4i32x_info, vextract128_extract,
963 EXTRACT_get_vextract128_imm, [HasAVX512]>;
964 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
965 v2i64x_info, vextract128_extract,
966 EXTRACT_get_vextract128_imm, [HasDQI]>;
967 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
968 v2i64x_info, vextract128_extract,
969 EXTRACT_get_vextract128_imm, [HasDQI]>;
970 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
971 v2i64x_info, vextract128_extract,
972 EXTRACT_get_vextract128_imm, [HasDQI]>;
// Masked 256-bit extracts from 512-bit vectors.
974 defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
975 v8f32x_info, vextract256_extract,
976 EXTRACT_get_vextract256_imm, [HasDQI]>;
977 defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
978 v4f64x_info, vextract256_extract,
979 EXTRACT_get_vextract256_imm, [HasAVX512]>;
981 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
982 v8i32x_info, vextract256_extract,
983 EXTRACT_get_vextract256_imm, [HasDQI]>;
984 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
985 v8i32x_info, vextract256_extract,
986 EXTRACT_get_vextract256_imm, [HasDQI]>;
987 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
988 v8i32x_info, vextract256_extract,
989 EXTRACT_get_vextract256_imm, [HasDQI]>;
990 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
991 v4i64x_info, vextract256_extract,
992 EXTRACT_get_vextract256_imm, [HasAVX512]>;
993 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
994 v4i64x_info, vextract256_extract,
995 EXTRACT_get_vextract256_imm, [HasAVX512]>;
996 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
997 v4i64x_info, vextract256_extract,
998 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1000 // vextractps - extract 32 bits from XMM
// Register form: extract the dword selected by $src2 into a GPR.  The source
// is bitcast from v4f32 to v4i32 so extractelt yields an i32.
1001 def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
1002 (ins VR128X:$src1, u8imm:$src2),
1003 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1004 [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
1005 EVEX, WIG, Sched<[WriteVecExtract]>;
// Memory form: store the selected dword directly to a 32-bit memory operand.
1007 def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
1008 (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1009 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1010 [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1012 EVEX, WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1014 //===---------------------------------------------------------------------===//
1015 // AVX-512 BROADCAST
1017 // broadcast with a scalar argument.
// Selects the register-broadcast instruction when the broadcast source is a
// scalar FP register (FRC): the scalar is first copied into the vector
// register class, then the existing rr/rrk/rrkz instructions are reused.
1018 multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo,
1019 X86VectorVTInfo SrcInfo> {
// Unmasked broadcast.
1020 def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1021 (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
1022 (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
// Merge-masking: unselected lanes come from $src0.
1023 def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1024 (X86VBroadcast SrcInfo.FRC:$src),
1025 DestInfo.RC:$src0)),
1026 (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
1027 DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1028 (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
// Zero-masking: unselected lanes are zeroed.
1029 def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1030 (X86VBroadcast SrcInfo.FRC:$src),
1031 DestInfo.ImmAllZerosV)),
1032 (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
1033 DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1036 // Split version to allow mask and broadcast node to be different types. This
1037 // helps support the 32x2 broadcasts.
// Defines the full set of broadcast instruction forms:
//   rr/rrk/rrkz  - register source (SrcInfo.RC)
//   rm/rmk/rmkz  - scalar memory source (SrcInfo.ScalarMemOp)
// MaskInfo supplies the mask/result type, DestInfo the execution domain, and
// SrcInfo the source operand; UnmaskedOp/UnmaskedBcastOp can be overridden
// (e.g. with null_frag) to disable the unmasked patterns.
1038 multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1039 SchedWrite SchedRR, SchedWrite SchedRM,
1040 X86VectorVTInfo MaskInfo,
1041 X86VectorVTInfo DestInfo,
1042 X86VectorVTInfo SrcInfo,
1043 bit IsConvertibleToThreeAddress,
1044 SDPatternOperator UnmaskedOp = X86VBroadcast,
1045 SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
// Unmasked register broadcast.
1046 let hasSideEffects = 0 in
1047 def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
1048 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1049 [(set MaskInfo.RC:$dst,
1053 (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
1054 DestInfo.ExeDomain>, T8, PD, EVEX, Sched<[SchedRR]>;
// Zero-masked register broadcast ({z} syntax).
1055 def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1056 (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
1057 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1058 "${dst} {${mask}} {z}, $src}"),
1059 [(set MaskInfo.RC:$dst,
1060 (vselect_mask MaskInfo.KRCWM:$mask,
1064 (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1065 MaskInfo.ImmAllZerosV))],
1066 DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
// Merge-masked register broadcast; $src0 is tied to $dst.
1067 let Constraints = "$src0 = $dst" in
1068 def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1069 (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1071 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1072 "${dst} {${mask}}, $src}"),
1073 [(set MaskInfo.RC:$dst,
1074 (vselect_mask MaskInfo.KRCWM:$mask,
1078 (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1079 MaskInfo.RC:$src0))],
1080 DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_K, Sched<[SchedRR]>;
// Unmasked broadcast from a scalar memory operand; rematerializable and
// foldable as a load.
1082 let hasSideEffects = 0, mayLoad = 1, isReMaterializable = 1, canFoldAsLoad = 1 in
1083 def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1084 (ins SrcInfo.ScalarMemOp:$src),
1085 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1086 [(set MaskInfo.RC:$dst,
1090 (UnmaskedBcastOp addr:$src)))))],
1091 DestInfo.ExeDomain>, T8, PD, EVEX,
1092 EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
// Zero-masked broadcast from memory.
1094 def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1095 (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
1096 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1097 "${dst} {${mask}} {z}, $src}"),
1098 [(set MaskInfo.RC:$dst,
1099 (vselect_mask MaskInfo.KRCWM:$mask,
1103 (SrcInfo.BroadcastLdFrag addr:$src)))),
1104 MaskInfo.ImmAllZerosV))],
1105 DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_KZ,
1106 EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
// Merge-masked broadcast from memory; $src0 tied to $dst.
1108 let Constraints = "$src0 = $dst",
1109 isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
1110 def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1111 (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1112 SrcInfo.ScalarMemOp:$src),
1113 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1114 "${dst} {${mask}}, $src}"),
1115 [(set MaskInfo.RC:$dst,
1116 (vselect_mask MaskInfo.KRCWM:$mask,
1120 (SrcInfo.BroadcastLdFrag addr:$src)))),
1121 MaskInfo.RC:$src0))],
1122 DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_K,
1123 EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1126 // Helper class to force mask and broadcast result to same type.
// Common case of avx512_broadcast_rm_split where MaskInfo == DestInfo.
1127 multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
1128 SchedWrite SchedRR, SchedWrite SchedRM,
1129 X86VectorVTInfo DestInfo,
1130 X86VectorVTInfo SrcInfo,
1131 bit IsConvertibleToThreeAddress> :
1132 avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
1133 DestInfo, DestInfo, SrcInfo,
1134 IsConvertibleToThreeAddress>;
// FP broadcast with a 64-bit element (vbroadcastsd): 512-bit form with
// AVX512F, 256-bit form with VLX.  There is no 128-bit vbroadcastsd.
1136 multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1137 AVX512VLVectorVTInfo _> {
1138 let Predicates = [HasAVX512] in {
1139 defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1140 WriteFShuffle256Ld, _.info512, _.info128, 1>,
1141 avx512_broadcast_scalar<NAME, _.info512, _.info128>,
1145 let Predicates = [HasVLX] in {
1146 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1147 WriteFShuffle256Ld, _.info256, _.info128, 1>,
1148 avx512_broadcast_scalar<NAME, _.info256, _.info128>,
// FP broadcast with a 32-bit element (vbroadcastss): additionally provides a
// 128-bit form under VLX.
1153 multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1154 AVX512VLVectorVTInfo _> {
1155 let Predicates = [HasAVX512] in {
1156 defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1157 WriteFShuffle256Ld, _.info512, _.info128, 1>,
1158 avx512_broadcast_scalar<NAME, _.info512, _.info128>,
1162 let Predicates = [HasVLX] in {
1163 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1164 WriteFShuffle256Ld, _.info256, _.info128, 1>,
1165 avx512_broadcast_scalar<NAME, _.info256, _.info128>,
1167 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle,
1168 WriteShuffle256Ld, _.info128, _.info128, 1>,
1169 avx512_broadcast_scalar<NAME, _.info128, _.info128>,
// EVEX vbroadcastss (opcode 0x18) and vbroadcastsd (opcode 0x19, REX_W).
1173 defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1175 defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1176 avx512vl_f64_info>, REX_W;
// Integer broadcast from a general-purpose register (vpbroadcastd/q with a
// GPR source).  Uses AVX512_maskable to generate the rr/rrk/rrkz forms.
1178 multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1179 X86VectorVTInfo _, SDPatternOperator OpNode,
1180 RegisterClass SrcRC> {
1181 // Fold with a mask even if it has multiple uses since it is cheap.
1182 let ExeDomain = _.ExeDomain in
1183 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1185 "vpbroadcast"#_.Suffix, "$src", "$src",
1186 (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
1187 /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
1188 T8, PD, EVEX, Sched<[SchedRR]>;
// Byte/word GPR broadcast (vpbroadcastb/w).  The instruction always takes a
// GR32 operand; the actual GR8/GR16 source is widened with INSERT_SUBREG in
// the selection patterns below, so the instruction itself carries no pattern.
1191 multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1192 X86VectorVTInfo _, SDPatternOperator OpNode,
1193 RegisterClass SrcRC, SubRegIndex Subreg> {
1194 let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1195 defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
1196 (outs _.RC:$dst), (ins GR32:$src),
1197 !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1198 !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1199 "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
1200 "$src0 = $dst">, T8, PD, EVEX, Sched<[SchedRR]>;
// Unmasked: widen the narrow GPR into a GR32 before broadcasting.
1202 def : Pat <(_.VT (OpNode SrcRC:$src)),
1203 (!cast<Instruction>(Name#rr)
1204 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1206 // Fold with a mask even if it has multiple uses since it is cheap.
1207 def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1208 (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
1209 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
// Zero-masking variant of the same fold.
1211 def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1212 (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
1213 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
// Instantiates the byte/word GPR broadcast at all vector lengths: 512-bit
// under the base predicate, 256/128-bit additionally gated on VLX.
1216 multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1217 AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1218 RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1219 let Predicates = [prd] in
1220 defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1221 OpNode, SrcRC, Subreg>, EVEX_V512;
1222 let Predicates = [prd, HasVLX] in {
1223 defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1224 _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1225 defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1226 _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
// Instantiates the dword/qword GPR broadcast at all vector lengths, with the
// same predicate structure as the byte/word version above.
1230 multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1231 SDPatternOperator OpNode,
1232 RegisterClass SrcRC, Predicate prd> {
1233 let Predicates = [prd] in
1234 defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1236 let Predicates = [prd, HasVLX] in {
1237 defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1239 defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
// GPR-source broadcasts.  Note VPBROADCASTDr and VPBROADCASTQr share opcode
// 0x7C; they are distinguished by REX_W (operand-size W bit).
1244 defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1245 avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1246 defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1247 avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1249 defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1250 X86VBroadcast, GR32, HasAVX512>;
1251 defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1252 X86VBroadcast, GR64, HasAVX512>, REX_W;
// Integer broadcast with a vector-register or memory source, instantiated at
// all vector lengths (512-bit under prd, 256/128-bit additionally under VLX).
1254 multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1255 AVX512VLVectorVTInfo _, Predicate prd,
1256 bit IsConvertibleToThreeAddress> {
1257 let Predicates = [prd] in {
1258 defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1259 WriteShuffle256Ld, _.info512, _.info128,
1260 IsConvertibleToThreeAddress>,
1263 let Predicates = [prd, HasVLX] in {
1264 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1265 WriteShuffle256Ld, _.info256, _.info128,
1266 IsConvertibleToThreeAddress>,
1268 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
1269 WriteShuffleXLd, _.info128, _.info128,
1270 IsConvertibleToThreeAddress>,
// Vector/memory-source integer broadcasts.  B/W forms require BWI; D/Q are
// available with base AVX512F, and D/Q are marked convertible to three-address.
1275 defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1276 avx512vl_i8_info, HasBWI, 0>;
1277 defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1278 avx512vl_i16_info, HasBWI, 0>;
1279 defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1280 avx512vl_i32_info, HasAVX512, 1>;
1281 defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1282 avx512vl_i64_info, HasAVX512, 1>, REX_W;
// Subvector broadcast from memory (vbroadcasti32x4 etc.), memory form only.
1284 multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1285 SDPatternOperator OpNode,
1286 X86VectorVTInfo _Dst,
1287 X86VectorVTInfo _Src> {
1288 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1289 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1290 (_Dst.VT (OpNode addr:$src))>,
1291 Sched<[SchedWriteShuffle.YMM.Folded]>,
1295 // This should be used for the AVX512DQ broadcast instructions. It disables
1296 // the unmasked patterns so that we only use the DQ instructions when masking
// (the unmasked cases are covered by the plain AVX512F instructions above).
1298 multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1299 SDPatternOperator OpNode,
1300 X86VectorVTInfo _Dst,
1301 X86VectorVTInfo _Src> {
1302 let hasSideEffects = 0, mayLoad = 1 in
// AVX512_maskable_split lets the masked patterns use OpNode while leaving
// the unmasked pattern empty.
1303 defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1304 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1306 (_Dst.VT (OpNode addr:$src))>,
1307 Sched<[SchedWriteShuffle.YMM.Folded]>,
// f16 broadcasts reuse the integer vpbroadcastw instructions; a scalar FR16X
// source is first copied into VR128X.
1310 let Predicates = [HasBWI] in {
1311 def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
1312 (VPBROADCASTWZrm addr:$src)>;
1314 def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
1315 (VPBROADCASTWZrr VR128X:$src)>;
1316 def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
1317 (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
// 128/256-bit variants need VLX in addition to BWI.
1319 let Predicates = [HasVLX, HasBWI] in {
1320 def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
1321 (VPBROADCASTWZ128rm addr:$src)>;
1322 def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
1323 (VPBROADCASTWZ256rm addr:$src)>;
1325 def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
1326 (VPBROADCASTWZ128rr VR128X:$src)>;
1327 def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
1328 (VPBROADCASTWZ256rr VR128X:$src)>;
1330 def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
1331 (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1332 def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
1333 (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1336 //===----------------------------------------------------------------------===//
1337 // AVX-512 BROADCAST SUBVECTORS
// 512-bit subvector broadcasts available with base AVX512F: 128-bit source
// (32x4) and 256-bit source (64x4) for both integer and FP domains.
1340 defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1341 X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
1342 EVEX_V512, EVEX_CD8<32, CD8VT4>;
1343 defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1344 X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
1345 EVEX_V512, EVEX_CD8<32, CD8VT4>;
1346 defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1347 X86SubVBroadcastld256, v8i64_info, v4i64x_info>, REX_W,
1348 EVEX_V512, EVEX_CD8<64, CD8VT4>;
1349 defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1350 X86SubVBroadcastld256, v8f64_info, v4f64x_info>, REX_W,
1351 EVEX_V512, EVEX_CD8<64, CD8VT4>;
// Unmasked subvector-broadcast loads of every element type are funneled
// through the four instructions above (the instruction chosen only depends on
// the source width, not the element type).
1353 let Predicates = [HasAVX512] in {
1354 def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
1355 (VBROADCASTF64X4rm addr:$src)>;
1356 def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
1357 (VBROADCASTF64X4rm addr:$src)>;
1358 def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
1359 (VBROADCASTF64X4rm addr:$src)>;
1360 def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
1361 (VBROADCASTI64X4rm addr:$src)>;
1362 def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
1363 (VBROADCASTI64X4rm addr:$src)>;
1364 def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
1365 (VBROADCASTI64X4rm addr:$src)>;
1366 def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
1367 (VBROADCASTI64X4rm addr:$src)>;
1369 def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
1370 (VBROADCASTF32X4rm addr:$src)>;
1371 def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
1372 (VBROADCASTF32X4rm addr:$src)>;
1373 def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
1374 (VBROADCASTF32X4rm addr:$src)>;
1375 def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
1376 (VBROADCASTI32X4rm addr:$src)>;
1377 def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
1378 (VBROADCASTI32X4rm addr:$src)>;
1379 def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
1380 (VBROADCASTI32X4rm addr:$src)>;
1381 def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
1382 (VBROADCASTI32X4rm addr:$src)>;
1384 // Patterns for selects of bitcasted operations.
// Masked forms where the broadcast was matched at a different element type
// than the mask; the bitcast is folded into the rmk/rmkz instruction.
1385 def : Pat<(vselect_mask VK16WM:$mask,
1386 (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1387 (v16f32 immAllZerosV)),
1388 (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1389 def : Pat<(vselect_mask VK16WM:$mask,
1390 (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1392 (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1393 def : Pat<(vselect_mask VK16WM:$mask,
1394 (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1395 (v16i32 immAllZerosV)),
1396 (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1397 def : Pat<(vselect_mask VK16WM:$mask,
1398 (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1400 (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1402 def : Pat<(vselect_mask VK8WM:$mask,
1403 (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1404 (v8f64 immAllZerosV)),
1405 (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1406 def : Pat<(vselect_mask VK8WM:$mask,
1407 (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1409 (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1410 def : Pat<(vselect_mask VK8WM:$mask,
1411 (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1412 (v8i64 immAllZerosV)),
1413 (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1414 def : Pat<(vselect_mask VK8WM:$mask,
1415 (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1417 (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
// 256-bit destination versions of the 32x4 subvector broadcasts (VLX), plus
// the same unmasked-any-type and masked-bitcast patterns as the 512-bit case.
1420 let Predicates = [HasVLX] in {
1421 defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1422 X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
1423 EVEX_V256, EVEX_CD8<32, CD8VT4>;
1424 defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1425 X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
1426 EVEX_V256, EVEX_CD8<32, CD8VT4>;
1428 def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
1429 (VBROADCASTF32X4Z256rm addr:$src)>;
1430 def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
1431 (VBROADCASTF32X4Z256rm addr:$src)>;
1432 def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
1433 (VBROADCASTF32X4Z256rm addr:$src)>;
1434 def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
1435 (VBROADCASTI32X4Z256rm addr:$src)>;
1436 def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
1437 (VBROADCASTI32X4Z256rm addr:$src)>;
1438 def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
1439 (VBROADCASTI32X4Z256rm addr:$src)>;
1440 def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
1441 (VBROADCASTI32X4Z256rm addr:$src)>;
1443 // Patterns for selects of bitcasted operations.
1444 def : Pat<(vselect_mask VK8WM:$mask,
1445 (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1446 (v8f32 immAllZerosV)),
1447 (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1448 def : Pat<(vselect_mask VK8WM:$mask,
1449 (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1451 (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1452 def : Pat<(vselect_mask VK8WM:$mask,
1453 (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1454 (v8i32 immAllZerosV)),
1455 (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1456 def : Pat<(vselect_mask VK8WM:$mask,
1457 (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1459 (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
// bf16 subvector broadcasts also reuse the FP 32x4/64x4 instructions.
1462 let Predicates = [HasBF16] in {
1463 def : Pat<(v32bf16 (X86SubVBroadcastld256 addr:$src)),
1464 (VBROADCASTF64X4rm addr:$src)>;
1465 def : Pat<(v32bf16 (X86SubVBroadcastld128 addr:$src)),
1466 (VBROADCASTF32X4rm addr:$src)>;
1469 let Predicates = [HasBF16, HasVLX] in
1470 def : Pat<(v16bf16 (X86SubVBroadcastld128 addr:$src)),
1471 (VBROADCASTF32X4Z256rm addr:$src)>;
// DQI 64x2 broadcasts to 256 bits.  Defined with the _dq multiclass so only
// masked uses select them; unmasked broadcasts use the 32x4 forms above.
1473 let Predicates = [HasVLX, HasDQI] in {
1474 defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1475 X86SubVBroadcastld128, v4i64x_info, v2i64x_info>,
1476 EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
1477 defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1478 X86SubVBroadcastld128, v4f64x_info, v2f64x_info>,
1479 EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
1481 // Patterns for selects of bitcasted operations.
1482 def : Pat<(vselect_mask VK4WM:$mask,
1483 (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1484 (v4f64 immAllZerosV)),
1485 (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1486 def : Pat<(vselect_mask VK4WM:$mask,
1487 (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1489 (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1490 def : Pat<(vselect_mask VK4WM:$mask,
1491 (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1492 (v4i64 immAllZerosV)),
1493 (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1494 def : Pat<(vselect_mask VK4WM:$mask,
1495 (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1497 (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
// DQI 64x2 and 32x8 broadcasts to 512 bits, again masked-use only via the
// _dq multiclass.
1500 let Predicates = [HasDQI] in {
1501 defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1502 X86SubVBroadcastld128, v8i64_info, v2i64x_info>, REX_W,
1503 EVEX_V512, EVEX_CD8<64, CD8VT2>;
1504 defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1505 X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
1506 EVEX_V512, EVEX_CD8<32, CD8VT8>;
1507 defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1508 X86SubVBroadcastld128, v8f64_info, v2f64x_info>, REX_W,
1509 EVEX_V512, EVEX_CD8<64, CD8VT2>;
1510 defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1511 X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
1512 EVEX_V512, EVEX_CD8<32, CD8VT8>;
1514 // Patterns for selects of bitcasted operations.
1515 def : Pat<(vselect_mask VK16WM:$mask,
1516 (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1517 (v16f32 immAllZerosV)),
1518 (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
1519 def : Pat<(vselect_mask VK16WM:$mask,
1520 (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1522 (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1523 def : Pat<(vselect_mask VK16WM:$mask,
1524 (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1525 (v16i32 immAllZerosV)),
1526 (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
1527 def : Pat<(vselect_mask VK16WM:$mask,
1528 (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1530 (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1532 def : Pat<(vselect_mask VK8WM:$mask,
1533 (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1534 (v8f64 immAllZerosV)),
1535 (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
1536 def : Pat<(vselect_mask VK8WM:$mask,
1537 (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1539 (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1540 def : Pat<(vselect_mask VK8WM:$mask,
1541 (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1542 (v8i64 immAllZerosV)),
1543 (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
1544 def : Pat<(vselect_mask VK8WM:$mask,
1545 (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1547 (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
// vbroadcast{i,f}32x2: broadcast a 64-bit (two-dword) element group from a
// 128-bit source. The base multiclass provides 512-bit (Z, HasDQI) and
// 256-bit (Z256, HasDQI+HasVLX) forms; the integer-only _i32x2 subclass adds
// the 128-bit (Z128) form on top. null_frag disables the generic load/ISel
// patterns supplied by avx512_broadcast_rm_split.
1550 multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1551 AVX512VLVectorVTInfo _Dst,
1552 AVX512VLVectorVTInfo _Src> {
1553 let Predicates = [HasDQI] in
1554 defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1555 WriteShuffle256Ld, _Dst.info512,
1556 _Src.info512, _Src.info128, 0, null_frag, null_frag>,
1558 let Predicates = [HasDQI, HasVLX] in
1559 defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1560 WriteShuffle256Ld, _Dst.info256,
1561 _Src.info256, _Src.info128, 0, null_frag, null_frag>,
// Integer variant: inherits Z/Z256 and adds the XMM-sized Z128 form.
1565 multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1566 AVX512VLVectorVTInfo _Dst,
1567 AVX512VLVectorVTInfo _Src> :
1568 avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1570 let Predicates = [HasDQI, HasVLX] in
1571 defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
1572 WriteShuffleXLd, _Dst.info128,
1573 _Src.info128, _Src.info128, 0, null_frag, null_frag>,
// Instantiations: i32 destination viewed through i64 (2x32-bit) source
// groups, and likewise f32 through f64.
1577 defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1578 avx512vl_i32_info, avx512vl_i64_info>;
1579 defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1580 avx512vl_f32_info, avx512vl_f64_info>;
1582 //===----------------------------------------------------------------------===//
1583 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
// Broadcast a mask register into a vector register (VPBROADCASTM*): each
// destination element receives the mask via the X86VBroadcastm node.
// Register-to-register form only; no memory variant exists.
1585 multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1586 X86VectorVTInfo _, RegisterClass KRC> {
1587 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1588 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1589 [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1590 EVEX, Sched<[WriteShuffle]>;
// Size wrapper: ZMM form requires AVX512CD; YMM/XMM additionally need VLX.
1593 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1594 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1595 let Predicates = [HasCDI] in
1596 defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1597 let Predicates = [HasCDI, HasVLX] in {
1598 defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1599 defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
// mw2d: 16-bit mask to dword elements (VK16); mb2q: 8-bit mask to qword
// elements (VK8, REX_W for the 64-bit element encoding).
1603 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1604 avx512vl_i32_info, VK16>;
1605 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1606 avx512vl_i64_info, VK8>, REX_W;
1608 //===----------------------------------------------------------------------===//
1609 // -- VPERMI2 - 3 source operands form --
// VPERMI2* (index-destructive two-source permute): the index operand $src1 is
// tied to $dst ("$src1 = $dst") and is overwritten by the result. The ISel
// pattern uses the shared X86VPermt2 node with the index in the middle
// operand slot; AVX512_maskable_3src_cast handles the index/data type split.
1610 multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1611 X86FoldableSchedWrite sched,
1612 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1613 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1614 hasSideEffects = 0 in {
1615 defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1616 (ins _.RC:$src2, _.RC:$src3),
1617 OpcodeStr, "$src3, $src2", "$src2, $src3",
1618 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1619 EVEX, VVVV, AVX5128IBase, Sched<[sched]>;
// Memory form: third source loaded from memory.
1622 defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1623 (ins _.RC:$src2, _.MemOp:$src3),
1624 OpcodeStr, "$src3, $src2", "$src2, $src3",
1625 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1626 (_.VT (_.LdFrag addr:$src3)))), 1>,
1627 EVEX, VVVV, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast-memory (EVEX.b) form of VPERMI2*: the third source is a scalar
// broadcast from memory ("{1toN}" syntax via _.BroadcastStr).
1631 multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1632 X86FoldableSchedWrite sched,
1633 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1634 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1635 hasSideEffects = 0, mayLoad = 1 in
1636 defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1637 (ins _.RC:$src2, _.ScalarMemOp:$src3),
1638 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1639 !strconcat("$src2, ${src3}", _.BroadcastStr ),
1640 (_.VT (X86VPermt2 _.RC:$src2,
1641 IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1642 AVX5128IBase, EVEX, VVVV, EVEX_B,
1643 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Size expansion with broadcast forms: ZMM unconditional, XMM/YMM under VLX.
1646 multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1647 X86FoldableSchedWrite sched,
1648 AVX512VLVectorVTInfo VTInfo,
1649 AVX512VLVectorVTInfo ShuffleMask> {
1650 defm NAME#Z: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1651 ShuffleMask.info512>,
1652 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1653 ShuffleMask.info512>, EVEX_V512;
1654 let Predicates = [HasVLX] in {
1655 defm NAME#Z128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1656 ShuffleMask.info128>,
1657 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1658 ShuffleMask.info128>, EVEX_V128;
1659 defm NAME#Z256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1660 ShuffleMask.info256>,
1661 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1662 ShuffleMask.info256>, EVEX_V256;
// Byte/word element variant: gated on an extra predicate (BWI/VBMI) and,
// since byte/word elements cannot use EVEX.b, has no broadcast forms.
1666 multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1667 X86FoldableSchedWrite sched,
1668 AVX512VLVectorVTInfo VTInfo,
1669 AVX512VLVectorVTInfo Idx,
1671 let Predicates = [Prd] in
1672 defm NAME#Z: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1673 Idx.info512>, EVEX_V512;
1674 let Predicates = [Prd, HasVLX] in {
1675 defm NAME#Z128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1676 Idx.info128>, EVEX_V128;
1677 defm NAME#Z256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1678 Idx.info256>, EVEX_V256;
// Instantiations. FP versions (PS/PD) reuse the integer index VT infos.
1682 defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1683 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1684 defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1685 avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1686 defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1687 avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1688 REX_W, EVEX_CD8<16, CD8VF>;
1689 defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1690 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1692 defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1693 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1694 defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1695 avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1697 // Extra patterns to deal with extra bitcasts due to passthru and index being
1698 // different types on the fp versions.
// Extra ISel patterns for masked VPERMI2PS where the pass-through/index value
// arrives bitcast to another VT (CastVT, typically vXi64 due to the ABI).
// Each pattern matches vselect_mask with the bitcast index also used as the
// merge value, and selects the merge-masked rrk/rmk/rmbk instruction.
1699 multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1700 X86VectorVTInfo IdxVT,
1701 X86VectorVTInfo CastVT> {
1702 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1703 (X86VPermt2 (_.VT _.RC:$src2),
1704 (IdxVT.VT (bitconvert
1705 (CastVT.VT _.RC:$src1))),
1707 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1708 (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1709 _.RC:$src2, _.RC:$src3)>;
// Same, with the third source loaded from memory.
1710 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1711 (X86VPermt2 _.RC:$src2,
1712 (IdxVT.VT (bitconvert
1713 (CastVT.VT _.RC:$src1))),
1714 (_.LdFrag addr:$src3)),
1715 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1716 (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1717 _.RC:$src2, addr:$src3)>;
// Same, with a broadcast load as the third source.
1718 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1719 (X86VPermt2 _.RC:$src2,
1720 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
1721 (_.BroadcastLdFrag addr:$src3)),
1722 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1723 (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1724 _.RC:$src2, addr:$src3)>;
1727 // TODO: Should we add more casts? The vXi64 case is common due to ABI.
1728 defm : avx512_perm_i_lowering<"VPERMI2PSZ", v16f32_info, v16i32_info, v8i64_info>;
1729 defm : avx512_perm_i_lowering<"VPERMI2PSZ256", v8f32x_info, v8i32x_info, v4i64x_info>;
1730 defm : avx512_perm_i_lowering<"VPERMI2PSZ128", v4f32x_info, v4i32x_info, v2i64x_info>;
// VPERMT2* (table-destructive two-source permute): unlike VPERMI2*, here the
// first DATA operand $src1 is tied to $dst and overwritten; the index comes
// in as the untied $src2. Same X86VPermt2 node, different operand binding.
1733 multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1734 X86FoldableSchedWrite sched,
1735 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1736 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1737 defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1738 (ins IdxVT.RC:$src2, _.RC:$src3),
1739 OpcodeStr, "$src3, $src2", "$src2, $src3",
1740 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1741 EVEX, VVVV, AVX5128IBase, Sched<[sched]>;
// Memory form: third source loaded from memory.
1743 defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1744 (ins IdxVT.RC:$src2, _.MemOp:$src3),
1745 OpcodeStr, "$src3, $src2", "$src2, $src3",
1746 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1747 (_.LdFrag addr:$src3))), 1>,
1748 EVEX, VVVV, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast-memory (EVEX.b) form of VPERMT2*.
1751 multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1752 X86FoldableSchedWrite sched,
1753 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1754 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1755 defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1756 (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1757 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1758 !strconcat("$src2, ${src3}", _.BroadcastStr ),
1759 (_.VT (X86VPermt2 _.RC:$src1,
1760 IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1761 AVX5128IBase, EVEX, VVVV, EVEX_B,
1762 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Size expansion with broadcast forms: ZMM unconditional, XMM/YMM under VLX.
1765 multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1766 X86FoldableSchedWrite sched,
1767 AVX512VLVectorVTInfo VTInfo,
1768 AVX512VLVectorVTInfo ShuffleMask> {
1769 defm NAME#Z: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1770 ShuffleMask.info512>,
1771 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1772 ShuffleMask.info512>, EVEX_V512;
1773 let Predicates = [HasVLX] in {
1774 defm NAME#Z128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1775 ShuffleMask.info128>,
1776 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1777 ShuffleMask.info128>, EVEX_V128;
1778 defm NAME#Z256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1779 ShuffleMask.info256>,
1780 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1781 ShuffleMask.info256>, EVEX_V256;
// Byte/word variant: extra predicate (BWI/VBMI), no broadcast forms.
1785 multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1786 X86FoldableSchedWrite sched,
1787 AVX512VLVectorVTInfo VTInfo,
1788 AVX512VLVectorVTInfo Idx, Predicate Prd> {
1789 let Predicates = [Prd] in
1790 defm NAME#Z: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1791 Idx.info512>, EVEX_V512;
1792 let Predicates = [Prd, HasVLX] in {
1793 defm NAME#Z128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1794 Idx.info128>, EVEX_V128;
1795 defm NAME#Z256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1796 Idx.info256>, EVEX_V256;
// Instantiations, mirroring the VPERMI2* set with the T2 opcodes.
1800 defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1801 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1802 defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1803 avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1804 defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1805 avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1806 REX_W, EVEX_CD8<16, CD8VF>;
1807 defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1808 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1810 defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1811 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1812 defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1813 avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1815 //===----------------------------------------------------------------------===//
1816 // AVX-512 - BLEND using mask
// Mask-driven blend instructions (VBLENDM*/VPBLENDM*). All forms carry empty
// ISel pattern lists ([]): blends are normally selected as masked moves, so
// these records exist for assembly/disassembly and explicit encoding only
// (hasSideEffects = 0 keeps them freely schedulable).
1819 multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1820 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1821 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1822 def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1823 (ins _.RC:$src1, _.RC:$src2),
1824 !strconcat(OpcodeStr,
1825 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1826 EVEX, VVVV, Sched<[sched]>;
1827 def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1828 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1829 !strconcat(OpcodeStr,
1830 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1831 []>, EVEX, VVVV, EVEX_K, Sched<[sched]>;
1832 def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1833 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1834 !strconcat(OpcodeStr,
1835 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1836 []>, EVEX, VVVV, EVEX_KZ, Sched<[sched]>;
// Memory-operand forms (unmasked / merge-masked / zero-masked).
1837 let mayLoad = 1 in {
1838 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1839 (ins _.RC:$src1, _.MemOp:$src2),
1840 !strconcat(OpcodeStr,
1841 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1842 []>, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
1843 Sched<[sched.Folded, sched.ReadAfterFold]>;
1844 def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1845 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1846 !strconcat(OpcodeStr,
1847 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1848 []>, EVEX, VVVV, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1849 Sched<[sched.Folded, sched.ReadAfterFold]>;
1850 def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1851 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1852 !strconcat(OpcodeStr,
1853 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1854 []>, EVEX, VVVV, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1855 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast-memory (EVEX.b) forms, only meaningful for dword/qword element
// blends; byte/word blends (blendmask_bw below) do not include these.
1859 multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
1860 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1861 let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
1862 def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1863 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1864 !strconcat(OpcodeStr,
1865 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1866 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1867 EVEX, VVVV, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1868 Sched<[sched.Folded, sched.ReadAfterFold]>;
1870 def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1871 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1872 !strconcat(OpcodeStr,
1873 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
1874 "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1875 EVEX, VVVV, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1876 Sched<[sched.Folded, sched.ReadAfterFold]>;
1878 def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1879 (ins _.RC:$src1, _.ScalarMemOp:$src2),
1880 !strconcat(OpcodeStr,
1881 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
1882 "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1883 EVEX, VVVV, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1884 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Size expansion for dword/qword blends: includes broadcast forms; XMM/YMM
// require VLX.
1888 multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
1889 AVX512VLVectorVTInfo VTInfo> {
1890 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1891 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1894 let Predicates = [HasVLX] in {
1895 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1896 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1898 defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
1899 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
// Size expansion for byte/word blends: gated on BWI, no broadcast forms.
1904 multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
1905 AVX512VLVectorVTInfo VTInfo> {
1906 let Predicates = [HasBWI] in
1907 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1910 let Predicates = [HasBWI, HasVLX] in {
1911 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1913 defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
// Instantiations; REX_W marks the 64-bit-element encodings.
1918 defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
1920 defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
1921 avx512vl_f64_info>, REX_W;
1922 defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
1924 defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
1925 avx512vl_i64_info>, REX_W;
1926 defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
1928 defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
1929 avx512vl_i16_info>, REX_W;
1931 //===----------------------------------------------------------------------===//
1932 // Compare Instructions
1933 //===----------------------------------------------------------------------===//
1935 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
// Scalar FP compares into a mask register (VCMPSS/VCMPSD/VCMPSH): maskable
// intrinsic forms (rri_Int/rmi_Int), an SAE (suppress-all-exceptions,
// EVEX.b) form, and isCodeGenOnly FRC-register forms used for plain
// scalar-compare selection. SIMD_EXC marks forms that may raise FP
// exceptions; the SAE form instead lists an explicit MXCSR use.
1937 multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
1938 PatFrag OpNode_su, PatFrag OpNodeSAE_su,
1939 X86FoldableSchedWrite sched> {
1940 defm rri_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1942 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1944 "$cc, $src2, $src1", "$src1, $src2, $cc",
1945 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
1946 (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc)>,
1947 EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC;
1949 defm rmi_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
1951 (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
1953 "$cc, $src2, $src1", "$src1, $src2, $cc",
1954 (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
1956 (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
1957 timm:$cc)>, EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
1958 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
// SAE form: EVEX.b in register context means {sae}, not broadcast.
1960 let Uses = [MXCSR] in
1961 defm rrib_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1963 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1965 "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
1966 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1968 (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1970 EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>;
// CodeGen-only forms on scalar FP register classes (FRC) for non-intrinsic
// compares.
1972 let isCodeGenOnly = 1 in {
1973 let isCommutable = 1 in
1974 def rri : AVX512Ii8<0xC2, MRMSrcReg,
1975 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
1976 !strconcat("vcmp", _.Suffix,
1977 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
1978 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
1981 EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC;
1982 def rmi : AVX512Ii8<0xC2, MRMSrcMem,
1984 (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
1985 !strconcat("vcmp", _.Suffix,
1986 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
1987 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
1988 (_.ScalarLdFrag addr:$src2),
1990 EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
1991 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
// Instantiations: SS/SD under AVX512F; SH (fp16) under FP16 with the TA map.
1995 let Predicates = [HasAVX512] in {
1996 let ExeDomain = SSEPackedSingle in
1997 defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
1998 X86cmpms_su, X86cmpmsSAE_su,
1999 SchedWriteFCmp.Scl>, AVX512XSIi8Base;
2000 let ExeDomain = SSEPackedDouble in
2001 defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
2002 X86cmpms_su, X86cmpmsSAE_su,
2003 SchedWriteFCmp.Scl>, AVX512XDIi8Base, REX_W;
2005 let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in
2006 defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE,
2007 X86cmpms_su, X86cmpmsSAE_su,
2008 SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA;
// Packed integer equality/greater-than compares writing a mask register
// (VPCMPEQ*/VPCMPGT*). Pattern lists are empty — selection is handled
// elsewhere — so these records provide encodings and assembly only.
2010 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
2011 X86FoldableSchedWrite sched,
2012 X86VectorVTInfo _, bit IsCommutable> {
2013 let isCommutable = IsCommutable, hasSideEffects = 0 in
2014 def rr : AVX512BI<opc, MRMSrcReg,
2015 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2016 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2017 []>, EVEX, VVVV, Sched<[sched]>;
2018 let mayLoad = 1, hasSideEffects = 0 in
2019 def rm : AVX512BI<opc, MRMSrcMem,
2020 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2021 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2022 []>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Merge-masked variants (result ANDed into $mask by the hardware).
2023 let isCommutable = IsCommutable, hasSideEffects = 0 in
2024 def rrk : AVX512BI<opc, MRMSrcReg,
2025 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2026 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2027 "$dst {${mask}}, $src1, $src2}"),
2028 []>, EVEX, VVVV, EVEX_K, Sched<[sched]>;
2029 let mayLoad = 1, hasSideEffects = 0 in
2030 def rmk : AVX512BI<opc, MRMSrcMem,
2031 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2032 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2033 "$dst {${mask}}, $src1, $src2}"),
2034 []>, EVEX, VVVV, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Adds broadcast-memory (EVEX.b) forms; only the dword/qword compares use
// this subclass.
2037 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
2038 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2040 avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
2041 let mayLoad = 1, hasSideEffects = 0 in {
2042 def rmb : AVX512BI<opc, MRMSrcMem,
2043 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2044 !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2045 "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2046 []>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2047 def rmbk : AVX512BI<opc, MRMSrcMem,
2048 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2049 _.ScalarMemOp:$src2),
2050 !strconcat(OpcodeStr,
2051 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2052 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2053 []>, EVEX, VVVV, EVEX_K, EVEX_B,
2054 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Size expansion: ZMM under the given predicate, XMM/YMM additionally VLX.
2058 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
2059 X86SchedWriteWidths sched,
2060 AVX512VLVectorVTInfo VTInfo, Predicate prd,
2061 bit IsCommutable = 0> {
2062 let Predicates = [prd] in
2063 defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
2064 VTInfo.info512, IsCommutable>, EVEX_V512;
2066 let Predicates = [prd, HasVLX] in {
2067 defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
2068 VTInfo.info256, IsCommutable>, EVEX_V256;
2069 defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
2070 VTInfo.info128, IsCommutable>, EVEX_V128;
// Same expansion for the broadcast-capable (dword/qword) compares.
2074 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2075 X86SchedWriteWidths sched,
2076 AVX512VLVectorVTInfo VTInfo,
2077 Predicate prd, bit IsCommutable = 0> {
2078 let Predicates = [prd] in
2079 defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
2080 VTInfo.info512, IsCommutable>, EVEX_V512;
2082 let Predicates = [prd, HasVLX] in {
2083 defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
2084 VTInfo.info256, IsCommutable>, EVEX_V256;
2085 defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
2086 VTInfo.info128, IsCommutable>, EVEX_V128;
// VPCMPEQ* are commutable (final bit = 1); VPCMPGT* are not.
2090 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2091 // increase the pattern complexity the way an immediate would.
2092 let AddedComplexity = 2 in {
2093 // FIXME: Is there a better scheduler class for VPCMP?
2094 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
2095 SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2096 EVEX_CD8<8, CD8VF>, WIG;
2098 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
2099 SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2100 EVEX_CD8<16, CD8VF>, WIG;
2102 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
2103 SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2104 EVEX_CD8<32, CD8VF>;
2106 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
2107 SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2108 T8, REX_W, EVEX_CD8<64, CD8VF>;
2110 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
2111 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2112 EVEX_CD8<8, CD8VF>, WIG;
2114 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
2115 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2116 EVEX_CD8<16, CD8VF>, WIG;
2118 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
2119 SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2120 EVEX_CD8<32, CD8VF>;
2122 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
2123 SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2124 T8, REX_W, EVEX_CD8<64, CD8VF>;
// VPCMP{B,W,D,Q}/VPCMPU* with an explicit condition-code immediate. The Frag
// PatFrags capture the condition in $cc; the _su ("single use") variants are
// used under mask-AND so the compare is only folded when its result has one
// use. The trailing Pats commute operand order for load-on-the-left matches,
// remapping the immediate via X86pcmpm_imm_commute.
2127 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2129 X86FoldableSchedWrite sched,
2130 X86VectorVTInfo _, string Name> {
2131 let isCommutable = 1 in
2132 def rri : AVX512AIi8<opc, MRMSrcReg,
2133 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2134 !strconcat("vpcmp", Suffix,
2135 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2136 [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2139 EVEX, VVVV, Sched<[sched]>;
2140 def rmi : AVX512AIi8<opc, MRMSrcMem,
2141 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2142 !strconcat("vpcmp", Suffix,
2143 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2144 [(set _.KRC:$dst, (_.KVT
2147 (_.VT (_.LdFrag addr:$src2)),
2149 EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Merge-masked forms: compare result ANDed with the incoming mask.
2150 let isCommutable = 1 in
2151 def rrik : AVX512AIi8<opc, MRMSrcReg,
2152 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2154 !strconcat("vpcmp", Suffix,
2155 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2156 "$dst {${mask}}, $src1, $src2, $cc}"),
2157 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2158 (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
2161 EVEX, VVVV, EVEX_K, Sched<[sched]>;
2162 def rmik : AVX512AIi8<opc, MRMSrcMem,
2163 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2165 !strconcat("vpcmp", Suffix,
2166 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2167 "$dst {${mask}}, $src1, $src2, $cc}"),
2168 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2172 (_.VT (_.LdFrag addr:$src2)),
2174 EVEX, VVVV, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Commuted-operand patterns: load on the LHS, with the immediate remapped.
2176 def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2),
2177 (_.VT _.RC:$src1), cond)),
2178 (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2179 _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2181 def : Pat<(and _.KRCWM:$mask,
2182 (_.KVT (Frag_su:$cc (_.LdFrag addr:$src2),
2183 (_.VT _.RC:$src1), cond))),
2184 (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2185 _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2186 (X86pcmpm_imm_commute $cc))>;
// Broadcast-capable (EVEX.b) subclass used by the dword/qword compares; adds
// rmib/rmibk forms plus the matching commuted-broadcast patterns.
2189 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2190 PatFrag Frag_su, X86FoldableSchedWrite sched,
2191 X86VectorVTInfo _, string Name> :
2192 avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> {
2193 def rmib : AVX512AIi8<opc, MRMSrcMem,
2194 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2196 !strconcat("vpcmp", Suffix,
2197 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2198 "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2199 [(set _.KRC:$dst, (_.KVT (Frag:$cc
2201 (_.BroadcastLdFrag addr:$src2),
2203 EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2204 def rmibk : AVX512AIi8<opc, MRMSrcMem,
2205 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2206 _.ScalarMemOp:$src2, u8imm:$cc),
2207 !strconcat("vpcmp", Suffix,
2208 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2209 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2210 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2213 (_.BroadcastLdFrag addr:$src2),
2215 EVEX, VVVV, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2217 def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2),
2218 (_.VT _.RC:$src1), cond)),
2219 (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2220 _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2222 def : Pat<(and _.KRCWM:$mask,
2223 (_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2),
2224 (_.VT _.RC:$src1), cond))),
2225 (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2226 _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2227 (X86pcmpm_imm_commute $cc))>;
// Size expansion: ZMM under the given predicate, XMM/YMM additionally VLX.
// NAME is passed down so the commuted Pats can !cast the right record names.
2230 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2231 PatFrag Frag_su, X86SchedWriteWidths sched,
2232 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2233 let Predicates = [prd] in
2234 defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2235 sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2237 let Predicates = [prd, HasVLX] in {
2238 defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2239 sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2240 defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2241 sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2245 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2246 PatFrag Frag_su, X86SchedWriteWidths sched,
2247 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2248 let Predicates = [prd] in
2249 defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2250 sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2252 let Predicates = [prd, HasVLX] in {
2253 defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2254 sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2255 defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2256 sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
// Instantiations: signed (0x3F/0x1F) vs unsigned (0x3E/0x1E); byte/word need
// BWI and lack broadcast forms, dword/qword use the rmb subclass.
2260 // FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
2261 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
2262 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2264 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
2265 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2268 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
2269 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2270 REX_W, EVEX_CD8<16, CD8VF>;
2271 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
2272 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2273 REX_W, EVEX_CD8<16, CD8VF>;
2275 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
2276 SchedWriteVecALU, avx512vl_i32_info,
2277 HasAVX512>, EVEX_CD8<32, CD8VF>;
2278 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
2279 SchedWriteVecALU, avx512vl_i32_info,
2280 HasAVX512>, EVEX_CD8<32, CD8VF>;
2282 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
2283 SchedWriteVecALU, avx512vl_i64_info,
2284 HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;
2285 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
2286 SchedWriteVecALU, avx512vl_i64_info,
2287 HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;
// FP compare-to-mask (VCMPPS/PD/PH style): defines reg-reg (rri), reg-mem
// (rmi) and reg-broadcast (rmbi) forms, plus ISel patterns that commute the
// condition code (X86cmpm_imm_commute) when the load appears as the FIRST
// operand, so the memory operand can still be folded into src2.
// All forms read MXCSR and may raise FP exceptions.
2289 multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2291 let Uses = [MXCSR], mayRaiseFPException = 1 in {
2292 defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2293 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
2295 "$cc, $src2, $src1", "$src1, $src2, $cc",
2296 (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2297 (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2300 defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2301 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2303 "$cc, $src2, $src1", "$src1, $src2, $cc",
2304 (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2306 (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2308 Sched<[sched.Folded, sched.ReadAfterFold]>;
2310 defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2312 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2314 "$cc, ${src2}"#_.BroadcastStr#", $src1",
2315 "$src1, ${src2}"#_.BroadcastStr#", $cc",
2316 (X86any_cmpm (_.VT _.RC:$src1),
2317 (_.VT (_.BroadcastLdFrag addr:$src2)),
2319 (X86cmpm_su (_.VT _.RC:$src1),
2320 (_.VT (_.BroadcastLdFrag addr:$src2)),
2322 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2325 // Patterns for selecting with loads in other operand.
2326 def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2328 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2329 (X86cmpm_imm_commute timm:$cc))>;
2331 def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
2334 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2335 _.RC:$src1, addr:$src2,
2336 (X86cmpm_imm_commute timm:$cc))>;
2338 def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
2339 (_.VT _.RC:$src1), timm:$cc),
2340 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2341 (X86cmpm_imm_commute timm:$cc))>;
2343 def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
2346 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2347 _.RC:$src1, addr:$src2,
2348 (X86cmpm_imm_commute timm:$cc))>;
2350 // Patterns for mask intrinsics.
// An all-ones mask operand selects the unmasked instruction; a register mask
// selects the "k" (merge-masked) form.
2351 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
2352 (_.KVT immAllOnesV)),
2353 (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>;
2355 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask),
2356 (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1,
2357 _.RC:$src2, timm:$cc)>;
2359 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2360 (_.KVT immAllOnesV)),
2361 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>;
2363 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2365 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1,
2366 addr:$src2, timm:$cc)>;
2368 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2369 (_.KVT immAllOnesV)),
2370 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>;
2372 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2374 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1,
2375 addr:$src2, timm:$cc)>;
2377 // Patterns for mask intrinsics with loads in other operand.
2378 def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2379 (_.KVT immAllOnesV)),
2380 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2381 (X86cmpm_imm_commute timm:$cc))>;
2383 def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2385 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2386 _.RC:$src1, addr:$src2,
2387 (X86cmpm_imm_commute timm:$cc))>;
2389 def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2390 (_.KVT immAllOnesV)),
2391 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2392 (X86cmpm_imm_commute timm:$cc))>;
2394 def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2396 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2397 _.RC:$src1, addr:$src2,
2398 (X86cmpm_imm_commute timm:$cc))>;
// SAE (suppress-all-exceptions) variant of the FP compare: 512-bit only,
// register-register, with {sae} in the assembly string. Both the unmasked and
// merge-masked patterns use X86cmpmmSAE, distinguished by an all-ones vs.
// register mask operand.
2401 multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2402 // comparison code form (VCMP[EQ/LT/LE/...]
2403 let Uses = [MXCSR] in
2404 defm rrib : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst),
2405 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2406 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
2408 "$cc, {sae}, $src2, $src1",
2409 "$src1, $src2, {sae}, $cc",
2410 [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2411 (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))],
2412 [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2413 (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>,
2414 EVEX_B, Sched<[sched]>;
// Instantiates the FP compare at all three vector widths. The SAE form exists
// only for the 512-bit variant; 128/256-bit variants additionally require VLX.
2417 multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
2418 Predicate Pred = HasAVX512> {
2419 let Predicates = [Pred] in {
2420 defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2421 avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2424 let Predicates = [Pred,HasVLX] in {
2425 defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2426 defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
// VCMPPH (FP16) is gated on HasFP16 and uses the TA map per the EVEX encoding.
2430 defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2431 AVX512PDIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
2432 defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2433 AVX512PSIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
2434 defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
2435 AVX512PSIi8Base, EVEX, VVVV, EVEX_CD8<16, CD8VF>, TA;
2437 // Patterns to select fp compares with load as first operand.
// Scalar compares (VCMPSS/SD/SH) are not commutable as-is, so when the load is
// the first operand we swap the operands and rewrite the condition code with
// X86cmpm_imm_commute so the load can still fold into the memory form.
2438 let Predicates = [HasAVX512] in {
2439 def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
2440 (VCMPSDZrmi FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2442 def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
2443 (VCMPSSZrmi FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2446 let Predicates = [HasFP16] in {
2447 def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
2448 (VCMPSHZrmi FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2451 // ----------------------------------------------------------------
2454 // Scalar VFPCLASS: mask = op(reg_scalar, imm) or
2455 //                  mask = op(mem_scalar, imm)
// Defines unmasked/masked (EVEX.K) register and memory forms. All forms read
// MXCSR. Masked forms AND the comparison result with the input mask via the
// *_su ("suppress-update") node.
2456 multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
2457 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2459 let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2460 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2461 (ins _.RC:$src1, i32u8imm:$src2),
2462 OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2463 [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
2464 (i32 timm:$src2)))]>,
2466 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2467 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2469 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2470 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2471 (X86Vfpclasss_su (_.VT _.RC:$src1),
2472 (i32 timm:$src2))))]>,
2473 EVEX_K, Sched<[sched]>;
2474 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2475 (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2477 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2479 (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
2480 (i32 timm:$src2)))]>,
2481 Sched<[sched.Folded, sched.ReadAfterFold]>;
2482 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2483 (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2485 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2486 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2487 (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
2488 (i32 timm:$src2))))]>,
2489 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2493 // Vector VFPCLASS: mask = fpclass(reg_vec, imm)
2494 //                  mask = fpclass(mem_vec, imm)
2495 //                  mask = fpclass(broadcast(eltVt), imm)
// Defines rr/rm/rmb forms plus their EVEX.K-masked twins, and AT&T-syntax
// InstAliases that accept the x/y/z width suffix on the register and
// broadcast forms.
2496 multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
2497 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2499 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2500 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2501 (ins _.RC:$src1, i32u8imm:$src2),
2502 OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2503 [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
2504 (i32 timm:$src2)))]>,
2506 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2507 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2509 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2510 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2511 (X86Vfpclass_su (_.VT _.RC:$src1),
2512 (i32 timm:$src2))))]>,
2513 EVEX_K, Sched<[sched]>;
2514 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2515 (ins _.MemOp:$src1, i32u8imm:$src2),
2516 OpcodeStr#_.Suffix#"{"#mem#"}"#
2517 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2518 [(set _.KRC:$dst,(X86Vfpclass
2519 (_.VT (_.LdFrag addr:$src1)),
2520 (i32 timm:$src2)))]>,
2521 Sched<[sched.Folded, sched.ReadAfterFold]>;
2522 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2523 (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2524 OpcodeStr#_.Suffix#"{"#mem#"}"#
2525 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2526 [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
2527 (_.VT (_.LdFrag addr:$src1)),
2528 (i32 timm:$src2))))]>,
2529 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2530 def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2531 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2532 OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2533 _.BroadcastStr#", $dst|$dst, ${src1}"
2534 #_.BroadcastStr#", $src2}",
2535 [(set _.KRC:$dst,(X86Vfpclass
2536 (_.VT (_.BroadcastLdFrag addr:$src1)),
2537 (i32 timm:$src2)))]>,
2538 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2539 def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2540 (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2541 OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2542 _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
2543 _.BroadcastStr#", $src2}",
2544 [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
2545 (_.VT (_.BroadcastLdFrag addr:$src1)),
2546 (i32 timm:$src2))))]>,
2547 EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2550 // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
// AT&T-only aliases ("att" dialect, priority 0) mapping the suffixed mnemonic
// back to the plain rr/rrk/rmb/rmbk instructions.
2552 def : InstAlias<OpcodeStr#_.Suffix#mem#
2553 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2554 (!cast<Instruction>(NAME#"rr")
2555 _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2556 def : InstAlias<OpcodeStr#_.Suffix#mem#
2557 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2558 (!cast<Instruction>(NAME#"rrk")
2559 _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2560 def : InstAlias<OpcodeStr#_.Suffix#mem#
2561 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
2562 _.BroadcastStr#", $src2}",
2563 (!cast<Instruction>(NAME#"rmb")
2564 _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2565 def : InstAlias<OpcodeStr#_.Suffix#mem#
2566 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
2567 "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
2568 (!cast<Instruction>(NAME#"rmbk")
2569 _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
// Instantiates the vector fpclass at 512-bit (suffix "z") and, with VLX, at
// 128-bit ("x") and 256-bit ("y").
2572 multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2573 bits<8> opc, X86SchedWriteWidths sched,
2575 let Predicates = [prd] in {
2576 defm Z : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
2577 _.info512, "z">, EVEX_V512;
2579 let Predicates = [prd, HasVLX] in {
2580 defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
2581 _.info128, "x">, EVEX_V128;
2582 defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
2583 _.info256, "y">, EVEX_V256;
// Instantiates vector + scalar fpclass for all three element types.
// FP16 forms are on the TA map (gated by HasFP16 in the scalar defm); FP32/64
// forms use the 0F3A map (AVX512AIi8Base) and require DQI for the scalars.
2587 multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2588 bits<8> opcScalar, X86SchedWriteWidths sched> {
2589 defm PH : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f16_info, opcVec,
2591 EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA;
2592 defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2593 sched.Scl, f16x_info, HasFP16>,
2594 EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA;
2595 defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
2597 EVEX_CD8<32, CD8VF>, AVX512AIi8Base;
2598 defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
2600 EVEX_CD8<64, CD8VF>, AVX512AIi8Base, REX_W;
2601 defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2602 sched.Scl, f32x_info, HasDQI>, VEX_LIG,
2603 EVEX_CD8<32, CD8VT1>, AVX512AIi8Base;
2604 defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2605 sched.Scl, f64x_info, HasDQI>, VEX_LIG,
2606 EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, REX_W;
// Vector opcode 0x66, scalar opcode 0x67.
2609 defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX;
2611 //-----------------------------------------------------------------
2612 // Mask register copy, including
2613 // - copy between mask registers
2614 // - load/store mask registers
2615 // - copy from GPR to mask register and vice versa
// KMOV k<-k (kk), k<-mem (km) and mem<-k (mk) forms. The Suffix parameter
// ("_EVEX") distinguishes the EVEX-encoded APX variants from the VEX ones.
2617 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2618 string OpcodeStr, RegisterClass KRC, ValueType vvt,
2619 X86MemOperand x86memop, string Suffix = ""> {
2620 let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove],
2621 explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in
2622 def kk#Suffix : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2623 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2625 def km#Suffix : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2626 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2627 [(set KRC:$dst, (vvt (load addr:$src)))]>,
2628 Sched<[WriteLoad]>, NoCD8;
2629 def mk#Suffix : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2630 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2631 [(store KRC:$src, addr:$dst)]>,
2632 Sched<[WriteStore]>, NoCD8;
// KMOV k<-GPR (kr) and GPR<-k (rk) forms; no ISel patterns, selected by
// explicit COPY_TO_REGCLASS patterns elsewhere.
2635 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2636 string OpcodeStr, RegisterClass KRC,
2637 RegisterClass GRC, string Suffix = ""> {
2638 let hasSideEffects = 0 in {
2639 def kr#Suffix : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2640 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2642 def rk#Suffix : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2643 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
// Each width is instantiated twice: a VEX-encoded form (NoEGPR) and an
// EVEX-encoded "_EVEX" form for APX extended GPRs (HasEGPR, 64-bit only).
2648 let Predicates = [HasDQI, NoEGPR] in
2649 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2650 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2652 let Predicates = [HasDQI, HasEGPR, In64BitMode] in
2653 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem, "_EVEX">,
2654 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32, "_EVEX">,
2657 let Predicates = [HasAVX512, NoEGPR] in
2658 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2659 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2661 let Predicates = [HasAVX512, HasEGPR, In64BitMode] in
2662 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem, "_EVEX">,
2663 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32, "_EVEX">,
2666 let Predicates = [HasBWI, NoEGPR] in {
2667 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2669 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2671 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2673 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2676 let Predicates = [HasBWI, HasEGPR, In64BitMode] in {
2677 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem, "_EVEX">,
2678 EVEX, TB, PD, REX_W;
2679 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32, "_EVEX">,
2681 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem, "_EVEX">,
2683 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64, "_EVEX">,
2684 EVEX, TB, XD, REX_W;
2687 // GR from/to mask register
// i16/i8 <-> mask bitcasts go through a 32-bit super-register (INSERT_SUBREG /
// EXTRACT_SUBREG on sub_16bit / sub_8bit) because the KMOV GPR forms operate
// on GR32.
2688 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2689 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2690 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2691 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2692 def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))),
2693 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>;
2695 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2696 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2697 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2698 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
// zext uses KMOVWrk/KMOVBrk (which zero the upper bits); anyext only needs a
// register-class copy.
2700 def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2701 (KMOVWrk VK16:$src)>;
2702 def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2703 (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
2704 def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2705 (COPY_TO_REGCLASS VK16:$src, GR32)>;
2706 def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2707 (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
2709 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2710 (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2711 def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2712 (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
2713 def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2714 (COPY_TO_REGCLASS VK8:$src, GR32)>;
2715 def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2716 (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
// 32/64-bit masks match GR32/GR64 exactly, so a plain regclass copy suffices.
2718 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2719 (COPY_TO_REGCLASS GR32:$src, VK32)>;
2720 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2721 (COPY_TO_REGCLASS VK32:$src, GR32)>;
2722 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2723 (COPY_TO_REGCLASS GR64:$src, VK64)>;
2724 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2725 (COPY_TO_REGCLASS VK64:$src, GR64)>;
// Sub-byte mask loads: with DQI use KMOVB and narrow the regclass.
2728 let Predicates = [HasDQI] in {
2729 def : Pat<(v1i1 (load addr:$src)),
2730 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2731 def : Pat<(v2i1 (load addr:$src)),
2732 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2733 def : Pat<(v4i1 (load addr:$src)),
2734 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
// Without DQI there is no KMOVB: load a byte zero-extended through a GPR
// instead for the v8i1 case.
2737 let Predicates = [HasAVX512] in {
2738 def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2739 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2740 def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2741 (KMOVWkm addr:$src)>;
2744 def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2745 SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2746 SDTCVecEltisVT<1, i1>,
2749 let Predicates = [HasAVX512] in {
// scalar_to_vector / element-0 extract between GPRs and mask registers for
// every mask width: GR32 copies directly; GR8 is widened to GR32 through an
// IMPLICIT_DEF before the regclass copy, and extracts go the other way via
// sub_8bit.
2750 multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2751 def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2752 (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2754 def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2755 (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2757 def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2758 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2760 def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2761 (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
2764 defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
2765 defm : operation_gpr_mask_copy_lowering<VK2, v2i1>;
2766 defm : operation_gpr_mask_copy_lowering<VK4, v4i1>;
2767 defm : operation_gpr_mask_copy_lowering<VK8, v8i1>;
2768 defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
2769 defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
2770 defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;
2772 def : Pat<(insert_subvector (v16i1 immAllZerosV),
2773 (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2775 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2779 // Mask unary operation
// Single reg-reg mask instruction (e.g. KNOT) for one mask width, gated on
// the given predicate.
2781 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2782 RegisterClass KRC, SDPatternOperator OpNode,
2783 X86FoldableSchedWrite sched, Predicate prd> {
2784 let Predicates = [prd] in
2785 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2786 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2787 [(set KRC:$dst, (OpNode KRC:$src))]>,
// Instantiates the unop at all four mask widths: B needs DQI, W needs
// AVX512F, D/Q need BWI.
2791 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2792 SDPatternOperator OpNode,
2793 X86FoldableSchedWrite sched> {
2794 defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2795 sched, HasDQI>, VEX, TB, PD;
2796 defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2797 sched, HasAVX512>, VEX, TB;
2798 defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2799 sched, HasBWI>, VEX, TB, PD, REX_W;
2800 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2801 sched, HasBWI>, VEX, TB, REX_W;
2804 // TODO - do we need a X86SchedWriteWidths::KMASK type?
2805 defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
2807 // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
// Widen the narrow mask to VK16, invert with KNOTW, then copy back to the
// source's own register class.
2808 let Predicates = [HasAVX512, NoDQI] in
2809 def : Pat<(vnot VK8:$src),
2810 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
2812 def : Pat<(vnot VK4:$src),
2813 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2814 def : Pat<(vnot VK2:$src),
2815 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
// Invert a v1i1 mask: widen to VK16, KNOTW, copy back. Copy the result to
// VK1 to match the v1i1 source class — the previous code copied to VK2,
// which was inconsistent with the VK2/VK4/VK8 patterns above (each copies
// back to its own class).
2816 def : Pat<(vnot VK1:$src),
2817 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;
2819 // Mask binary operation
2820 // - KAND, KANDN, KOR, KXNOR, KXOR
// Single reg-reg-reg mask instruction for one width; commutability is
// forwarded so the two-address pass may swap operands.
2821 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2822 RegisterClass KRC, SDPatternOperator OpNode,
2823 X86FoldableSchedWrite sched, Predicate prd,
2825 let Predicates = [prd], isCommutable = IsCommutable in
2826 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2827 !strconcat(OpcodeStr,
2828 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2829 [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
// All four widths; the W form's predicate is overridable (KADD uses HasDQI).
2833 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2834 SDPatternOperator OpNode,
2835 X86FoldableSchedWrite sched, bit IsCommutable,
2836 Predicate prdW = HasAVX512> {
2837 defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2838 sched, HasDQI, IsCommutable>, VEX, VVVV, VEX_L, TB, PD;
2839 defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2840 sched, prdW, IsCommutable>, VEX, VVVV, VEX_L, TB;
2841 defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2842 sched, HasBWI, IsCommutable>, VEX, VVVV, VEX_L, REX_W, TB, PD;
2843 defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2844 sched, HasBWI, IsCommutable>, VEX, VVVV, VEX_L, REX_W, TB;
2847 // TODO - do we need a X86SchedWriteWidths::KMASK type?
// KANDN is the only non-commutable op here; KADD additionally requires DQI.
2848 defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>;
2849 defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>;
2850 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>;
2851 defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>;
2852 defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>;
2853 defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
// Lower mask binops on narrow mask types by widening both operands to VK16,
// using the 16-bit (W-form) instruction, and narrowing the result back.
2855 multiclass avx512_binop_pat<SDPatternOperator VOpNode,
2857 // With AVX512F, 8-bit mask is promoted to 16-bit mask,
2858 // for the DQI set, this type is legal and KxxxB instruction is used
2859 let Predicates = [NoDQI] in
2860 def : Pat<(VOpNode VK8:$src1, VK8:$src2),
2862 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
2863 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
2865 // All types smaller than 8 bits require conversion anyway
2866 def : Pat<(VOpNode VK1:$src1, VK1:$src2),
2867 (COPY_TO_REGCLASS (Inst
2868 (COPY_TO_REGCLASS VK1:$src1, VK16),
2869 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
2870 def : Pat<(VOpNode VK2:$src1, VK2:$src2),
2871 (COPY_TO_REGCLASS (Inst
2872 (COPY_TO_REGCLASS VK2:$src1, VK16),
2873 (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
2874 def : Pat<(VOpNode VK4:$src1, VK4:$src2),
2875 (COPY_TO_REGCLASS (Inst
2876 (COPY_TO_REGCLASS VK4:$src1, VK16),
2877 (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
// Instantiate the widening patterns for each logical op using the W-form
// instruction defined above.
2880 defm : avx512_binop_pat<and, KANDWrr>;
2881 defm : avx512_binop_pat<vandn, KANDNWrr>;
2882 defm : avx512_binop_pat<or, KORWrr>;
2883 defm : avx512_binop_pat<vxnor, KXNORWrr>;
2884 defm : avx512_binop_pat<xor, KXORWrr>;
// KUNPCK: concatenate two narrow masks into one double-width mask. The
// concat_vectors pattern passes $src2 first because the instruction takes the
// high half in src1 while concat_vectors lists the low half first.
2887 multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
2888 X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
2890 let Predicates = [prd] in {
2891 let hasSideEffects = 0 in
2892 def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
2893 (ins Src.KRC:$src1, Src.KRC:$src2),
2894 "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
2895 VEX, VVVV, VEX_L, Sched<[sched]>;
2897 def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
2898 (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
2902 defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info, WriteShuffle, HasAVX512>, TB, PD;
2903 defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, TB;
2904 defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, TB, REX_W;
// KORTEST/KTEST: mask compare instructions that produce no register result,
// only EFLAGS.
2907 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2908 SDNode OpNode, X86FoldableSchedWrite sched,
2910 let Predicates = [prd], Defs = [EFLAGS] in
2911 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
2912 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
2913 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
2917 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
2918 X86FoldableSchedWrite sched,
2919 Predicate prdW = HasAVX512> {
2920 defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
2922 defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
2924 defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
2926 defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
2930 // TODO - do we need a X86SchedWriteWidths::KMASK type?
2931 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
2932 defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
// KSHIFTL/KSHIFTR: shift a mask register by an 8-bit immediate.
2935 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2936 SDNode OpNode, X86FoldableSchedWrite sched> {
2937 let Predicates = [HasAVX512] in
2938 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
2939 !strconcat(OpcodeStr,
2940 "\t{$imm, $src, $dst|$dst, $src, $imm}"),
2941 [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
// W/B share opcode opc1 and D/Q share opc2 (W vs. B and D vs. Q are
// distinguished by REX_W). B requires DQI, D/Q require BWI.
2945 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
2946 SDNode OpNode, X86FoldableSchedWrite sched> {
2947 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2948 sched>, VEX, TA, PD, REX_W;
2949 let Predicates = [HasDQI] in
2950 defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2951 sched>, VEX, TA, PD;
2952 let Predicates = [HasBWI] in {
2953 defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2954 sched>, VEX, TA, PD, REX_W;
2955 defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2956 sched>, VEX, TA, PD;
2960 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
2961 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
2963 // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Without VLX only the 512-bit compare exists, so the narrow operands are
// widened with INSERT_SUBREG into undef (IMPLICIT_DEF) wide registers; the
// upper lanes are don't-care because only the narrow portion of the result
// mask is kept (COPY_TO_REGCLASS back to the narrow mask class).
2964 multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
2966 X86VectorVTInfo Narrow,
2967 X86VectorVTInfo Wide> {
2968 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
2969 (Narrow.VT Narrow.RC:$src2), cond)),
2971 (!cast<Instruction>(InstStr#"Zrri")
2972 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2973 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
2974 (X86pcmpm_imm $cc)), Narrow.KRC)>;
// Merge-masked form: the narrow mask is widened to the wide mask class so it
// can drive the 512-bit "k" instruction.
2976 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
2977 (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
2978 (Narrow.VT Narrow.RC:$src2),
2980 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
2981 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
2982 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2983 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
2984 (X86pcmpm_imm $cc)), Narrow.KRC)>;
// Broadcast-memory variant of the NoVLX integer-compare lowering: the
// register operand is widened as above while the broadcast load folds into
// the "rmib" form. When the broadcast appears as the FIRST operand, the
// condition code is commuted (X86pcmpm_imm_commute) so the load can still
// fold into src2.
2987 multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
2989 X86VectorVTInfo Narrow,
2990 X86VectorVTInfo Wide> {
2992 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
2993 (Narrow.BroadcastLdFrag addr:$src2), cond)),
2995 (!cast<Instruction>(InstStr#"Zrmib")
2996 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2997 addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
2999 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3001 (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3002 (Narrow.BroadcastLdFrag addr:$src2),
3004 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3005 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3006 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3007 addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
3009 // Commuted with broadcast load.
3010 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2),
3011 (Narrow.VT Narrow.RC:$src1),
3014 (!cast<Instruction>(InstStr#"Zrmib")
3015 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3016 addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3018 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3020 (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
3021 (Narrow.VT Narrow.RC:$src1),
3023 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3024 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3025 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3026 addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3029 // Same as above, but for fp types which don't use PatFrags.
// FP NoVLX lowering: the condition code is already an immediate (timm:$cc),
// so no X86pcmpm_imm conversion is needed; commuted broadcast patterns use
// X86cmpm_imm_commute instead.
3030 multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
3031 X86VectorVTInfo Narrow,
3032 X86VectorVTInfo Wide> {
3033 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3034 (Narrow.VT Narrow.RC:$src2), timm:$cc)),
3036 (!cast<Instruction>(InstStr#"Zrri")
3037 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3038 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3039 timm:$cc), Narrow.KRC)>;
3041 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3042 (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3043 (Narrow.VT Narrow.RC:$src2), timm:$cc))),
3044 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3045 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3046 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3047 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3048 timm:$cc), Narrow.KRC)>;
// Broadcast load as second operand folds into the "rmbi" form directly.
3051 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3052 (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
3054 (!cast<Instruction>(InstStr#"Zrmbi")
3055 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3056 addr:$src2, timm:$cc), Narrow.KRC)>;
3058 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3059 (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3060 (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
3061 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3062 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3063 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3064 addr:$src2, timm:$cc), Narrow.KRC)>;
3066 // Commuted with broadcast load.
3067 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3068 (Narrow.VT Narrow.RC:$src1), timm:$cc)),
3070 (!cast<Instruction>(InstStr#"Zrmbi")
3071 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3072 addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3074 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3075 (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3076 (Narrow.VT Narrow.RC:$src1), timm:$cc))),
3077 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3078 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3079 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3080 addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
// Instantiate the NoVLX compare lowerings: 256-bit and 128-bit i32/i64 integer
// compares through the 512-bit VPCMP(U){D,Q}, and 256/128-bit f32/f64 compares
// through VCMPPS/VCMPPD.
3083 let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
3084 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3085 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3087 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3088 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3090 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3091 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3093 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3094 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3096 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3097 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3099 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3100 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3102 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3103 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3105 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3106 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3108 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
3109 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
3110 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
3111 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
// Same widening lowerings for byte/word integer compares; the 512-bit
// VPCMP[U]B/VPCMP[U]W instructions additionally require BWI.
3114 let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
3115 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3116 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3118 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3119 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3121 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3122 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3124 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3125 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
3128 // Mask setting all 0s or 1s
// Emits a pseudo that materializes an all-zeros or all-ones value into a mask
// register. Marked rematerializable and as-cheap-as-a-move so the register
// allocator can re-create the constant instead of spilling it; scheduled as a
// zero idiom. Being a pseudo, it is expanded after ISel (presumably in the
// post-RA pseudo expansion — confirm at the expansion site).
3129 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> {
3130 let Predicates = [HasAVX512] in
3131 let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3132 SchedRW = [WriteZero] in
3133 def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3134 [(set KRC:$dst, (VT Val))]>;
// Instantiates the set-constant pseudo for the 16/32/64-element mask classes.
// The W/D/Q suffixes pair with VK16/VK32/VK64 respectively (i.e. they reflect
// the KMOVW/KMOVD/KMOVQ mask width, not the vector element type).
3137 multiclass avx512_mask_setop_w<SDPatternOperator Val> {
3138 defm W : avx512_mask_setop<VK16, v16i1, Val>;
3139 defm D : avx512_mask_setop<VK32, v32i1, Val>;
3140 defm Q : avx512_mask_setop<VK64, v64i1, Val>;
// KSET0{W,D,Q} materialize an all-zeros mask; KSET1{W,D,Q} an all-ones mask.
3143 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3144 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3146 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// Narrower mask constants (v8i1 down to v1i1) reuse the 16-bit KSET0W/KSET1W
// pseudos; only a register-class copy is needed since the narrow mask lives
// in the low bits of the K register.
3147 let Predicates = [HasAVX512] in {
3148 def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3149 def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3150 def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3151 def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3152 def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
3153 def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
3154 def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
3155 def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
3158 // Patterns for kmask insert_subvector/extract_subvector to/from index=0
// Both directions reduce to a register-class copy: extracting the low
// subvector of a mask, and inserting a narrow mask into the low lanes of an
// otherwise-undef wide mask. No shift/mask instructions are needed because
// the subvector occupies the low bits of the K register.
3159 multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3160 RegisterClass RC, ValueType VT> {
3161 def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3162 (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3164 def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3165 (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
// Instantiate the subvector copy patterns for every (narrow, wide) mask
// register-class pair from v1i1 up through v64i1.
3167 defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>;
3168 defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>;
3169 defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>;
3170 defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
3171 defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
3172 defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;
3174 defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>;
3175 defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>;
3176 defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>;
3177 defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>;
3178 defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>;
3180 defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>;
3181 defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>;
3182 defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>;
3183 defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>;
3185 defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>;
3186 defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>;
3187 defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>;
3189 defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3190 defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3192 defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3194 //===----------------------------------------------------------------------===//
3195 // AVX-512 - Aligned and unaligned load and store
// Generic AVX-512 vector load multiclass. Emits the unmasked register move
// (rr), unmasked load (rm), merge-masked forms (rrk/rmk), zero-masked forms
// (rrkz/rmkz), and DAG patterns folding masked_load nodes into the k/kz
// memory forms.
//   NoRMPattern - when set, suppresses the plain-load pattern on the rm form
//                 (the instruction is still emitted for assembly/disassembly).
//   SelectOprr  - the select node matched by the register-register masked
//                 forms; pass null_frag to disable those patterns.
3198 multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3199 X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3200 X86SchedWriteMoveLS Sched, bit NoRMPattern = 0,
3201 SDPatternOperator SelectOprr = vselect> {
3202 let hasSideEffects = 0 in {
3203 let isMoveReg = 1 in
// Plain register-to-register move; no ISel pattern (used for copies).
3204 def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3205 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3206 _.ExeDomain>, EVEX, Sched<[Sched.RR]>;
// Zero-masked reg-reg move: lanes with a clear mask bit become zero.
3207 def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3208 (ins _.KRCWM:$mask, _.RC:$src),
3209 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3210 "${dst} {${mask}} {z}, $src}"),
3211 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3213 _.ImmAllZerosV)))], _.ExeDomain>,
3214 EVEX, EVEX_KZ, Sched<[Sched.RR]>;
// Unmasked load; rematerializable and foldable as a load.
3216 let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3217 def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3218 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3219 !if(NoRMPattern, [],
3221 (_.VT (ld_frag addr:$src)))]),
3222 _.ExeDomain>, EVEX, Sched<[Sched.RM]>;
// Merge-masked forms tie the pass-through operand $src0 to $dst.
3224 let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3225 def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3226 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3227 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3228 "${dst} {${mask}}, $src1}"),
3229 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3231 (_.VT _.RC:$src0))))], _.ExeDomain>,
3232 EVEX, EVEX_K, Sched<[Sched.RR]>;
3233 def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3234 (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3235 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3236 "${dst} {${mask}}, $src1}"),
3237 [(set _.RC:$dst, (_.VT
3238 (vselect_mask _.KRCWM:$mask,
3239 (_.VT (ld_frag addr:$src1)),
3240 (_.VT _.RC:$src0))))], _.ExeDomain>,
3241 EVEX, EVEX_K, Sched<[Sched.RM]>;
// Zero-masked load: unselected lanes are zeroed, so no tied pass-through.
3243 def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3244 (ins _.KRCWM:$mask, _.MemOp:$src),
3245 OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3246 "${dst} {${mask}} {z}, $src}",
3247 [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
3248 (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3249 _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
// Fold masked loads: undef or zero pass-through selects the kz form,
// a register pass-through selects the merge-masked k form.
3251 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3252 (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3254 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3255 (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3257 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3258 (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
3259 _.KRCWM:$mask, addr:$ptr)>;
// Instantiates aligned loads at all vector lengths: the 512-bit form under
// the base predicate, and the 256/128-bit forms additionally gated on VLX.
3262 multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3263 AVX512VLVectorVTInfo _, Predicate prd,
3264 X86SchedWriteMoveLSWidths Sched,
3265 bit NoRMPattern = 0> {
3266 let Predicates = [prd] in
3267 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3268 _.info512.AlignedLdFrag, masked_load_aligned,
3269 Sched.ZMM, NoRMPattern>, EVEX_V512;
3271 let Predicates = [prd, HasVLX] in {
3272 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3273 _.info256.AlignedLdFrag, masked_load_aligned,
3274 Sched.YMM, NoRMPattern>, EVEX_V256;
3275 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3276 _.info128.AlignedLdFrag, masked_load_aligned,
3277 Sched.XMM, NoRMPattern>, EVEX_V128;
// Unaligned-load counterpart of avx512_alignedload_vl; additionally forwards
// SelectOprr so callers can disable the reg-reg masked-select patterns.
3281 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3282 AVX512VLVectorVTInfo _, Predicate prd,
3283 X86SchedWriteMoveLSWidths Sched,
3284 bit NoRMPattern = 0,
3285 SDPatternOperator SelectOprr = vselect> {
3286 let Predicates = [prd] in
3287 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3288 masked_load, Sched.ZMM, NoRMPattern, SelectOprr>, EVEX_V512;
3290 let Predicates = [prd, HasVLX] in {
3291 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3292 masked_load, Sched.YMM, NoRMPattern, SelectOprr>, EVEX_V256;
3293 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3294 masked_load, Sched.XMM, NoRMPattern, SelectOprr>, EVEX_V128;
// Generic AVX-512 vector store multiclass. Emits the store-form register
// moves (rr_REV/rrk_REV/rrkz_REV, codegen-only; kept so the disassembler can
// decode the MRMDestReg encodings), the memory stores (mr/mrk), a pattern
// folding masked_store into mrk, and ".s" assembler aliases selecting the
// store-form encodings explicitly.
3298 multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3299 X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3300 X86SchedWriteMoveLS Sched, bit NoMRPattern = 0> {
3301 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3302 let isMoveReg = 1 in
3303 def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3304 OpcodeStr # "\t{$src, $dst|$dst, $src}",
3305 [], _.ExeDomain>, EVEX,
3307 def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3308 (ins _.KRCWM:$mask, _.RC:$src),
3309 OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3310 "${dst} {${mask}}, $src}",
3311 [], _.ExeDomain>, EVEX, EVEX_K,
3313 def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3314 (ins _.KRCWM:$mask, _.RC:$src),
3315 OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3316 "${dst} {${mask}} {z}, $src}",
3317 [], _.ExeDomain>, EVEX, EVEX_KZ,
// Unmasked store; NoMRPattern suppresses the plain-store ISel pattern.
3321 let hasSideEffects = 0, mayStore = 1 in
3322 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3323 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3324 !if(NoMRPattern, [],
3325 [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3326 _.ExeDomain>, EVEX, Sched<[Sched.MR]>;
// Merge-masked store (no pattern; selected via the Pat below).
3327 def mrk : AVX512PI<opc, MRMDestMem, (outs),
3328 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3329 OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3330 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>;
3332 def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3333 (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3334 _.KRCWM:$mask, _.RC:$src)>;
// ".s" aliases force the store-form (MRMDestReg) register-move encodings.
3336 def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3337 (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3338 _.RC:$dst, _.RC:$src), 0>;
3339 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3340 (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3341 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3342 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3343 (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3344 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
// Unaligned stores at all vector lengths: 512-bit under the base predicate,
// 256/128-bit additionally gated on VLX.
3347 multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3348 AVX512VLVectorVTInfo _, Predicate prd,
3349 X86SchedWriteMoveLSWidths Sched,
3350 bit NoMRPattern = 0> {
3351 let Predicates = [prd] in
3352 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3353 masked_store, Sched.ZMM, NoMRPattern>, EVEX_V512;
3354 let Predicates = [prd, HasVLX] in {
3355 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3356 masked_store, Sched.YMM, NoMRPattern>, EVEX_V256;
3357 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3358 masked_store, Sched.XMM, NoMRPattern>, EVEX_V128;
// Aligned-store counterpart of avx512_store_vl (alignedstore /
// masked_store_aligned fragments instead of the unaligned ones).
3362 multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3363 AVX512VLVectorVTInfo _, Predicate prd,
3364 X86SchedWriteMoveLSWidths Sched,
3365 bit NoMRPattern = 0> {
3366 let Predicates = [prd] in
3367 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3368 masked_store_aligned, Sched.ZMM, NoMRPattern>, EVEX_V512;
3370 let Predicates = [prd, HasVLX] in {
3371 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3372 masked_store_aligned, Sched.YMM, NoMRPattern>, EVEX_V256;
3373 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3374 masked_store_aligned, Sched.XMM, NoMRPattern>, EVEX_V128;
// Concrete load/store instruction definitions. FP moves (VMOVAPS/VMOVAPD/
// VMOVUPS/VMOVUPD) keep their load patterns; the unaligned integer moves pass
// NoRMPattern/NoMRPattern = 1 and/or null_frag for SelectOprr so that plain
// loads/stores/selects prefer other encodings (presumably the 64-bit-element
// and PS forms matched by the Pats later in this file — confirm against those
// patterns).
3378 defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3379 HasAVX512, SchedWriteFMoveLS>,
3380 avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3381 HasAVX512, SchedWriteFMoveLS>,
3382 TB, EVEX_CD8<32, CD8VF>;
3384 defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3385 HasAVX512, SchedWriteFMoveLS>,
3386 avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3387 HasAVX512, SchedWriteFMoveLS>,
3388 TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
3390 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3391 SchedWriteFMoveLS, 0, null_frag>,
3392 avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3394 TB, EVEX_CD8<32, CD8VF>;
3396 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3397 SchedWriteFMoveLS, 0, null_frag>,
3398 avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3400 TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
3402 defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3403 HasAVX512, SchedWriteVecMoveLS, 1>,
3404 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3405 HasAVX512, SchedWriteVecMoveLS, 1>,
3406 TB, PD, EVEX_CD8<32, CD8VF>;
3408 defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3409 HasAVX512, SchedWriteVecMoveLS>,
3410 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3411 HasAVX512, SchedWriteVecMoveLS>,
3412 TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
3414 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3415 SchedWriteVecMoveLS, 1>,
3416 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3417 SchedWriteVecMoveLS, 1>,
3418 TB, XD, EVEX_CD8<8, CD8VF>;
3420 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3421 SchedWriteVecMoveLS, 1>,
3422 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3423 SchedWriteVecMoveLS, 1>,
3424 TB, XD, REX_W, EVEX_CD8<16, CD8VF>;
3426 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3427 SchedWriteVecMoveLS, 1, null_frag>,
3428 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3429 SchedWriteVecMoveLS, 1>,
3430 TB, XS, EVEX_CD8<32, CD8VF>;
3432 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3433 SchedWriteVecMoveLS, 0, null_frag>,
3434 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3435 SchedWriteVecMoveLS>,
3436 TB, XS, REX_W, EVEX_CD8<64, CD8VF>;
3438 // Special instructions to help with spilling when we don't have VLX. We need
3439 // to load or store from a ZMM register instead. These are converted in
3440 // expandPostRAPseudos.
// Load pseudos: no patterns and no side effects; only the spiller/expander
// uses them. Rematerializable so reloads can be re-created cheaply.
3441 let isReMaterializable = 1, canFoldAsLoad = 1,
3442 isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3443 def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3444 "", []>, Sched<[WriteFLoadX]>;
3445 def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3446 "", []>, Sched<[WriteFLoadY]>;
3447 def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3448 "", []>, Sched<[WriteFLoadX]>;
3449 def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3450 "", []>, Sched<[WriteFLoadY]>;
// Matching store pseudos for the spill direction.
3453 let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3454 def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3455 "", []>, Sched<[WriteFStoreX]>;
3456 def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3457 "", []>, Sched<[WriteFStoreY]>;
3458 def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3459 "", []>, Sched<[WriteFStoreX]>;
3460 def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3461 "", []>, Sched<[WriteFStoreY]>;
// Select-to-zero with the operands in "zero-if-mask-set" order: lower to a
// zero-masked move with the mask inverted via KNOT. The v8i1 case widens the
// mask to VK16 for KNOTW and narrows it back.
3464 def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3465 (v8i64 VR512:$src))),
3466 (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3469 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3470 (v16i32 VR512:$src))),
3471 (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3473 // These patterns exist to prevent the above patterns from introducing a second
3474 // mask inversion when one already exists.
3475 def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)),
3476 (v8i64 immAllZerosV),
3477 (v8i64 VR512:$src))),
3478 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3479 def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)),
3480 (v16i32 immAllZerosV),
3481 (v16i32 VR512:$src))),
3482 (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
// Lowers a narrow (128/256-bit) masked select to the wide (512-bit) masked
// move when VLX is unavailable: widen the vector operands with
// INSERT_SUBREG, copy the mask to the wide mask class, run the 512-bit
// rrk/rrkz move, and (in the elided EXTRACT_SUBREG wrapper) narrow the
// result back.
3484 multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3485 X86VectorVTInfo Wide> {
// Merge-masked select: pass-through $src0 becomes the wide tied operand.
3486 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3487 Narrow.RC:$src1, Narrow.RC:$src0)),
3490 (!cast<Instruction>(InstrStr#"rrk")
3491 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3492 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3493 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
// Zero-masked select: no pass-through operand needed.
3496 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3497 Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3500 (!cast<Instruction>(InstrStr#"rrkz")
3501 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3502 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3506 // Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
3507 // available. Use a 512-bit operation and extract.
// 32/64-bit-element selects need only AVX512F; byte/word (and f16/bf16,
// which reuse the word moves) need BWI for the 512-bit VMOVDQU8/16.
3508 let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
3509 defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3510 defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3511 defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3512 defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3514 defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3515 defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3516 defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3517 defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
3520 let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
3521 defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3522 defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3524 defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3525 defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
3527 defm : mask_move_lowering<"VMOVDQU16Z", v8f16x_info, v32f16_info>;
3528 defm : mask_move_lowering<"VMOVDQU16Z", v16f16x_info, v32f16_info>;
3530 defm : mask_move_lowering<"VMOVDQU16Z", v8bf16x_info, v32bf16_info>;
3531 defm : mask_move_lowering<"VMOVDQU16Z", v16bf16x_info, v32bf16_info>;
// 512-bit whole-vector load/store patterns for types that have no dedicated
// move instruction of their own element width here: integer types are routed
// through the 64-bit-element VMOVDQA64/VMOVDQU64, and f16/bf16 through the
// VMOVAPS/VMOVUPS encodings.
3534 let Predicates = [HasAVX512] in {
3536 def : Pat<(alignedloadv16i32 addr:$src),
3537 (VMOVDQA64Zrm addr:$src)>;
3538 def : Pat<(alignedloadv32i16 addr:$src),
3539 (VMOVDQA64Zrm addr:$src)>;
3540 def : Pat<(alignedloadv32f16 addr:$src),
3541 (VMOVAPSZrm addr:$src)>;
3542 def : Pat<(alignedloadv32bf16 addr:$src),
3543 (VMOVAPSZrm addr:$src)>;
3544 def : Pat<(alignedloadv64i8 addr:$src),
3545 (VMOVDQA64Zrm addr:$src)>;
3546 def : Pat<(loadv16i32 addr:$src),
3547 (VMOVDQU64Zrm addr:$src)>;
3548 def : Pat<(loadv32i16 addr:$src),
3549 (VMOVDQU64Zrm addr:$src)>;
3550 def : Pat<(loadv32f16 addr:$src),
3551 (VMOVUPSZrm addr:$src)>;
3552 def : Pat<(loadv32bf16 addr:$src),
3553 (VMOVUPSZrm addr:$src)>;
3554 def : Pat<(loadv64i8 addr:$src),
3555 (VMOVDQU64Zrm addr:$src)>;
// Matching store direction for the same types.
3558 def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3559 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3560 def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3561 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3562 def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
3563 (VMOVAPSZmr addr:$dst, VR512:$src)>;
3564 def : Pat<(alignedstore (v32bf16 VR512:$src), addr:$dst),
3565 (VMOVAPSZmr addr:$dst, VR512:$src)>;
3566 def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3567 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3568 def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3569 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3570 def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3571 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3572 def : Pat<(store (v32f16 VR512:$src), addr:$dst),
3573 (VMOVUPSZmr addr:$dst, VR512:$src)>;
3574 def : Pat<(store (v32bf16 VR512:$src), addr:$dst),
3575 (VMOVUPSZmr addr:$dst, VR512:$src)>;
3576 def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3577 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
// 128/256-bit counterparts of the above, available with VLX: integer types
// route through the Z128/Z256 VMOVDQA64/VMOVDQU64 forms, f16/bf16 through
// VMOVAPS/VMOVUPS.
3580 let Predicates = [HasVLX] in {
3582 def : Pat<(alignedloadv4i32 addr:$src),
3583 (VMOVDQA64Z128rm addr:$src)>;
3584 def : Pat<(alignedloadv8i16 addr:$src),
3585 (VMOVDQA64Z128rm addr:$src)>;
3586 def : Pat<(alignedloadv8f16 addr:$src),
3587 (VMOVAPSZ128rm addr:$src)>;
3588 def : Pat<(alignedloadv8bf16 addr:$src),
3589 (VMOVAPSZ128rm addr:$src)>;
3590 def : Pat<(alignedloadv16i8 addr:$src),
3591 (VMOVDQA64Z128rm addr:$src)>;
3592 def : Pat<(loadv4i32 addr:$src),
3593 (VMOVDQU64Z128rm addr:$src)>;
3594 def : Pat<(loadv8i16 addr:$src),
3595 (VMOVDQU64Z128rm addr:$src)>;
3596 def : Pat<(loadv8f16 addr:$src),
3597 (VMOVUPSZ128rm addr:$src)>;
3598 def : Pat<(loadv8bf16 addr:$src),
3599 (VMOVUPSZ128rm addr:$src)>;
3600 def : Pat<(loadv16i8 addr:$src),
3601 (VMOVDQU64Z128rm addr:$src)>;
// 128-bit stores.
3604 def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3605 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3606 def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3607 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3608 def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
3609 (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3610 def : Pat<(alignedstore (v8bf16 VR128X:$src), addr:$dst),
3611 (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3612 def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3613 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3614 def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3615 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3616 def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3617 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3618 def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
3619 (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
3620 def : Pat<(store (v8bf16 VR128X:$src), addr:$dst),
3621 (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
3622 def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3623 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
// 256-bit loads.
3626 def : Pat<(alignedloadv8i32 addr:$src),
3627 (VMOVDQA64Z256rm addr:$src)>;
3628 def : Pat<(alignedloadv16i16 addr:$src),
3629 (VMOVDQA64Z256rm addr:$src)>;
3630 def : Pat<(alignedloadv16f16 addr:$src),
3631 (VMOVAPSZ256rm addr:$src)>;
3632 def : Pat<(alignedloadv16bf16 addr:$src),
3633 (VMOVAPSZ256rm addr:$src)>;
3634 def : Pat<(alignedloadv32i8 addr:$src),
3635 (VMOVDQA64Z256rm addr:$src)>;
3636 def : Pat<(loadv8i32 addr:$src),
3637 (VMOVDQU64Z256rm addr:$src)>;
3638 def : Pat<(loadv16i16 addr:$src),
3639 (VMOVDQU64Z256rm addr:$src)>;
3640 def : Pat<(loadv16f16 addr:$src),
3641 (VMOVUPSZ256rm addr:$src)>;
3642 def : Pat<(loadv16bf16 addr:$src),
3643 (VMOVUPSZ256rm addr:$src)>;
3644 def : Pat<(loadv32i8 addr:$src),
3645 (VMOVDQU64Z256rm addr:$src)>;
// 256-bit stores.
3648 def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3649 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3650 def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3651 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3652 def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
3653 (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3654 def : Pat<(alignedstore (v16bf16 VR256X:$src), addr:$dst),
3655 (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3656 def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3657 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3658 def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3659 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3660 def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3661 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3662 def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
3663 (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
3664 def : Pat<(store (v16bf16 VR256X:$src), addr:$dst),
3665 (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
3666 def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3667 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
// f16/bf16 vectors have no dedicated masked moves; lower their selects,
// masked loads, and masked stores onto the 16-bit-element VMOVDQU16 forms at
// each vector length (512-bit under BWI; 256/128-bit under BWI+VLX).
3670 multiclass mask_move_lowering_f16_bf16<AVX512VLVectorVTInfo _> {
3671 let Predicates = [HasBWI] in {
// 512-bit: select -> rrk/rrkz; select-of-load -> rmk/rmkz; masked_load with
// undef/zero/register pass-through -> rmkz/rmkz/rmk; masked_store -> mrk.
3672 def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), (_.info512.VT VR512:$src0))),
3673 (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
3674 def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), _.info512.ImmAllZerosV)),
3675 (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
3676 def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3677 (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), (_.info512.VT VR512:$src0))),
3678 (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3679 def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3680 (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), _.info512.ImmAllZerosV)),
3681 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3682 def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3683 (_.info512.VT (_.info512.LdFrag addr:$src)), (_.info512.VT VR512:$src0))),
3684 (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3685 def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3686 (_.info512.VT (_.info512.LdFrag addr:$src)), _.info512.ImmAllZerosV)),
3687 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3688 def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, (_.info512.VT VR512:$src0))),
3689 (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3690 def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, undef)),
3691 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3692 def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, _.info512.ImmAllZerosV)),
3693 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3695 def : Pat<(masked_store (_.info512.VT VR512:$src), addr:$dst, VK32WM:$mask),
3696 (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
3698 let Predicates = [HasBWI, HasVLX] in {
// 256-bit versions of the same patterns (16-element masks).
3699 def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src0))),
3700 (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
3701 def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), _.info256.ImmAllZerosV)),
3702 (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
3703 def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3704 (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
3705 (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3706 def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3707 (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), _.info256.ImmAllZerosV)),
3708 (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3709 def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3710 (_.info256.VT (_.info256.LdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
3711 (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3712 def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3713 (_.info256.VT (_.info256.LdFrag addr:$src)), _.info256.ImmAllZerosV)),
3714 (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3715 def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, (_.info256.VT VR256X:$src0))),
3716 (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3717 def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, undef)),
3718 (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3719 def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, _.info256.ImmAllZerosV)),
3720 (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3722 def : Pat<(masked_store (_.info256.VT VR256X:$src), addr:$dst, VK16WM:$mask),
3723 (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;
// 128-bit versions of the same patterns (8-element masks).
3725 def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), (_.info128.VT VR128X:$src0))),
3726 (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
3727 def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), _.info128.ImmAllZerosV)),
3728 (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
3729 def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3730 (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
3731 (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3732 def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3733 (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), _.info128.ImmAllZerosV)),
3734 (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3735 def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3736 (_.info128.VT (_.info128.LdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
3737 (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3738 def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3739 (_.info128.VT (_.info128.LdFrag addr:$src)), _.info128.ImmAllZerosV)),
3740 (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3741 def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, (_.info128.VT VR128X:$src0))),
3742 (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3743 def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, undef)),
3744 (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3745 def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, _.info128.ImmAllZerosV)),
3746 (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3748 def : Pat<(masked_store (_.info128.VT VR128X:$src), addr:$dst, VK8WM:$mask),
3749 (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
// Instantiate the VMOVDQU16-based lowerings for both half-float families.
3753 defm : mask_move_lowering_f16_bf16<avx512vl_f16_info>;
3754 defm : mask_move_lowering_f16_bf16<avx512vl_bf16_info>;
3756 // Move Int Doubleword to Packed Double Int
// EVEX-encoded VMOVD/VMOVQ between GPRs and XMM/scalar FP registers.
3758 let ExeDomain = SSEPackedInt in {
// GR32 -> low element of an XMM register (upper elements zeroed by vmovd).
3759 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3760 "vmovd\t{$src, $dst|$dst, $src}",
3762 (v4i32 (scalar_to_vector GR32:$src)))]>,
3763 EVEX, Sched<[WriteVecMoveFromGpr]>;
// i32 memory -> low element of an XMM register.
3764 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3765 "vmovd\t{$src, $dst|$dst, $src}",
3767 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3768 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
// GR64 -> low element of an XMM register.
3769 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3770 "vmovq\t{$src, $dst|$dst, $src}",
3772 (v2i64 (scalar_to_vector GR64:$src)))]>,
3773 EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
// Memory form kept only for disassembly; no ISel pattern.
3774 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3775 def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3777 "vmovq\t{$src, $dst|$dst, $src}", []>,
3778 EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
// Bit-pattern moves between GR64 and scalar FR64X, codegen-only.
3779 let isCodeGenOnly = 1 in {
3780 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3781 "vmovq\t{$src, $dst|$dst, $src}",
3782 [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3783 EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
3784 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3785 "vmovq\t{$src, $dst|$dst, $src}",
3786 [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3787 EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
3789 } // ExeDomain = SSEPackedInt
3791 // Move Int Doubleword to Single Scalar
// Codegen-only: reinterpret a GR32 as an FR32X in-register.
3793 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3794 def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3795 "vmovd\t{$src, $dst|$dst, $src}",
3796 [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3797 EVEX, Sched<[WriteVecMoveFromGpr]>;
3798 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3800 // Move doubleword from xmm register to r/m32
3802 let ExeDomain = SSEPackedInt in {
// Extract element 0 of a v4i32 into a GR32.
3803 def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3804 "vmovd\t{$src, $dst|$dst, $src}",
3805 [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3807 EVEX, Sched<[WriteVecMoveToGpr]>;
// Store element 0 of a v4i32 directly to memory.
3808 def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
3809 (ins i32mem:$dst, VR128X:$src),
3810 "vmovd\t{$src, $dst|$dst, $src}",
3811 [(store (i32 (extractelt (v4i32 VR128X:$src),
3812 (iPTR 0))), addr:$dst)]>,
3813 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
3814 } // ExeDomain = SSEPackedInt
3816 // Move quadword from xmm1 register to r/m64
3818 let ExeDomain = SSEPackedInt in {
// Extract element 0 of a v2i64 into a GR64.
3819 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3820 "vmovq\t{$src, $dst|$dst, $src}",
3821 [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3823 TB, PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>,
3824 Requires<[HasAVX512]>;
// Store form kept for the disassembler only (no ISel pattern).
3826 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
3827 def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3828 "vmovq\t{$src, $dst|$dst, $src}", []>, TB, PD,
3829 EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecStore]>,
3830 Requires<[HasAVX512, In64BitMode]>;
// 0xD6 encoding of the qword store, used for ISel.
3832 def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3833 (ins i64mem:$dst, VR128X:$src),
3834 "vmovq\t{$src, $dst|$dst, $src}",
3835 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3837 EVEX, TB, PD, REX_W, EVEX_CD8<64, CD8VT1>,
3838 Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
// Register-register form of the 0xD6 encoding, disassembler only.
3840 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
3841 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3843 "vmovq\t{$src, $dst|$dst, $src}", []>,
3844 EVEX, REX_W, Sched<[SchedWriteVecLogic.XMM]>;
3845 } // ExeDomain = SSEPackedInt
// ".s" mnemonic suffix selects the MRMDestReg (store-form) encoding.
3847 def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
3848 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
3850 let Predicates = [HasAVX512] in {
3851 def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
3852 (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
3855 // Move Scalar Single to Double Int
// Codegen-only: reinterpret an FR32X as a GR32 in-register.
3857 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3858 def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3860 "vmovd\t{$src, $dst|$dst, $src}",
3861 [(set GR32:$dst, (bitconvert FR32X:$src))]>,
3862 EVEX, Sched<[WriteVecMoveToGpr]>;
3863 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3865 // Move Quadword Int to Packed Quadword Int
3867 let ExeDomain = SSEPackedInt in {
// vmovq load: i64 from memory -> low qword of an XMM register.
3868 def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3870 "vmovq\t{$src, $dst|$dst, $src}",
3872 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3873 EVEX, REX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
3874 } // ExeDomain = SSEPackedInt
3876 // Allow "vmovd" but print "vmovq".
3877 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3878 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
3879 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3880 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3882 // Conversions between masks and scalar fp.
// Bitcasts between k-registers and scalar fp go through a GPR: fp->GPR
// (VMOVSS2DIZrr / VMOVSDto64Zrr) then GPR->k (KMOVDkr / KMOVQkr), and back.
3883 def : Pat<(v32i1 (bitconvert FR32X:$src)),
3884 (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
3885 def : Pat<(f32 (bitconvert VK32:$src)),
3886 (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;
3888 def : Pat<(v64i1 (bitconvert FR64X:$src)),
3889 (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
3890 def : Pat<(f64 (bitconvert VK64:$src)),
3891 (VMOV64toSDZrr (KMOVQrk VK64:$src))>;
3893 //===----------------------------------------------------------------------===//
3894 // AVX-512 MOVSH, MOVSS, MOVSD
3895 //===----------------------------------------------------------------------===//
// Scalar move template (vmovss/vmovsd/vmovsh): reg-reg merge forms with
// optional masking, zero-extending loads, and (masked) scalar stores.
//   asm         - mnemonic string
//   OpNode      - reg-reg merge node (X86Movss/X86Movsd/X86Movsh)
//   vzload_frag - zero-extending scalar-load fragment for the rm form
//   _           - the element/vector type profile
3897 multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
3898 X86VectorVTInfo _, Predicate prd = HasAVX512> {
// The unmasked reg-reg form is always available for FP16; otherwise it is
// only used under OptForSize (faster alternatives exist elsewhere).
3899 let Predicates = !if (!eq (prd, HasFP16), [HasFP16], [prd, OptForSize]) in
3900 def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3901 (ins _.RC:$src1, _.RC:$src2),
3902 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3903 [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3904 _.ExeDomain>, EVEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>;
3905 let Predicates = [prd] in {
// Zero-masked reg-reg form.
3906 def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3907 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3908 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3909 "$dst {${mask}} {z}, $src1, $src2}"),
3910 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3911 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3913 _.ExeDomain>, EVEX, VVVV, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
// Merge-masked reg-reg form; $src0 is the pass-through, tied to $dst.
3914 let Constraints = "$src0 = $dst" in
3915 def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3916 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3917 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3918 "$dst {${mask}}, $src1, $src2}"),
3919 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3920 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3921 (_.VT _.RC:$src0))))],
3922 _.ExeDomain>, EVEX, VVVV, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
// Unmasked loads can be folded into users or rematerialized.
3923 let canFoldAsLoad = 1, isReMaterializable = 1 in {
3924 def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
3925 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3926 [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
3927 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3928 // _alt version uses FR32/FR64 register class.
3929 let isCodeGenOnly = 1 in
3930 def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3931 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3932 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3933 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
// Masked load forms carry no ISel patterns here; they are selected by the
// explicit lowering Pat<>s later in the file.
3935 let mayLoad = 1, hasSideEffects = 0 in {
3936 let Constraints = "$src0 = $dst" in
3937 def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3938 (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3939 !strconcat(asm, "\t{$src, $dst {${mask}}|",
3940 "$dst {${mask}}, $src}"),
3941 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
3942 def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3943 (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3944 !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3945 "$dst {${mask}} {z}, $src}"),
3946 [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
// Scalar store, plus its masked form (no pattern; lowered via Pat<>s).
3948 def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
3949 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3950 [(store _.FRC:$src, addr:$dst)], _.ExeDomain>,
3951 EVEX, Sched<[WriteFStore]>;
3952 let mayStore = 1, hasSideEffects = 0 in
3953 def mrk: AVX512PI<0x11, MRMDestMem, (outs),
3954 (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
3955 !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
3956 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>;
// Instantiations: f32/f64 under HasAVX512, f16 under HasFP16.
3960 defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
3961 VEX_LIG, TB, XS, EVEX_CD8<32, CD8VT1>;
3963 defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
3964 VEX_LIG, TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
3966 defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
3968 VEX_LIG, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
// Lower (OpNode src0, scalar_to_vector(X86selects ...)) to the masked
// reg-reg scalar moves: merge-masked (rrk) or zero-masked (rrkz).
3970 multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
3971 PatLeaf ZeroFP, X86VectorVTInfo _> {
// Select between two scalar FRC values -> merge-masked move.
3973 def : Pat<(_.VT (OpNode _.RC:$src0,
3974 (_.VT (scalar_to_vector
3975 (_.EltVT (X86selects VK1WM:$mask,
3976 (_.EltVT _.FRC:$src1),
3977 (_.EltVT _.FRC:$src2))))))),
3978 (!cast<Instruction>(InstrStr#rrk)
3979 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
3982 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
// Select against the FP zero immediate -> zero-masked move.
3984 def : Pat<(_.VT (OpNode _.RC:$src0,
3985 (_.VT (scalar_to_vector
3986 (_.EltVT (X86selects VK1WM:$mask,
3987 (_.EltVT _.FRC:$src1),
3988 (_.EltVT ZeroFP))))))),
3989 (!cast<Instruction>(InstrStr#rrkz)
3992 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
// Lower a masked store of a 128-bit vector widened to 512 bits to the
// masked scalar store (mrk); Mask is the dag computing the one-bit mask.
3995 multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
3996 dag Mask, RegisterClass MaskRC> {
3998 def : Pat<(masked_store
3999 (_.info512.VT (insert_subvector undef,
4000 (_.info128.VT _.info128.RC:$src),
4001 (iPTR 0))), addr:$dst, Mask),
4002 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4003 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4004 _.info128.RC:$src)>;
// Same, but the mask register is first widened to i32 via INSERT_SUBREG
// before being copied into VK1WM.
4008 multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
4009 AVX512VLVectorVTInfo _,
4010 dag Mask, RegisterClass MaskRC,
4011 SubRegIndex subreg> {
4013 def : Pat<(masked_store
4014 (_.info512.VT (insert_subvector undef,
4015 (_.info128.VT _.info128.RC:$src),
4016 (iPTR 0))), addr:$dst, Mask),
4017 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4018 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4019 _.info128.RC:$src)>;
4023 // This matches the more recent codegen from clang that avoids emitting a 512
4024 // bit masked store directly. Codegen will widen 128-bit masked store to 512
4025 // bits on AVX512F only targets.
4026 multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4027 AVX512VLVectorVTInfo _,
4028 dag Mask512, dag Mask128,
4029 RegisterClass MaskRC,
4030 SubRegIndex subreg> {
// AVX512F pattern: the masked store has been widened to 512 bits.
4033 def : Pat<(masked_store
4034 (_.info512.VT (insert_subvector undef,
4035 (_.info128.VT _.info128.RC:$src),
4036 (iPTR 0))), addr:$dst, Mask512),
4037 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4038 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4039 _.info128.RC:$src)>;
4041 // AVX512VL pattern.
4042 def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4043 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4044 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4045 _.info128.RC:$src)>;
// Lower a masked load (widened to 512 bits, low 128 bits then extracted)
// to the masked scalar load instructions: zero-masked (rmkz) when the
// pass-through is all-zeros, merge-masked (rmk) otherwise.
4048 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4049 dag Mask, RegisterClass MaskRC> {
4051 def : Pat<(_.info128.VT (extract_subvector
4052 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4053 _.info512.ImmAllZerosV)),
4055 (!cast<Instruction>(InstrStr#rmkz)
4056 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
// Merge-masked: pass-through is a zero-extended copy of $src.
4059 def : Pat<(_.info128.VT (extract_subvector
4060 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4061 (_.info512.VT (insert_subvector undef,
4062 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4065 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4066 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
// Same, but widens the mask register to i32 with INSERT_SUBREG before
// copying it into VK1WM.
4071 multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4072 AVX512VLVectorVTInfo _,
4073 dag Mask, RegisterClass MaskRC,
4074 SubRegIndex subreg> {
4076 def : Pat<(_.info128.VT (extract_subvector
4077 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4078 _.info512.ImmAllZerosV)),
4080 (!cast<Instruction>(InstrStr#rmkz)
4081 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4084 def : Pat<(_.info128.VT (extract_subvector
4085 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4086 (_.info512.VT (insert_subvector undef,
4087 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4090 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4091 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4096 // This matches the more recent codegen from clang that avoids emitting a 512
4097 // bit masked load directly. Codegen will widen 128-bit masked load to 512
4098 // bits on AVX512F only targets.
4099 multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4100 AVX512VLVectorVTInfo _,
4101 dag Mask512, dag Mask128,
4102 RegisterClass MaskRC,
4103 SubRegIndex subreg> {
4104 // AVX512F patterns.
4105 def : Pat<(_.info128.VT (extract_subvector
4106 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4107 _.info512.ImmAllZerosV)),
4109 (!cast<Instruction>(InstrStr#rmkz)
4110 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4113 def : Pat<(_.info128.VT (extract_subvector
4114 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4115 (_.info512.VT (insert_subvector undef,
4116 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4119 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4120 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4123 // AVX512VL patterns.
4124 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4125 _.info128.ImmAllZerosV)),
4126 (!cast<Instruction>(InstrStr#rmkz)
4127 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4130 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4131 (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4132 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4133 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
// Instantiate the scalar move/store lowering patterns for f32 and f64.
// The Mask dags differ only in how the one-bit mask is materialized from a
// GPR of each width.
4137 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4138 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4140 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4141 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4142 defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4143 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4144 defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4145 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
// FP16 (vmovsh) instantiations of the same lowering patterns.
4147 let Predicates = [HasFP16] in {
4148 defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
4149 defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4150 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4151 defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4152 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4153 defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4154 (v32i1 (insert_subvector
4155 (v32i1 immAllZerosV),
4156 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4158 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4161 defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4162 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4163 defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4164 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4165 defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4166 (v32i1 (insert_subvector
4167 (v32i1 immAllZerosV),
4168 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4170 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
// Masked f16 scalar select -> masked vmovsh; FR16X values are moved
// through VR128X since the instruction operates on XMM registers.
4173 def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))),
4174 (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk
4175 (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)),
4176 VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4177 (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4179 def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)),
4180 (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4181 (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
// Clang-style widened-mask (subreg2) instantiations for f32 and f64.
4184 defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4185 (v16i1 (insert_subvector
4186 (v16i1 immAllZerosV),
4187 (v4i1 (extract_subvector
4188 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4191 (v4i1 (extract_subvector
4192 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4193 (iPTR 0))), GR8, sub_8bit>;
4194 defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4199 (v16i1 immAllZerosV),
4200 (v2i1 (extract_subvector
4201 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4205 (v2i1 (extract_subvector
4206 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4207 (iPTR 0))), GR8, sub_8bit>;
4209 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4210 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4211 defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4212 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4213 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4214 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4216 defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4217 (v16i1 (insert_subvector
4218 (v16i1 immAllZerosV),
4219 (v4i1 (extract_subvector
4220 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4223 (v4i1 (extract_subvector
4224 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4225 (iPTR 0))), GR8, sub_8bit>;
4226 defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4231 (v16i1 immAllZerosV),
4232 (v2i1 (extract_subvector
4233 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4237 (v2i1 (extract_subvector
4238 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4239 (iPTR 0))), GR8, sub_8bit>;
// Masked f32/f64 scalar selects -> masked vmovss/vmovsd; FR32X/FR64X
// values are routed through VR128X since the instructions use XMM regs.
4241 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4242 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4243 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4244 VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4245 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4247 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4248 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4249 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4251 def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
4253 (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
4254 VK1WM:$mask, addr:$src)),
4256 def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
4257 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
4259 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4260 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4261 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4262 VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4263 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4265 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
4266 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4267 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4269 def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
4271 (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
4272 VK1WM:$mask, addr:$src)),
4274 def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
4275 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
// One-bit selects of whole 128-bit values also map onto the masked scalar
// moves ($src1 is passed for both merge operands).
4278 def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
4279 (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4280 def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
4281 (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4283 def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
4284 (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4285 def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
4286 (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
// Reversed (MRMDestReg, opcode 0x11) encodings of the scalar moves. They
// carry no ISel patterns — they exist for the disassembler and for the
// explicit ".s" mnemonic aliases defined below.
4288 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
4289 let Predicates = [HasFP16] in {
4290 def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4291 (ins VR128X:$src1, VR128X:$src2),
4292 "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4293 []>, T_MAP5, XS, EVEX, VVVV, VEX_LIG,
4294 Sched<[SchedWriteFShuffle.XMM]>;
4296 let Constraints = "$src0 = $dst" in
4297 def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4298 (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask,
4299 VR128X:$src1, VR128X:$src2),
4300 "vmovsh\t{$src2, $src1, $dst {${mask}}|"#
4301 "$dst {${mask}}, $src1, $src2}",
4302 []>, T_MAP5, XS, EVEX_K, EVEX, VVVV, VEX_LIG,
4303 Sched<[SchedWriteFShuffle.XMM]>;
4305 def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4306 (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4307 "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
4308 "$dst {${mask}} {z}, $src1, $src2}",
4309 []>, EVEX_KZ, T_MAP5, XS, EVEX, VVVV, VEX_LIG,
4310 Sched<[SchedWriteFShuffle.XMM]>;
4312 def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4313 (ins VR128X:$src1, VR128X:$src2),
4314 "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4315 []>, TB, XS, EVEX, VVVV, VEX_LIG,
4316 Sched<[SchedWriteFShuffle.XMM]>;
4318 let Constraints = "$src0 = $dst" in
4319 def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4320 (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4321 VR128X:$src1, VR128X:$src2),
4322 "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4323 "$dst {${mask}}, $src1, $src2}",
4324 []>, EVEX_K, TB, XS, EVEX, VVVV, VEX_LIG,
4325 Sched<[SchedWriteFShuffle.XMM]>;
4327 def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4328 (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4329 "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4330 "$dst {${mask}} {z}, $src1, $src2}",
4331 []>, EVEX_KZ, TB, XS, EVEX, VVVV, VEX_LIG,
4332 Sched<[SchedWriteFShuffle.XMM]>;
4334 def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4335 (ins VR128X:$src1, VR128X:$src2),
4336 "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4337 []>, TB, XD, EVEX, VVVV, VEX_LIG, REX_W,
4338 Sched<[SchedWriteFShuffle.XMM]>;
4340 let Constraints = "$src0 = $dst" in
4341 def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4342 (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4343 VR128X:$src1, VR128X:$src2),
4344 "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4345 "$dst {${mask}}, $src1, $src2}",
4346 []>, EVEX_K, TB, XD, EVEX, VVVV, VEX_LIG,
4347 REX_W, Sched<[SchedWriteFShuffle.XMM]>;
4349 def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4350 (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4352 "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4353 "$dst {${mask}} {z}, $src1, $src2}",
4354 []>, EVEX_KZ, TB, XD, EVEX, VVVV, VEX_LIG,
4355 REX_W, Sched<[SchedWriteFShuffle.XMM]>;
// ".s" mnemonic suffix lets assembly writers request the reversed
// (store-form) encodings defined above.
4358 def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4359 (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4360 def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"#
4361 "$dst {${mask}}, $src1, $src2}",
4362 (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask,
4363 VR128X:$src1, VR128X:$src2), 0>;
4364 def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4365 "$dst {${mask}} {z}, $src1, $src2}",
4366 (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask,
4367 VR128X:$src1, VR128X:$src2), 0>;
4368 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4369 (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4370 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4371 "$dst {${mask}}, $src1, $src2}",
4372 (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4373 VR128X:$src1, VR128X:$src2), 0>;
4374 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4375 "$dst {${mask}} {z}, $src1, $src2}",
4376 (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4377 VR128X:$src1, VR128X:$src2), 0>;
4378 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4379 (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4380 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4381 "$dst {${mask}}, $src1, $src2}",
4382 (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4383 VR128X:$src1, VR128X:$src2), 0>;
4384 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4385 "$dst {${mask}} {z}, $src1, $src2}",
4386 (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4387 VR128X:$src1, VR128X:$src2), 0>;
// When optimizing for size, lower vzmovl (move element 0, zero the rest)
// through VMOVSS with a zeroed first operand.
4389 let Predicates = [HasAVX512, OptForSize] in {
4390 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4391 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4392 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4393 (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4395 // Move low f32 and clear high bits.
4396 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4397 (SUBREG_TO_REG (i32 0),
4398 (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4399 (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4400 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4401 (SUBREG_TO_REG (i32 0),
4402 (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4403 (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4405 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4406 (SUBREG_TO_REG (i32 0),
4407 (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4408 (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4409 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4410 (SUBREG_TO_REG (i32 0),
4411 (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4412 (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4415 // Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4416 // VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
4417 let Predicates = [HasAVX512, OptForSpeed] in {
4418 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4419 (SUBREG_TO_REG (i32 0),
4420 (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4421 (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4422 (i8 1))), sub_xmm)>;
4423 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4424 (SUBREG_TO_REG (i32 0),
4425 (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4426 (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4427 (i8 3))), sub_xmm)>;
4430 let Predicates = [HasAVX512] in {
// Scalar loads inserted into a vector select the plain vmovss/vmovsd load.
4431 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4432 (VMOVSSZrm addr:$src)>;
4433 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4434 (VMOVSDZrm addr:$src)>;
4436 // Represent the same patterns above but in the form they appear for
// 256-bit types.
4438 def : Pat<(v8f32 (X86vzload32 addr:$src)),
4439 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4440 def : Pat<(v4f64 (X86vzload64 addr:$src)),
4441 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4443 // Represent the same patterns above but in the form they appear for
// 512-bit types.
4445 def : Pat<(v16f32 (X86vzload32 addr:$src)),
4446 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4447 def : Pat<(v8f64 (X86vzload64 addr:$src)),
4448 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
// FP16 equivalents of the vzmovl and vzload lowering patterns above.
4450 let Predicates = [HasFP16] in {
4451 def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
4452 (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
4453 def : Pat<(v8i16 (X86vzmovl (v8i16 VR128X:$src))),
4454 (VMOVSHZrr (v8i16 (AVX512_128_SET0)), VR128X:$src)>;
4456 // FIXME we need better canonicalization in dag combine
4457 def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
4458 (SUBREG_TO_REG (i32 0),
4459 (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4460 (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4461 def : Pat<(v16i16 (X86vzmovl (v16i16 VR256X:$src))),
4462 (SUBREG_TO_REG (i32 0),
4463 (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
4464 (v8i16 (EXTRACT_SUBREG (v16i16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4466 // FIXME we need better canonicalization in dag combine
4467 def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
4468 (SUBREG_TO_REG (i32 0),
4469 (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4470 (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
4471 def : Pat<(v32i16 (X86vzmovl (v32i16 VR512:$src))),
4472 (SUBREG_TO_REG (i32 0),
4473 (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
4474 (v8i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_xmm)))), sub_xmm)>;
4476 def : Pat<(v8f16 (X86vzload16 addr:$src)),
4477 (VMOVSHZrm addr:$src)>;
4479 def : Pat<(v16f16 (X86vzload16 addr:$src)),
4480 (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4482 def : Pat<(v32f16 (X86vzload16 addr:$src)),
4483 (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4486 let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
// vmovq xmm,xmm: keep the low qword and zero the rest (X86vzmovl).
4487 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4489 "vmovq\t{$src, $dst|$dst, $src}",
4490 [(set VR128X:$dst, (v2i64 (X86vzmovl
4491 (v2i64 VR128X:$src))))]>,
4495 let Predicates = [HasAVX512] in {
// anyext of a GR8 is widened through INSERT_SUBREG before the vmovd.
4496 def : Pat<(v4i32 (scalar_to_vector (i32 (anyext GR8:$src)))),
4497 (VMOVDI2PDIZrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
4498 GR8:$src, sub_8bit)))>;
4499 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4500 (VMOVDI2PDIZrr GR32:$src)>;
4502 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4503 (VMOV64toPQIZrr GR64:$src)>;
4505 // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
4506 def : Pat<(v4i32 (X86vzload32 addr:$src)),
4507 (VMOVDI2PDIZrm addr:$src)>;
4508 def : Pat<(v8i32 (X86vzload32 addr:$src)),
4509 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4510 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4511 (VMOVZPQILo2PQIZrr VR128X:$src)>;
4512 def : Pat<(v2i64 (X86vzload64 addr:$src)),
4513 (VMOVQI2PQIZrm addr:$src)>;
4514 def : Pat<(v4i64 (X86vzload64 addr:$src)),
4515 (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4517 // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4518 def : Pat<(v16i32 (X86vzload32 addr:$src)),
4519 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4520 def : Pat<(v8i64 (X86vzload64 addr:$src)),
4521 (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
// Wider vzmovl: operate on the low 128 bits and re-insert with
// SUBREG_TO_REG (upper bits implicitly zeroed).
4523 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4524 (SUBREG_TO_REG (i32 0),
4525 (v2f64 (VMOVZPQILo2PQIZrr
4526 (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4528 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4529 (SUBREG_TO_REG (i32 0),
4530 (v2i64 (VMOVZPQILo2PQIZrr
4531 (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4534 def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4535 (SUBREG_TO_REG (i32 0),
4536 (v2f64 (VMOVZPQILo2PQIZrr
4537 (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4539 def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4540 (SUBREG_TO_REG (i32 0),
4541 (v2i64 (VMOVZPQILo2PQIZrr
4542 (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4546 //===----------------------------------------------------------------------===//
4547 // AVX-512 - Non-temporals
4548 //===----------------------------------------------------------------------===//
4550 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4551 (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4552 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4553 EVEX, T8, PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4555 let Predicates = [HasVLX] in {
4556 def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4558 "vmovntdqa\t{$src, $dst|$dst, $src}",
4559 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4560 EVEX, T8, PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4562 def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4564 "vmovntdqa\t{$src, $dst|$dst, $src}",
4565 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4566 EVEX, T8, PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
// Non-temporal store (register -> memory) for one vector width.
// st_frag defaults to alignednontemporalstore; AddedComplexity = 400 makes
// these win over the ordinary aligned-store patterns.
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        X86SchedWriteMoveLS Sched,
                        PatFrag st_frag = alignednontemporalstore> {
  let SchedRW = [Sched.MR], AddedComplexity = 400 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}
// Instantiate avx512_movnt for 512-bit (AVX512) and 256/128-bit (VLX) widths.
// Closing braces restored.
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo VTInfo,
                           X86SchedWriteMoveLSWidths Sched> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
  }
}
// Non-temporal stores: integer (VMOVNTDQ) and FP double/single (VMOVNTPD/PS).
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
                                SchedWriteVecMoveLSNT>, TB, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
                                SchedWriteFMoveLSNT>, TB, PD, REX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
                                SchedWriteFMoveLSNT>, TB;
// Map remaining 512-bit NT store/load types onto VMOVNTDQZmr / VMOVNTDQAZrm.
// Closing brace restored.
let Predicates = [HasAVX512], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;

  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
}
// Map 256/128-bit NT store/load types onto the VLX VMOVNTDQ/VMOVNTDQA forms.
// Closing brace restored.
let Predicates = [HasVLX], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
}
//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//===----------------------------------------------------------------------===//
// Two-operand integer binop: register-register and register-memory forms,
// with masking variants via AVX512_maskable. Restored the missing
// "Sched<[sched]>;" on the rr form and the closing brace.
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                    IsCommutable, IsCommutable>, AVX512BIBase, EVEX, VVVV,
                    Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
                  AVX512BIBase, EVEX, VVVV,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Adds the broadcast-memory (rmb, EVEX.b) form on top of avx512_binop_rm.
// Closing brace restored.
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> :
           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                  "${src2}"#_.BroadcastStr#", $src1",
                  "$src1, ${src2}"#_.BroadcastStr,
                  (_.VT (OpNode _.RC:$src1,
                                (_.BroadcastLdFrag addr:$src2)))>,
                  AVX512BIBase, EVEX, VVVV, EVEX_B,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Instantiate avx512_binop_rm at 512 (prd) and 256/128 (prd + VLX) widths.
// Closing braces restored.
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo,
                              X86SchedWriteWidths sched, Predicate prd,
                              bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                           IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
                                sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
                                sched.XMM, IsCommutable>, EVEX_V128;
  }
}
// Same width expansion as avx512_binop_rm_vl but using the broadcast-capable
// avx512_binop_rmb. Closing braces restored.
multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo,
                               X86SchedWriteWidths sched, Predicate prd,
                               bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                            IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
                                 sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
                                 sched.XMM, IsCommutable>, EVEX_V128;
  }
}
// Element-size helper: qword (i64) binop, broadcast-capable, REX.W encoded.
multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                                  sched, prd, IsCommutable>,
                                  REX_W, EVEX_CD8<64, CD8VF>;
}
// Element-size helper: dword (i32) binop, broadcast-capable.
multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}
// Element-size helper: word (i16) binop. No broadcast form for sub-dword
// elements; VEX.W is ignored (WIG). Trailing "WIG;" and brace restored.
multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
                                 WIG;
}
// Element-size helper: byte (i8) binop. No broadcast form; VEX.W ignored.
// Trailing "WIG;" and brace restored.
multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
                                 WIG;
}
// Combined dword+qword instantiation ("d"/"q" mnemonic suffixes).
// Restored "IsCommutable>;" continuations and closing brace.
multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
                                IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
                                IsCommutable>;
}
// Combined byte+word instantiation ("b"/"w" mnemonic suffixes).
// Restored "IsCommutable>;" continuations and closing brace.
multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
                                IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
                                IsCommutable>;
}
// All four element sizes: d/q gated on HasAVX512, b/w on HasBWI.
// Closing brace restored.
multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    sched, HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                                    sched, HasBWI, IsCommutable>;
}
// Binop whose source and destination vector types differ (e.g. vpmultishiftqb),
// with a broadcast form using a third VT info (_Brdct). Restored the
// "(_Dst.VT (OpNode" head, "IsCommutable, IsCommutable>," continuation,
// "OpcodeStr," line, and closing brace.
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            IsCommutable, IsCommutable>,
                            AVX512BIBase, EVEX, VVVV, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                         (_Src.LdFrag addr:$src2)))>,
                        AVX512BIBase, EVEX, VVVV,
                        Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                    (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                    OpcodeStr,
                    "${src2}"#_Brdct.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_Brdct.BroadcastStr,
                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                     (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
                    AVX512BIBase, EVEX, VVVV, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Integer add/sub (all sizes), saturating add/sub (b/w), multiplies and
// averages. Restored the truncated "HasBWI, 1>;" continuations on
// VPMULHW / VPMULHUW.
defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                    SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                    SchedWriteVecALU, 0>;
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
                                    SchedWriteVecALU, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
                                     SchedWriteVecALU, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
                                     SchedWriteVecALU, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SchedWritePMULLD, HasAVX512, 1>, T8;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SchedWriteVecIMul, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SchedWriteVecIMul, HasDQI, 1>, T8;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
                                    HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
                                     HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
                                      SchedWriteVecIMul, HasBWI, 1>, T8;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", avgceilu,
                                   SchedWriteVecALU, HasBWI, 1>;
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
                                    SchedWriteVecIMul, HasAVX512, 1>, T8;
defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
                                     SchedWriteVecIMul, HasAVX512, 1>;
// Width expansion for avx512_binop_rm2 (distinct src/dst VT infos).
// Closing braces restored.
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched,
                            AVX512VLVectorVTInfo _SrcVTInfo,
                            AVX512VLVectorVTInfo _DstVTInfo,
                            SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                 _SrcVTInfo.info512, _DstVTInfo.info512,
                                 v8i64_info, IsCommutable>,
                                 EVEX_V512, EVEX_CD8<64, CD8VF>, REX_W;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      v4i64x_info, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<64, CD8VF>, REX_W;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      v2i64x_info, IsCommutable>,
                                      EVEX_V128, EVEX_CD8<64, CD8VF>, REX_W;
  }
}
// VPMULTISHIFTQB (VBMI): byte-granular multishift, i8 src/dst infos.
defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
                                       avx512vl_i8_info, avx512vl_i8_info,
                                       X86multishift, HasVBMI, 0>, T8;
// Broadcast-memory form for pack-style ops (distinct src/dst VTs).
// Restored the "OpcodeStr," line and closing brace.
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
                            X86FoldableSchedWrite sched> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                    (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                    OpcodeStr,
                    "${src2}"#_Src.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_Src.BroadcastStr,
                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                     (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
                    EVEX, VVVV, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Register/memory forms for pack-style ops. Restored the "(_Dst.VT (OpNode"
// head on the rr pattern and the closing brace.
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            IsCommutable, IsCommutable>,
                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX, VVVV, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                         (_Src.LdFrag addr:$src2)))>,
                         EVEX, VVVV, EVEX_CD8<_Src.EltSize, CD8VF>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// i32 -> i16 packs at all widths (rm + rmb forms), gated on BWI/VLX.
// Restored the "SDNode OpNode> {" header tail, EVEX_V256/EVEX_V128 suffixes,
// and closing braces.
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
                                v32i16_info, SchedWriteShuffle.ZMM>,
                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info, SchedWriteShuffle.YMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
                                      v16i16x_info, SchedWriteShuffle.YMM>,
                                      EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info, SchedWriteShuffle.XMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
                                      v8i16x_info, SchedWriteShuffle.XMM>,
                                      EVEX_V128;
  }
}
// i16 -> i8 packs (rm form only; no broadcast for byte elements).
// Restored header tail, EVEX_V256/V128 + WIG suffixes, and braces.
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
                                SchedWriteShuffle.ZMM>, EVEX_V512, WIG;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
                                     v32i8x_info, SchedWriteShuffle.YMM>,
                                     EVEX_V256, WIG;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
                                     v16i8x_info, SchedWriteShuffle.XMM>,
                                     EVEX_V128, WIG;
  }
}
// VPMADD-style widening multiply-add at all widths, gated on BWI/VLX.
// Closing braces restored.
multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                         SDNode OpNode, AVX512VLVectorVTInfo _Src,
                         AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
                                _Dst.info512, SchedWriteVecIMul.ZMM,
                                IsCommutable>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
                                     _Dst.info256, SchedWriteVecIMul.YMM,
                                     IsCommutable>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
                                     _Dst.info128, SchedWriteVecIMul.XMM,
                                     IsCommutable>, EVEX_V128;
  }
}
// Pack (saturating narrow) and multiply-add instantiations.
defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;

defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                                avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8, WIG;
defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                              avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, WIG;
// Integer min/max, signed and unsigned, all element sizes.
// b/w forms require HasBWI; d/q forms require HasAVX512.
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                                    SchedWriteVecALU, HasBWI, 1>, T8;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;
defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;

defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
                                    SchedWriteVecALU, HasBWI, 1>, T8;
defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;
defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;

defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
                                    SchedWriteVecALU, HasBWI, 1>, T8;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;
defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;

defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
                                    SchedWriteVecALU, HasBWI, 1>, T8;
defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;
defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;
// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX, HasEVEX512.
// Widen to ZMM via INSERT_SUBREG, run VPMULLQZrr/rmb, extract the low part.
// Restored the EXTRACT_SUBREG/VPMULLQZ* wrapper lines lost in truncation.
let Predicates = [HasDQI, NoVLX, HasEVEX512] in {
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
                sub_ymm)>;
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
                sub_ymm)>;

  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
                sub_xmm)>;
  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
                sub_xmm)>;
}
// Lower 128/256-bit i64 min/max via the 512-bit instruction named by Instr
// (same widen/extract trick as PMULLQ above). Restored the EXTRACT_SUBREG /
// !cast wrapper lines and closing brace lost in truncation.
multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
                sub_ymm)>;
  def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
                sub_ymm)>;

  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
                sub_xmm)>;
  def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
                sub_xmm)>;
}
// Instantiate the qword min/max lowerings when VLX is unavailable.
// Closing brace restored.
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
  defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
  defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
  defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
  defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
}
5077 //===----------------------------------------------------------------------===//
5078 // AVX-512 Logical Instructions
5079 //===----------------------------------------------------------------------===//
// Bitwise logic is defined only for d/q element sizes; byte/word uses are
// mapped onto the q forms by the Pats below.
defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                  SchedWriteVecLogic, HasAVX512, 1>;
defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SchedWriteVecLogic, HasAVX512>;
// Select byte/word logic ops onto the qword VLX instructions.
// Closing brace restored.
let Predicates = [HasVLX] in {
  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
}
// Select 512-bit byte/word logic ops onto the qword instructions.
// Closing brace restored.
let Predicates = [HasAVX512] in {
  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
}
// Patterns to catch vselect with different type than logic op.
// Restored the "_," parameter line, the vselect false-operand tails
// (_.RC:$src0 / _.ImmAllZerosV), and the closing brace.
multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
                                   X86VectorVTInfo _,
                                   X86VectorVTInfo IntInfo> {
  // Masked register-register logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, _.RC:$src2)>;

  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
             _.RC:$src2)>;

  // Masked register-memory logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
                                              _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
                                               addr:$src2)>;
}
// Broadcast-memory variant of the vselect/logic-op lowering above.
// Restored the "_," parameter line, "(bitconvert" heads, vselect tails,
// and closing brace.
multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
                                         X86VectorVTInfo _,
                                         X86VectorVTInfo IntInfo> {
  // Register-broadcast logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
}
// Expand the vselect/logic lowering across vector widths.
// Restored "IntInfo.info*>;" continuations and closing braces.
multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
                                         AVX512VLVectorVTInfo SelectInfo,
                                         AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
                                 IntInfo.info128>;
  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
                                 IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
                                 IntInfo.info512>;
}
}
// Width expansion for the broadcast lowering. Closing braces restored.
multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
                                               AVX512VLVectorVTInfo SelectInfo,
                                               AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
                                       SelectInfo.info128, IntInfo.info128>;
  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
                                       SelectInfo.info256, IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
                                       SelectInfo.info512, IntInfo.info512>;
}
}
// Cross-type vselect/logic lowering: every select element type paired with
// every other logic-op element type. Restored the "avx512vl_*_info>;"
// continuations and closing brace lost in truncation.
multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
  // i64 vselect with i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i8_info>;

  // i32 vselect with i64/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i8_info>;

  // f32 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i8_info>;

  // f64 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i8_info>;

  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
                                             avx512vl_f32_info,
                                             avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
                                             avx512vl_f64_info,
                                             avx512vl_i64_info>;
}
// Instantiate the cross-type lowerings for each logic op.
defm : avx512_logical_lowering_types<"VPAND", and>;
defm : avx512_logical_lowering_types<"VPOR", or>;
defm : avx512_logical_lowering_types<"VPXOR", xor>;
defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
5346 //===----------------------------------------------------------------------===//
5347 // AVX-512 FP arithmetic
5348 //===----------------------------------------------------------------------===//
// Scalar FP binop: intrinsic (rr_Int/rm_Int, maskable) plus isCodeGenOnly
// FRC-register forms. Restored the rr_Int "Sched<[sched]>;", the
// "Sched<[sched]> {" / "let isCommutable" body on rr, and closing braces.
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                            SDPatternOperator OpNode, SDNode VecNode,
                            X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (VecNode _.RC:$src1,
                                        (_.ScalarIntMemFrags addr:$src2)))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
              Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                 (_.ScalarLdFrag addr:$src2)))]>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }
}
// Scalar FP op with embedded static rounding control: adds an rrb_Int form
// taking an AVX512RC:$rc operand, encoded via EVEX_B + EVEX_RC. Reads MXCSR
// but (unlike avx512_fp_scalar) does not set mayRaiseFPException here.
// NOTE(review): the listing skips lines 5391/5393 (trailing pattern operand
// and closing brace) — verify against upstream.
5384 multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5385 SDNode VecNode, X86FoldableSchedWrite sched> {
5386 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5387 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5388 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5389 "$rc, $src2, $src1", "$src1, $src2, $rc",
5390 (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5392 EVEX_B, EVEX_RC, Sched<[sched]>;
// Scalar FP op with a suppress-all-exceptions ({sae}) variant, used for
// min/max-style ops: masked rr_Int/rm_Int forms (SIMD_EXC), codegen-only FRC
// rr/rm forms carrying the OpNode patterns (these read MXCSR and may raise
// FP exceptions), and an rrb_Int {sae} form (SaeNode) encoded with EVEX_B.
// NOTE(review): the listing omits a few lines between the visible ones
// (e.g. 5403, 5417, 5419) — verify against upstream before editing.
5394 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5395 SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5396 X86FoldableSchedWrite sched, bit IsCommutable> {
5397 let ExeDomain = _.ExeDomain in {
// Masked intrinsic reg-reg form.
5398 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5399 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5400 "$src2, $src1", "$src1, $src2",
5401 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5402 Sched<[sched]>, SIMD_EXC;
// Masked intrinsic reg-mem form.
5404 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5405 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5406 "$src2, $src1", "$src1, $src2",
5407 (_.VT (VecNode _.RC:$src1,
5408 (_.ScalarIntMemFrags addr:$src2)))>,
5409 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
// CodeGen-only FRC forms for the plain scalar OpNode.
5411 let isCodeGenOnly = 1, Predicates = [HasAVX512],
5412 Uses = [MXCSR], mayRaiseFPException = 1 in {
5413 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5414 (ins _.FRC:$src1, _.FRC:$src2),
5415 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5416 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5418 let isCommutable = IsCommutable;
5420 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5421 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5422 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5423 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5424 (_.ScalarLdFrag addr:$src2)))]>,
5425 Sched<[sched.Folded, sched.ReadAfterFold]>;
// {sae} form: exceptions suppressed, selected by SaeNode.
5428 let Uses = [MXCSR] in
5429 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5430 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5431 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5432 (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5433 EVEX_B, Sched<[sched]>;
5437 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5438 SDNode VecNode, SDNode RndNode,
5439 X86SchedWriteSizes sched, bit IsCommutable> {
5440 defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5441 sched.PS.Scl, IsCommutable>,
5442 avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5444 TB, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5445 defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5446 sched.PD.Scl, IsCommutable>,
5447 avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5449 TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5450 let Predicates = [HasFP16] in
5451 defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
5452 VecNode, sched.PH.Scl, IsCommutable>,
5453 avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
5455 T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>;
5458 multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5459 SDNode VecNode, SDNode SaeNode,
5460 X86SchedWriteSizes sched, bit IsCommutable> {
5461 defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5462 VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
5463 TB, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5464 defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5465 VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
5466 TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5467 let Predicates = [HasFP16] in {
5468 defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
5469 VecNode, SaeNode, sched.PH.Scl, IsCommutable>,
5470 T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>;
// Scalar instantiations: add/mul/sub/div get rounding-control variants
// (avx512_binop_s_round); min/max get {sae} variants (avx512_binop_s_sae).
// Trailing bit is IsCommutable: 1 for add/mul, 0 for sub/div/min/max
// (IEEE min/max are not commutable due to NaN/signed-zero handling — see
// the comment below about X86fminc/X86fmaxc).
5473 defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
5474 SchedWriteFAddSizes, 1>;
5475 defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
5476 SchedWriteFMulSizes, 1>;
5477 defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
5478 SchedWriteFAddSizes, 0>;
5479 defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
5480 SchedWriteFDivSizes, 0>;
5481 defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5482 SchedWriteFCmpSizes, 0>;
5483 defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5484 SchedWriteFCmpSizes, 0>;
5486 // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
5487 // X86fminc and X86fmaxc instead of X86fmin and X86fmax
// CodeGen-only commutable scalar min/max forms (used for X86fminc/X86fmaxc,
// per the comment above): FRC reg/reg and reg/mem forms with isCommutable=1
// on the register form.
// NOTE(review): the listing omits lines 5496/5498/5505-5506 (Sched wrapper
// and closing braces) — verify against upstream before editing.
5488 multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5489 X86VectorVTInfo _, SDNode OpNode,
5490 X86FoldableSchedWrite sched> {
5491 let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5492 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5493 (ins _.FRC:$src1, _.FRC:$src2),
5494 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5495 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5497 let isCommutable = 1;
5499 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5500 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5501 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5502 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5503 (_.ScalarLdFrag addr:$src2)))]>,
5504 Sched<[sched.Folded, sched.ReadAfterFold]>;
5507 defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5508 SchedWriteFCmp.Scl>, TB, XS,
5509 EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5511 defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5512 SchedWriteFCmp.Scl>, TB, XD,
5513 REX_W, EVEX, VVVV, VEX_LIG,
5514 EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5516 defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5517 SchedWriteFCmp.Scl>, TB, XS,
5518 EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5520 defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5521 SchedWriteFCmp.Scl>, TB, XD,
5522 REX_W, EVEX, VVVV, VEX_LIG,
5523 EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5525 defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
5526 SchedWriteFCmp.Scl>, T_MAP5, XS,
5527 EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC;
5529 defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
5530 SchedWriteFCmp.Scl>, T_MAP5, XS,
5531 EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC;
// Packed FP binary op for one vector type `_`: reg-reg (rr), full-width
// load (rm), and element-broadcast (rmb, EVEX_B) forms. Uses
// AVX512_maskable_split so the unmasked pattern (OpNode) and the masked
// pattern (MaskOpNode) can differ. FP-exception behavior is controllable
// via MayRaiseFPException (defaults to 1; MXCSR is listed in Uses).
// NOTE(review): the listing omits line 5536 (the IsCommutable parameter
// declaration) and the closing braces 5563-5566 — verify against upstream.
5533 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5534 SDPatternOperator MaskOpNode,
5535 X86VectorVTInfo _, X86FoldableSchedWrite sched,
5537 bit IsKCommutable = IsCommutable,
5538 string suffix = _.Suffix,
5539 string ClobberConstraint = "",
5540 bit MayRaiseFPException = 1> {
5541 let ExeDomain = _.ExeDomain, hasSideEffects = 0,
5542 Uses = [MXCSR], mayRaiseFPException = MayRaiseFPException in {
// Register-register form; commutability flags forwarded separately for
// the plain, masked (k) and zero-masked (kz) variants.
5543 defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
5544 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#suffix,
5545 "$src2, $src1", "$src1, $src2",
5546 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
5547 (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), ClobberConstraint,
5548 IsCommutable, IsKCommutable, IsKCommutable>, EVEX, VVVV, Sched<[sched]>;
5549 let mayLoad = 1 in {
// Full-vector memory operand.
5550 defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5551 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix,
5552 "$src2, $src1", "$src1, $src2",
5553 (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5554 (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5555 ClobberConstraint>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast-from-scalar memory operand ({1toN} syntax via BroadcastStr).
5556 defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5557 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix,
5558 "${src2}"#_.BroadcastStr#", $src1",
5559 "$src1, ${src2}"#_.BroadcastStr,
5560 (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5561 (MaskOpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5562 ClobberConstraint>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Packed FP op with embedded static rounding control: a single rrb form
// taking AVX512RC:$rc, selected via OpNodeRnd with the rounding mode passed
// as (i32 timm:$rc); encoded with EVEX_B + EVEX_RC. Reads MXCSR.
5567 multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5568 SDPatternOperator OpNodeRnd,
5569 X86FoldableSchedWrite sched, X86VectorVTInfo _,
5570 string suffix = _.Suffix,
5571 string ClobberConstraint = ""> {
5572 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5573 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5574 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#suffix,
5575 "$rc, $src2, $src1", "$src1, $src2, $rc",
5576 (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))),
5577 0, 0, 0, vselect_mask, ClobberConstraint>,
5578 EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
// Packed FP op with {sae} (suppress-all-exceptions): a single rrb form
// selected via OpNodeSAE; EVEX_B with a register operand encodes SAE.
5581 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5582 SDPatternOperator OpNodeSAE,
5583 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5584 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5585 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5586 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5587 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5588 (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5589 EVEX, VVVV, EVEX_B, Sched<[sched]>;
// Instantiates a packed FP binary op across PS (f32) and PD (f64) at
// 512-bit (gated on `prd`) and 128/256-bit (additionally gated on HasVLX).
// IsPD128Commutable lets the v2f64 form's commutability be controlled
// separately from the rest (it is passed as the packed IsCommutable for
// PDZ128, with IsCommutable forwarded as its IsKCommutable).
// NOTE(review): the listing omits closing braces (5604-5605, 5621-5623) —
// verify against upstream before editing.
5592 multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5593 SDPatternOperator MaskOpNode,
5594 Predicate prd, X86SchedWriteSizes sched,
5595 bit IsCommutable = 0,
5596 bit IsPD128Commutable = IsCommutable> {
5597 let Predicates = [prd] in {
5598 defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
5599 sched.PS.ZMM, IsCommutable>, EVEX_V512, TB,
5600 EVEX_CD8<32, CD8VF>;
5601 defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
5602 sched.PD.ZMM, IsCommutable>, EVEX_V512, TB, PD, REX_W,
5603 EVEX_CD8<64, CD8VF>;
5606 // Define only if AVX512VL feature is present.
5607 let Predicates = [prd, HasVLX] in {
5608 defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
5609 sched.PS.XMM, IsCommutable>, EVEX_V128, TB,
5610 EVEX_CD8<32, CD8VF>;
5611 defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
5612 sched.PS.YMM, IsCommutable>, EVEX_V256, TB,
5613 EVEX_CD8<32, CD8VF>;
5614 defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
5615 sched.PD.XMM, IsPD128Commutable,
5616 IsCommutable>, EVEX_V128, TB, PD, REX_W,
5617 EVEX_CD8<64, CD8VF>;
5618 defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
5619 sched.PD.YMM, IsCommutable>, EVEX_V256, TB, PD, REX_W,
5620 EVEX_CD8<64, CD8VF>;
5624 multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5625 SDPatternOperator MaskOpNode,
5626 X86SchedWriteSizes sched, bit IsCommutable = 0> {
5627 let Predicates = [HasFP16] in {
5628 defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
5629 sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5,
5630 EVEX_CD8<16, CD8VF>;
5632 let Predicates = [HasVLX, HasFP16] in {
5633 defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
5634 sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5,
5635 EVEX_CD8<16, CD8VF>;
5636 defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
5637 sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5,
5638 EVEX_CD8<16, CD8VF>;
5642 let Uses = [MXCSR] in
5643 multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5644 X86SchedWriteSizes sched> {
5645 let Predicates = [HasFP16] in {
5646 defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5648 EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
5650 defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5652 EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
5653 defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5655 EVEX_V512, TB, PD, REX_W,EVEX_CD8<64, CD8VF>;
5658 let Uses = [MXCSR] in
5659 multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5660 X86SchedWriteSizes sched> {
5661 let Predicates = [HasFP16] in {
5662 defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5664 EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
5666 defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5668 EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
5669 defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5671 EVEX_V512, TB, PD, REX_W,EVEX_CD8<64, CD8VF>;
5674 defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
5675 SchedWriteFAddSizes, 1>,
5676 avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>,
5677 avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5678 defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
5679 SchedWriteFMulSizes, 1>,
5680 avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>,
5681 avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5682 defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
5683 SchedWriteFAddSizes>,
5684 avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>,
5685 avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5686 defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
5687 SchedWriteFDivSizes>,
5688 avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>,
5689 avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
5690 defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
5691 SchedWriteFCmpSizes, 0>,
5692 avx512_fp_binop_ph<0x5D, "vmin", X86fmin, X86fmin, SchedWriteFCmpSizes, 0>,
5693 avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5694 defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
5695 SchedWriteFCmpSizes, 0>,
5696 avx512_fp_binop_ph<0x5F, "vmax", X86fmax, X86fmax, SchedWriteFCmpSizes, 0>,
5697 avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
5698 let isCodeGenOnly = 1 in {
5699 defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
5700 SchedWriteFCmpSizes, 1>,
5701 avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc,
5702 SchedWriteFCmpSizes, 1>;
5703 defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
5704 SchedWriteFCmpSizes, 1>,
5705 avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc,
5706 SchedWriteFCmpSizes, 1>;
5708 let Uses = []<Register>, mayRaiseFPException = 0 in {
5709 defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
5710 SchedWriteFLogicSizes, 1>;
5711 defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
5712 SchedWriteFLogicSizes, 0>;
5713 defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
5714 SchedWriteFLogicSizes, 1>;
5715 defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
5716 SchedWriteFLogicSizes, 1>;
5719 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5720 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5721 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5722 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5723 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5724 "$src2, $src1", "$src1, $src2",
5725 (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5726 EVEX, VVVV, Sched<[sched]>;
5727 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5728 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5729 "$src2, $src1", "$src1, $src2",
5730 (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5731 EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
5732 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5733 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5734 "${src2}"#_.BroadcastStr#", $src1",
5735 "$src1, ${src2}"#_.BroadcastStr,
5736 (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5737 EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5741 multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5742 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5743 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5744 defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5745 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5746 "$src2, $src1", "$src1, $src2",
5747 (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5749 defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5750 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
5751 "$src2, $src1", "$src1, $src2",
5752 (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
5753 Sched<[sched.Folded, sched.ReadAfterFold]>;
5757 multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5758 X86SchedWriteWidths sched> {
5759 let Predicates = [HasFP16] in {
5760 defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
5761 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
5762 EVEX_V512, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
5763 defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
5764 avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
5765 EVEX, VVVV, T_MAP6, PD, EVEX_CD8<16, CD8VT1>;
5767 defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
5768 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
5769 EVEX_V512, EVEX_CD8<32, CD8VF>, T8, PD;
5770 defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
5771 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
5772 EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
5773 defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
5774 avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
5775 X86scalefsRnd, sched.Scl>,
5776 EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8, PD;
5777 defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
5778 avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
5779 X86scalefsRnd, sched.Scl>,
5780 EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>, REX_W, T8, PD;
5782 // Define only if AVX512VL feature is present.
5783 let Predicates = [HasVLX] in {
5784 defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
5785 EVEX_V128, EVEX_CD8<32, CD8VF>, T8, PD;
5786 defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
5787 EVEX_V256, EVEX_CD8<32, CD8VF>, T8, PD;
5788 defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
5789 EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
5790 defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
5791 EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
5794 let Predicates = [HasFP16, HasVLX] in {
5795 defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
5796 EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6, PD;
5797 defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
5798 EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6, PD;
5801 defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", SchedWriteFAdd>;
5803 //===----------------------------------------------------------------------===//
5804 // AVX-512 VPTESTM instructions
5805 //===----------------------------------------------------------------------===//
// VPTESTM/VPTESTNM-style test: produces a mask register (KRC) result from
// two vector sources; rr and rm (full-vector load) forms. Patterns are
// null_frag on purpose — selection is done by hand (see NOTE below).
5807 multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
5808 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5809 // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
5810 // There are just too many permutations due to commutability and bitcasts.
5811 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
// Reg-reg form; trailing 1 marks it commutable.
5812 defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5813 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5814 "$src2, $src1", "$src1, $src2",
5815 (null_frag), (null_frag), 1>,
5816 EVEX, VVVV, Sched<[sched]>;
// Reg-mem form (not commutable).
5818 defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5819 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5820 "$src2, $src1", "$src1, $src2",
5821 (null_frag), (null_frag)>,
5822 EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
5823 Sched<[sched.Folded, sched.ReadAfterFold]>;
5827 multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
5828 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5829 let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
5830 defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5831 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5832 "${src2}"#_.BroadcastStr#", $src1",
5833 "$src1, ${src2}"#_.BroadcastStr,
5834 (null_frag), (null_frag)>,
5835 EVEX_B, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
5836 Sched<[sched.Folded, sched.ReadAfterFold]>;
5839 multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
5840 X86SchedWriteWidths sched,
5841 AVX512VLVectorVTInfo _> {
5842 let Predicates = [HasAVX512] in
5843 defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512>,
5844 avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
5846 let Predicates = [HasAVX512, HasVLX] in {
5847 defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256>,
5848 avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
5849 defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128>,
5850 avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
5854 multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
5855 X86SchedWriteWidths sched> {
5856 defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
5858 defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
5859 avx512vl_i64_info>, REX_W;
5862 multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5863 X86SchedWriteWidths sched> {
5864 let Predicates = [HasBWI] in {
5865 defm WZ: avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
5866 v32i16_info>, EVEX_V512, REX_W;
5867 defm BZ: avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
5868 v64i8_info>, EVEX_V512;
5871 let Predicates = [HasVLX, HasBWI] in {
5872 defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
5873 v16i16x_info>, EVEX_V256, REX_W;
5874 defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
5875 v8i16x_info>, EVEX_V128, REX_W;
5876 defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
5877 v32i8x_info>, EVEX_V256;
5878 defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
5879 v16i8x_info>, EVEX_V128;
5883 multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5884 X86SchedWriteWidths sched> :
5885 avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
5886 avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
5888 defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
5889 SchedWriteVecLogic>, T8, PD;
5890 defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
5891 SchedWriteVecLogic>, T8, XS;
5893 //===----------------------------------------------------------------------===//
5894 // AVX-512 Shift instructions
5895 //===----------------------------------------------------------------------===//
// Shift/rotate by immediate: ri (register source) and mi (memory source)
// forms; the 8-bit immediate is matched as (i8 timm:$src2). ImmFormR/ImmFormM
// carry the MRM opcode-extension encodings (e.g. MRM2r/MRM2m for vpsrl).
// NOTE(review): the listing omits lines 5905 and 5910 (Sched for ri and the
// immediate operand of the mi pattern) — verify against upstream.
5897 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5898 string OpcodeStr, SDNode OpNode,
5899 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5900 let ExeDomain = _.ExeDomain in {
5901 defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5902 (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5903 "$src2, $src1", "$src1, $src2",
5904 (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
5906 defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5907 (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5908 "$src2, $src1", "$src1, $src2",
5909 (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
5911 Sched<[sched.Folded]>;
5915 multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5916 string OpcodeStr, SDNode OpNode,
5917 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5918 let ExeDomain = _.ExeDomain in
5919 defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5920 (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5921 "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
5922 (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
5923 EVEX_B, Sched<[sched.Folded]>;
5926 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5927 X86FoldableSchedWrite sched, ValueType SrcVT,
5928 X86VectorVTInfo _> {
5929 // src2 is always 128-bit
5930 let ExeDomain = _.ExeDomain in {
5931 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5932 (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5933 "$src2, $src1", "$src1, $src2",
5934 (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
5935 AVX512BIBase, EVEX, VVVV, Sched<[sched]>;
5936 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5937 (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5938 "$src2, $src1", "$src1, $src2",
5939 (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
5941 EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
5945 multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5946 X86SchedWriteWidths sched, ValueType SrcVT,
5947 AVX512VLVectorVTInfo VTInfo,
5949 let Predicates = [prd] in
5950 defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
5951 VTInfo.info512>, EVEX_V512,
5952 EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5953 let Predicates = [prd, HasVLX] in {
5954 defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
5955 VTInfo.info256>, EVEX_V256,
5956 EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5957 defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
5958 VTInfo.info128>, EVEX_V128,
5959 EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
5963 multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5964 string OpcodeStr, SDNode OpNode,
5965 X86SchedWriteWidths sched> {
5966 defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
5967 avx512vl_i32_info, HasAVX512>;
5968 defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
5969 avx512vl_i64_info, HasAVX512>, REX_W;
5970 defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
5971 avx512vl_i16_info, HasBWI>;
5974 multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5975 string OpcodeStr, SDNode OpNode,
5976 X86SchedWriteWidths sched,
5977 AVX512VLVectorVTInfo VTInfo> {
5978 let Predicates = [HasAVX512] in
5979 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5980 sched.ZMM, VTInfo.info512>,
5981 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
5982 VTInfo.info512>, EVEX_V512;
5983 let Predicates = [HasAVX512, HasVLX] in {
5984 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5985 sched.YMM, VTInfo.info256>,
5986 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
5987 VTInfo.info256>, EVEX_V256;
5988 defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5989 sched.XMM, VTInfo.info128>,
5990 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
5991 VTInfo.info128>, EVEX_V128;
5995 multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
5996 string OpcodeStr, SDNode OpNode,
5997 X86SchedWriteWidths sched> {
5998 let Predicates = [HasBWI] in
5999 defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6000 sched.ZMM, v32i16_info>, EVEX_V512, WIG;
6001 let Predicates = [HasVLX, HasBWI] in {
6002 defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6003 sched.YMM, v16i16x_info>, EVEX_V256, WIG;
6004 defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6005 sched.XMM, v8i16x_info>, EVEX_V128, WIG;
6009 multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
6010 Format ImmFormR, Format ImmFormM,
6011 string OpcodeStr, SDNode OpNode,
6012 X86SchedWriteWidths sched> {
6013 defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
6014 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
6015 defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
6016 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W;
6019 defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
6020 SchedWriteVecShiftImm>,
6021 avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
6022 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6024 defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
6025 SchedWriteVecShiftImm>,
6026 avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
6027 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6029 defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
6030 SchedWriteVecShiftImm>,
6031 avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
6032 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6034 defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
6035 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6036 defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
6037 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6039 defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
6040 SchedWriteVecShift>;
6041 defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
6042 SchedWriteVecShift>;
6043 defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
6044 SchedWriteVecShift>;
6046 // Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
6047 let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
6048 def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
6049 (EXTRACT_SUBREG (v8i64
6051 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6052 VR128X:$src2)), sub_ymm)>;
6054 def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6055 (EXTRACT_SUBREG (v8i64
6057 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6058 VR128X:$src2)), sub_xmm)>;
6060 def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
6061 (EXTRACT_SUBREG (v8i64
6063 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6064 timm:$src2)), sub_ymm)>;
6066 def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
6067 (EXTRACT_SUBREG (v8i64
6069 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6070 timm:$src2)), sub_xmm)>;
6073 //===-------------------------------------------------------------------===//
6074 // Variable Bit Shifts
6075 //===-------------------------------------------------------------------===//
// Variable per-element shift (VPSLLV/VPSRLV/VPSRAV style): the shift amount
// comes from a second full-width vector operand; rr and rm (full-vector
// load) forms.
6077 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
6078 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6079 let ExeDomain = _.ExeDomain in {
6080 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6081 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
6082 "$src2, $src1", "$src1, $src2",
6083 (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
6084 AVX5128IBase, EVEX, VVVV, Sched<[sched]>;
6085 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6086 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
6087 "$src2, $src1", "$src1, $src2",
6088 (_.VT (OpNode _.RC:$src1,
6089 (_.VT (_.LdFrag addr:$src2))))>,
6090 AVX5128IBase, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
6091 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Embedded-broadcast (EVEX.b) memory form of the variable shift: a single
// scalar element is loaded and splatted across the vector second source.
multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                 "${src2}"#_.BroadcastStr#", $src1",
                 "$src1, ${src2}"#_.BroadcastStr,
                 (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
                 AVX5128IBase, EVEX_B, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
                 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiate one element width at all three vector lengths: ZMM under
// HasAVX512, YMM/XMM additionally gated on HasVLX.
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates  = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
  defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
// Instantiate both the dword ("d") and qword ("q") element variants of a
// variable shift; the qword variant takes REX_W (EVEX.W=1).
multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, X86SchedWriteWidths sched> {
  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
                                  avx512vl_i64_info>, REX_W;
// Use 512bit version to implement 128/256 bit in case NoVLX.
// Operands are widened to 512 bits with INSERT_SUBREG, the Zrr instruction
// (looked up by name via !cast) is applied, and the narrow result is taken
// from the low subregister.
multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
                                     SDNode OpNode, list<Predicate> p> {
  let Predicates = p in {
  // 256-bit operation via the 512-bit instruction.
  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
                                  (_.info256.VT _.info256.RC:$src2))),
            (!cast<Instruction>(OpcodeStr#"Zrr")
                (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),

  // 128-bit operation via the 512-bit instruction.
  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
                                  (_.info128.VT _.info128.RC:$src2))),
            (!cast<Instruction>(OpcodeStr#"Zrr")
                (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
// Word-element (16-bit) variable shift variants; these require BWI rather
// than plain AVX512, and VLX+BWI for the 128/256-bit forms.
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
  let Predicates = [HasVLX, HasBWI] in {
  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
// Variable shifts: d/q element forms plus the BWI word forms, and the
// variable rotates (VPRORV/VPROLV, d/q only).
defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;

defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;

defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;

defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;

// NoVLX fallbacks: select the 512-bit Zrr form for narrow vectors. VPSRAVQ
// has no AVX2 equivalent, and the word forms need BWI without VLX.
defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX, HasEVEX512]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX, HasEVEX512]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX, HasEVEX512]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX, HasEVEX512]>;
// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Same widen/operate/extract technique as the shift fallbacks above.
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
  // Variable-count left rotates.
  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
              (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
              (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),

  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
              (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
              (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),

  // Rotate-by-immediate forms.
  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
              timm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
              timm:$src2)), sub_ymm)>;
  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
              timm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
              timm:$src2)), sub_ymm)>;
// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Mirror of the VPROL block above, for right rotates.
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
  // Variable-count right rotates.
  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
              (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
              (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),

  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
              (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
              (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),

  // Rotate-by-immediate forms.
  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
              timm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
              timm:$src2)), sub_ymm)>;
  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
              timm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
              timm:$src2)), sub_ymm)>;
//===-------------------------------------------------------------------===//
// 1-src variable permutation VPERMW/D/Q
//===-------------------------------------------------------------------===//

// Variable permute for d/q elements. Reuses avx512_var_shift because the
// encoding shape is identical. Only ZMM and YMM are defined: there is no
// 128-bit cross-lane VPERMD/Q form.
multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
// Immediate-controlled permute (VPERMQ/VPERMPD $imm forms), built from the
// shift-by-immediate multiclasses, at ZMM and YMM widths.
multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info256>, EVEX_V256;
// Byte/word variable permute (VPERMB/VPERMW); gated on a caller-supplied
// predicate (HasVBMI / HasBWI) since these are separate feature sets.
multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
                           Predicate prd, SDNode OpNode,
                           X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates = [prd] in
  defm Z:   avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
  let Predicates = [HasVLX, prd] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
// Variable permutes. VPERMW and VPERMB share opcode 0x8D and are
// distinguished by EVEX.W (REX_W) and their feature predicates.
defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
                               WriteVarShuffle256, avx512vl_i16_info>, REX_W;
defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
                               WriteVarShuffle256, avx512vl_i8_info>;

defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i32_info>;
defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i64_info>, REX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f64_info>, REX_W;

// Immediate-controlled qword permutes (vpermq/vpermpd $imm).
defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
                                     X86VPermi, WriteShuffle256, avx512vl_i64_info>,
                                     EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
                                      X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
                                      EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - VPERMIL
//===----------------------------------------------------------------------===//

// In-lane permute with a vector control operand (Ctrl may be a different
// integer type from the FP data type _). Register, memory and embedded
// broadcast forms.
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
                             X86VectorVTInfo Ctrl> {
  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1,
                               (Ctrl.VT Ctrl.RC:$src2)))>,
                  T8, PD, EVEX, VVVV, Sched<[sched]>;
  // Control vector loaded from memory.
  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                           (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
                  T8, PD, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Control element broadcast from memory (EVEX.b).
  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                   "${src2}"#_.BroadcastStr#", $src1",
                   "$src1, ${src2}"#_.BroadcastStr,
                            (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
                   T8, PD, EVEX, VVVV, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiate the variable-control VPERMILP* at all three vector lengths
// using the X86VPermilpv node.
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
                                    X86SchedWriteWidths sched,
                                    AVX512VLVectorVTInfo _,
                                    AVX512VLVectorVTInfo Ctrl> {
  let Predicates = [HasAVX512] in {
  defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
                                _.info512, Ctrl.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
  defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
                                _.info128, Ctrl.info128>, EVEX_V128;
  defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
                                _.info256, Ctrl.info256>, EVEX_V256;
// Top-level VPERMILP* generator: one opcode for the immediate-control form
// (OpcImm, via the shift-rmi multiclasses) and one for the vector-control
// form (OpcVar).
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                    X86VPermilpi, SchedWriteFShuffle, _>,
             EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
// VPERMILPS/VPERMILPD instances; ExeDomain pins the FP execution domain.
let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
                               avx512vl_i64_info>, REX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
//===----------------------------------------------------------------------===//

// Immediate shuffles. All three share opcode 0x70 and are distinguished by
// their mandatory prefix (66 / F3 / F2 via the Ii8Base classes).
defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
                                      X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
                                      EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
                                  X86PShufhw, SchedWriteShuffle>,
                                  EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
                                  X86PShuflw, SchedWriteShuffle>,
                                  EVEX, AVX512XDIi8Base;
//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFB
//===----------------------------------------------------------------------===//

// Byte shuffle; reuses the two-source avx512_var_shift shape. Requires BWI
// (and VLX for the narrow widths).
multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,

  let Predicates = [HasVLX, HasBWI] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
// VPSHUFB instance; WIG because VEX.W is ignored for this opcode.
defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
                                  SchedWriteVarShuffle>, WIG;
//===----------------------------------------------------------------------===//
// Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//

// VMOVLHPS: result = {low half of $src1, low half of $src2}.
def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
                            Sched<[SchedWriteFShuffle.XMM]>, EVEX, VVVV;
// VMOVHLPS: result = {high half of $src2, low half of $src1}.
let isCommutable = 1 in
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
                            Sched<[SchedWriteFShuffle.XMM]>, EVEX, VVVV;
//===----------------------------------------------------------------------===//
// VMOVHPS/PD VMOVLPS Instructions
// All patterns were taken from the SSE implementation.
//===----------------------------------------------------------------------===//

// Load a 64-bit memory operand and merge it into one half of $src1. The
// memory operand is always f64mem regardless of element type; mayLoad = 1 /
// hasSideEffects = 0 so the null_frag instantiations stay well-behaved.
multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNode,
                                  X86VectorVTInfo _> {
  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
                  (ins _.RC:$src1, f64mem:$src2),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                          (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
                  Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX, VVVV;
// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
// SSE1. And MOVLPS pattern is even more complex.
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, TB;
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, TB, PD, REX_W;
defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, TB;
defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, TB, PD, REX_W;
// Fold a zero-extending 64-bit load into the PD merge instructions.
let Predicates = [HasAVX512] in {
  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
// Store forms: write one 64-bit half of an XMM register to memory.
let SchedRW = [WriteFStore] in {
// The PS store forms carry no pattern (selection handled elsewhere).
let mayStore = 1, hasSideEffects = 0 in
def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovhps\t{$src, $dst|$dst, $src}",
                              []>, EVEX, EVEX_CD8<32, CD8VT2>;
// vmovhpd stores the high f64: modeled as unpckh-then-store-element-0.
def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovhpd\t{$src, $dst|$dst, $src}",
                              [(store (f64 (extractelt
                                             (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
                                             (iPTR 0))), addr:$dst)]>,
                              EVEX, EVEX_CD8<64, CD8VT1>, REX_W;
let mayStore = 1, hasSideEffects = 0 in
def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovlps\t{$src, $dst|$dst, $src}",
                              []>, EVEX, EVEX_CD8<32, CD8VT2>;
// vmovlpd stores the low f64 (element 0) directly.
def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovlpd\t{$src, $dst|$dst, $src}",
                              [(store (f64 (extractelt (v2f64 VR128X:$src),
                                                       (iPTR 0))), addr:$dst)]>,
                              EVEX, EVEX_CD8<64, CD8VT1>, REX_W;
// A shuffle that moves element 1 to element 0 followed by a scalar store is
// just a high-half store: select VMOVHPD.
let Predicates = [HasAVX512] in {
  def : Pat<(store (f64 (extractelt
                          (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
                          (iPTR 0))), addr:$dst),
            (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
//===----------------------------------------------------------------------===//
// FMA - Fused Multiply Operations

// Packed FMA, 213 operand order: dst = src2 * src1 + src3, with $src1 tied
// to $dst. OpNode is the unmasked node, MaskOpNode the node used under a
// mask. All forms read MXCSR and may raise FP exceptions.
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
          EVEX, VVVV, Sched<[sched]>;

  // Full-vector memory operand in $src3.
  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
          EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
                             sched.ReadAfterFold]>;

  // Embedded-broadcast memory operand in $src3.
  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
           !strconcat("$src2, ${src3}", _.BroadcastStr ),
                  _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
           (MaskOpNode _.RC:$src2,
                       _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
           EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
                                      sched.ReadAfterFold]>;
// 213-order FMA with static rounding control (EVEX.b + $rc); ZMM only.
multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
          EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
// 213-order FMA at all vector lengths; rounding form exists only for ZMM.
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _,
                                   Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
  defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.ZMM, _.info512>,
                avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  let Predicates = [HasVLX, prd] in {
  defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                  sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
  defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                  sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// Element-type fan-out for 213: FP16 (HasFP16, map 6), FP32, FP64 (EVEX.W).
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd> {
  defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f16_info, HasFP16>, T_MAP6, PD;
  defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f32_info>, T8, PD;
  defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f64_info>, T8, PD, REX_W;
// 213-order packed FMA family instances.
defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;
// Packed FMA, 231 operand order: dst = src2 * src3 + src1 ($src1 tied to
// $dst). Same r/m/mb structure as the 213 multiclass above.
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
          EVEX, VVVV, Sched<[sched]>;

  // Full-vector memory operand in $src3.
  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
          (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
          EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
                             sched.ReadAfterFold]>;

  // Embedded-broadcast memory operand in $src3.
  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
           "$src2, ${src3}"#_.BroadcastStr,
           (_.VT (OpNode _.RC:$src2,
                         (_.VT (_.BroadcastLdFrag addr:$src3)),
           (_.VT (MaskOpNode _.RC:$src2,
                             (_.VT (_.BroadcastLdFrag addr:$src3)),
                             _.RC:$src1)), 1, 0>, EVEX, VVVV, EVEX_B,
           Sched<[sched.Folded, sched.ReadAfterFold,
                  sched.ReadAfterFold]>;
// 231-order FMA with static rounding control (EVEX.b + $rc); ZMM only.
multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
          1, 1>, EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
// 231-order FMA at all vector lengths; rounding form exists only for ZMM.
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _,
                                   Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
  defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.ZMM, _.info512>,
                avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  let Predicates = [HasVLX, prd] in {
  defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                  sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
  defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                  sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// Element-type fan-out for 231: FP16 (HasFP16, map 6), FP32, FP64 (EVEX.W).
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd > {
  defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f16_info, HasFP16>, T_MAP6, PD;
  defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f32_info>, T8, PD;
  defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f64_info>, T8, PD, REX_W;
// 231-order packed FMA family instances.
defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;
// Packed FMA, 132 operand order ($src1 tied to $dst).
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
          EVEX, VVVV, Sched<[sched]>;

  // Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
          (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
          EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
                             sched.ReadAfterFold]>;

  // Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
           "$src2, ${src3}"#_.BroadcastStr,
           (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
                         _.RC:$src1, _.RC:$src2)),
           (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
                             _.RC:$src1, _.RC:$src2)), 1, 0>,
           EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
                                      sched.ReadAfterFold]>;
// 132-order FMA with static rounding control (EVEX.b + $rc); ZMM only.
multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
          (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
          1, 1>, EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
// 132-order FMA at all vector lengths; rounding form exists only for ZMM.
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _,
                                   Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
  defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.ZMM, _.info512>,
                avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  let Predicates = [HasVLX, prd] in {
  defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                  sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
  defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                  sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// Element-type fan-out for 132: FP16 (HasFP16, map 6), FP32, FP64 (EVEX.W).
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd > {
  defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f16_info, HasFP16>, T_MAP6, PD;
  defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f32_info>, T8, PD;
  defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f64_info>, T8, PD, REX_W;
// 132-order packed FMA family instances.
defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;
// Scalar FMA. The _Int forms operate on the vector register class with
// null_frag patterns (selected via intrinsics elsewhere); the isCodeGenOnly
// forms operate on the scalar FRC class and carry the caller-supplied
// RHS_r/RHS_m/RHS_b patterns. MaskOnlyReg suppresses the reg-reg/rb
// patterns (used by the 213/132 variants below).
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          EVEX, VVVV, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;

  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          EVEX, VVVV, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
                             SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;

  // Static rounding form; reads MXCSR only for the rounding mode.
  let Uses = [MXCSR] in
  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
          EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;

  let isCodeGenOnly = 1, isCommutable = 1 in {
    def r : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
                   (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
                   !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX, VVVV, SIMD_EXC;
    def m : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
                   (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
                   !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
                                    SchedWriteFMA.Scl.ReadAfterFold]>, EVEX, VVVV, SIMD_EXC;

    let Uses = [MXCSR] in
    def rb : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
                   (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
                   !strconcat(OpcodeStr,
                              "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
                   !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
                   Sched<[SchedWriteFMA.Scl]>, EVEX, VVVV;
  }// isCodeGenOnly = 1
}// Constraints = "$src1 = $dst"
// Instantiates the 213/231/132 scalar FMA forms for one element type.
// The RHS pattern operand orderings differ per form so tablegen can derive
// the commuted/load variants without generating duplicate patterns.
6877 multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6878 string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
6879 X86VectorVTInfo _, string SUFF> {
6880 let ExeDomain = _.ExeDomain in {
6881 defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
6882 // Operands for intrinsic are in 123 order to preserve passthru
6884 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6886 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6887 (_.ScalarLdFrag addr:$src3)))),
6888 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
6889 _.FRC:$src3, (i32 timm:$rc)))), 0>;
6891 defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
6892 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6894 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
6895 (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
6896 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
6897 _.FRC:$src1, (i32 timm:$rc)))), 1>;
6899 // One pattern is 312 order so that the load is in a different place from the
6900 // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6901 defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
6902 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6904 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6905 _.FRC:$src1, _.FRC:$src2))),
6906 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
6907 _.FRC:$src2, (i32 timm:$rc)))), 1>;
// Top-level scalar FMA multiclass: SS/SD under HasAVX512, SH under HasFP16.
// SH uses the MAP6 opcode map; SD additionally sets REX.W.
6911 multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6912 string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> {
6913 let Predicates = [HasAVX512] in {
6914 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6915 OpNodeRnd, f32x_info, "SS">,
6916 EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD;
6917 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6918 OpNodeRnd, f64x_info, "SD">,
6919 EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD;
6921 let Predicates = [HasFP16] in {
6922 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6923 OpNodeRnd, f16x_info, "SH">,
6924 EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6, PD;
// Scalar FMA instantiations: opcodes are (213, 231, 132) respectively.
6928 defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
6929 defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
6930 defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
6931 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
// ISel patterns that select the *_Int scalar FMA instructions from
// Move(scalar_to_vector(...)) shapes produced by the intrinsic headers.
// Which of the 213/231/132 forms is picked depends on where the pass-through
// element (extracted from $src1) and any load appear among the operands.
// Op is the possibly-strict node (unmasked), MaskedOp the non-strict node
// used under X86selects_mask, RndOp the static-rounding node; ZeroFP is the
// zero immediate used to recognize zero-masking.
6933 multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
6934 SDNode RndOp, string Prefix,
6935 string Suffix, SDNode Move,
6936 X86VectorVTInfo _, PatLeaf ZeroFP,
6937 Predicate prd = HasAVX512> {
6938 let Predicates = [prd] in {
// Unmasked register forms.
6939 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6941 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6943 (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
6944 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6945 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6947 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6948 (Op _.FRC:$src2, _.FRC:$src3,
6949 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6950 (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
6951 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6952 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
// Unmasked load-folded forms.
6954 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6956 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6957 (_.ScalarLdFrag addr:$src3)))))),
6958 (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
6959 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6962 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6963 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6964 (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
6965 (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
6966 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6969 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6970 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6971 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6972 (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
6973 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
// Merge-masked forms: the masked-off element comes from $src1 (passthru).
6976 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6977 (X86selects_mask VK1WM:$mask,
6978 (MaskedOp _.FRC:$src2,
6979 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6981 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6982 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
6983 VR128X:$src1, VK1WM:$mask,
6984 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6985 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6987 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6988 (X86selects_mask VK1WM:$mask,
6989 (MaskedOp _.FRC:$src2,
6990 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6991 (_.ScalarLdFrag addr:$src3)),
6992 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6993 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
6994 VR128X:$src1, VK1WM:$mask,
6995 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6997 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6998 (X86selects_mask VK1WM:$mask,
6999 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7000 (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
7001 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7002 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
7003 VR128X:$src1, VK1WM:$mask,
7004 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7006 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7007 (X86selects_mask VK1WM:$mask,
7008 (MaskedOp _.FRC:$src2, _.FRC:$src3,
7009 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7010 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7011 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
7012 VR128X:$src1, VK1WM:$mask,
7013 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7014 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7016 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7017 (X86selects_mask VK1WM:$mask,
7018 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7019 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7020 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7021 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
7022 VR128X:$src1, VK1WM:$mask,
7023 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
// Zero-masked forms: the masked-off element is ZeroFP.
7025 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7026 (X86selects_mask VK1WM:$mask,
7027 (MaskedOp _.FRC:$src2,
7028 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7030 (_.EltVT ZeroFP)))))),
7031 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
7032 VR128X:$src1, VK1WM:$mask,
7033 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7034 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7036 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7037 (X86selects_mask VK1WM:$mask,
7038 (MaskedOp _.FRC:$src2, _.FRC:$src3,
7039 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7040 (_.EltVT ZeroFP)))))),
7041 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
7042 VR128X:$src1, VK1WM:$mask,
7043 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7044 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7046 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7047 (X86selects_mask VK1WM:$mask,
7048 (MaskedOp _.FRC:$src2,
7049 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7050 (_.ScalarLdFrag addr:$src3)),
7051 (_.EltVT ZeroFP)))))),
7052 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
7053 VR128X:$src1, VK1WM:$mask,
7054 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7056 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7057 (X86selects_mask VK1WM:$mask,
7058 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7059 _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
7060 (_.EltVT ZeroFP)))))),
7061 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
7062 VR128X:$src1, VK1WM:$mask,
7063 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7065 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7066 (X86selects_mask VK1WM:$mask,
7067 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7068 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7069 (_.EltVT ZeroFP)))))),
7070 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
7071 VR128X:$src1, VK1WM:$mask,
7072 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7074 // Patterns with rounding mode.
7075 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7077 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7078 _.FRC:$src3, (i32 timm:$rc)))))),
7079 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
7080 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7081 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7083 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7084 (RndOp _.FRC:$src2, _.FRC:$src3,
7085 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7086 (i32 timm:$rc)))))),
7087 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
7088 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7089 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7091 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7092 (X86selects_mask VK1WM:$mask,
7094 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7095 _.FRC:$src3, (i32 timm:$rc)),
7096 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7097 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
7098 VR128X:$src1, VK1WM:$mask,
7099 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7100 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7102 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7103 (X86selects_mask VK1WM:$mask,
7104 (RndOp _.FRC:$src2, _.FRC:$src3,
7105 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7107 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7108 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
7109 VR128X:$src1, VK1WM:$mask,
7110 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7111 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7113 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7114 (X86selects_mask VK1WM:$mask,
7116 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7117 _.FRC:$src3, (i32 timm:$rc)),
7118 (_.EltVT ZeroFP)))))),
7119 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
7120 VR128X:$src1, VK1WM:$mask,
7121 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7122 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7124 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7125 (X86selects_mask VK1WM:$mask,
7126 (RndOp _.FRC:$src2, _.FRC:$src3,
7127 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7129 (_.EltVT ZeroFP)))))),
7130 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
7131 VR128X:$src1, VK1WM:$mask,
7132 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7133 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
// Instantiate the scalar FMA selection patterns per element type:
// SH (f16, gated on HasFP16), then SS (f32) and SD (f64) under the
// default HasAVX512 predicate.
7136 defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH",
7137 X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7138 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH",
7139 X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7140 defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
7141 X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7142 defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
7143 X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7145 defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7146 "SS", X86Movss, v4f32x_info, fp32imm0>;
7147 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7148 "SS", X86Movss, v4f32x_info, fp32imm0>;
7149 defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7150 "SS", X86Movss, v4f32x_info, fp32imm0>;
7151 defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7152 "SS", X86Movss, v4f32x_info, fp32imm0>;
7154 defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7155 "SD", X86Movsd, v2f64x_info, fp64imm0>;
7156 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7157 "SD", X86Movsd, v2f64x_info, fp64imm0>;
7158 defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7159 "SD", X86Movsd, v2f64x_info, fp64imm0>;
7160 defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7161 "SD", X86Movsd, v2f64x_info, fp64imm0>;
7163 //===----------------------------------------------------------------------===//
7164 // AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
7165 //===----------------------------------------------------------------------===//
// IFMA (VPMADD52): register, load-folded, and broadcast forms for one vector
// width.  The accumulator $src1 is tied to $dst.
7166 let Constraints = "$src1 = $dst" in {
7167 multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
7168 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
7169 // NOTE: The SDNode have the multiply operands first with the add last.
7170 // This enables commuted load patterns to be autogenerated by tablegen.
7171 let ExeDomain = _.ExeDomain in {
7172 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
7173 (ins _.RC:$src2, _.RC:$src3),
7174 OpcodeStr, "$src3, $src2", "$src2, $src3",
7175 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
7176 T8, PD, EVEX, VVVV, Sched<[sched]>;
7178 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7179 (ins _.RC:$src2, _.MemOp:$src3),
7180 OpcodeStr, "$src3, $src2", "$src2, $src3",
7181 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
7182 T8, PD, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
7183 sched.ReadAfterFold]>;
// Broadcast-from-memory form (EVEX.b set).
7185 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7186 (ins _.RC:$src2, _.ScalarMemOp:$src3),
7187 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
7188 !strconcat("$src2, ${src3}", _.BroadcastStr ),
7190 (_.VT (_.BroadcastLdFrag addr:$src3)),
7192 T8, PD, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
7193 sched.ReadAfterFold]>;
7196 } // Constraints = "$src1 = $dst"
// Instantiate VPMADD52 across vector widths: ZMM under HasIFMA, and the
// 128/256-bit forms additionally gated on HasVLX.
7198 multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7199 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
7200 let Predicates = [HasIFMA] in {
7201 defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
7202 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7204 let Predicates = [HasVLX, HasIFMA] in {
7205 defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
7206 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7207 defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
7208 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// 52-bit multiply-accumulate: low (0xB4) and high (0xB5) halves of the
// 104-bit product, on 64-bit lanes.
7212 defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
7213 SchedWriteVecIMul, avx512vl_i64_info>,
7215 defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
7216 SchedWriteVecIMul, avx512vl_i64_info>,
7219 //===----------------------------------------------------------------------===//
7220 // AVX-512 Scalar convert from sign integer to float/double
7221 //===----------------------------------------------------------------------===//
// Scalar int -> fp conversion skeleton: isCodeGenOnly FRC forms (rr/rm) with
// no pattern, plus the intrinsic forms (rr_Int/rm_Int) that match OpNode.
// _Uses/_mayRaiseFPException let callers drop the MXCSR dependency for
// conversions that cannot raise exceptions (e.g. i32 -> f64).
7223 multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
7224 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7225 X86MemOperand x86memop, PatFrag ld_frag, string asm,
7226 string mem, list<Register> _Uses = [MXCSR],
7227 bit _mayRaiseFPException = 1> {
7228 let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
7229 mayRaiseFPException = _mayRaiseFPException in {
7230 let hasSideEffects = 0, isCodeGenOnly = 1 in {
7231 def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7232 (ins DstVT.FRC:$src1, SrcRC:$src),
7233 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7234 EVEX, VVVV, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7236 def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7237 (ins DstVT.FRC:$src1, x86memop:$src),
7238 asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
7239 EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
7240 } // hasSideEffects = 0
7241 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7242 (ins DstVT.RC:$src1, SrcRC:$src2),
7243 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7244 [(set DstVT.RC:$dst,
7245 (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
7246 EVEX, VVVV, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7248 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7249 (ins DstVT.RC:$src1, x86memop:$src2),
7250 asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7251 [(set DstVT.RC:$dst,
7252 (OpNode (DstVT.VT DstVT.RC:$src1),
7253 (ld_frag addr:$src2)))]>,
7254 EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
// AT&T alias without the size-suffixed mnemonic for the register form.
7256 def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7257 (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
7258 DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
// Static-rounding (EVEX.b + RC) variant of the scalar int -> fp conversion,
// plus its AT&T-syntax alias.  Reads MXCSR explicitly.
7261 multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7262 X86FoldableSchedWrite sched, RegisterClass SrcRC,
7263 X86VectorVTInfo DstVT, string asm,
7265 let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
7266 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7267 (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7269 "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7270 [(set DstVT.RC:$dst,
7271 (OpNode (DstVT.VT DstVT.RC:$src1),
7274 EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7275 def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
7276 (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
7277 DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
// Combines the plain and static-rounding scalar int -> fp multiclasses under
// one name; VEX_LIG because the vector length is ignored.
7280 multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
7281 X86FoldableSchedWrite sched,
7282 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7283 X86MemOperand x86memop, PatFrag ld_frag,
7284 string asm, string mem> {
7285 defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
7286 avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7287 ld_frag, asm, mem>, VEX_LIG;
// VCVT[U]SI2SS/SD definitions and selection patterns.  The 32-bit -> f64
// conversions are exact, so VCVT[U]SI2SDZ passes [] / 0 to drop the MXCSR use
// and the may-raise-FP-exception flag, and uses null_frag (no rounding form).
7290 let Predicates = [HasAVX512] in {
7291 defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7293 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7294 TB, XS, EVEX_CD8<32, CD8VT1>;
7295 defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7297 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7298 TB, XS, REX_W, EVEX_CD8<64, CD8VT1>;
7299 defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7300 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
7301 TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7302 defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7304 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7305 TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
// Suffix-less AT&T aliases default to the 32-bit memory form.
7307 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7308 (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7309 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7310 (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
// Scalar sint_to_fp selection; the tied first operand is undefined.
7312 def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
7313 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7314 def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
7315 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7316 def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
7317 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7318 def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
7319 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7321 def : Pat<(f32 (any_sint_to_fp GR32:$src)),
7322 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7323 def : Pat<(f32 (any_sint_to_fp GR64:$src)),
7324 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7325 def : Pat<(f64 (any_sint_to_fp GR32:$src)),
7326 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7327 def : Pat<(f64 (any_sint_to_fp GR64:$src)),
7328 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
// Unsigned variants (AVX-512-only encodings, opcode 0x7B).
7330 defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7332 v4f32x_info, i32mem, loadi32,
7333 "cvtusi2ss", "l">, TB, XS, EVEX_CD8<32, CD8VT1>;
7334 defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7336 v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7337 TB, XS, REX_W, EVEX_CD8<64, CD8VT1>;
7338 defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7339 i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
7340 TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7341 defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7343 v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7344 TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7346 def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7347 (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7348 def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7349 (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7351 def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
7352 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7353 def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
7354 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7355 def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
7356 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7357 def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
7358 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7360 def : Pat<(f32 (any_uint_to_fp GR32:$src)),
7361 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7362 def : Pat<(f32 (any_uint_to_fp GR64:$src)),
7363 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7364 def : Pat<(f64 (any_uint_to_fp GR32:$src)),
7365 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7366 def : Pat<(f64 (any_uint_to_fp GR64:$src)),
7367 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7370 //===----------------------------------------------------------------------===//
7371 // AVX-512 Scalar convert from float/double to integer
7372 //===----------------------------------------------------------------------===//
// Scalar fp -> int intrinsic forms: register (rr_Int), static-rounding
// (rrb_Int, EVEX.b + RC, explicit MXCSR use), and load-folded (rm_Int),
// plus AT&T aliases carrying the size suffix in aliasStr.
7374 multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7375 X86VectorVTInfo DstVT, SDNode OpNode,
7377 X86FoldableSchedWrite sched, string asm,
7378 string aliasStr, Predicate prd = HasAVX512> {
7379 let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
7380 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7381 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7382 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7383 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7384 let Uses = [MXCSR] in
7385 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7386 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7387 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
7388 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7390 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7391 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7392 [(set DstVT.RC:$dst, (OpNode
7393 (SrcVT.ScalarIntMemFrags addr:$src)))]>,
7394 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7395 } // Predicates = [prd]
7397 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7398 (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7399 def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7400 (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7401 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7402 (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7403 SrcVT.IntScalarMemOp:$src), 0, "att">;
7406 // Convert float/double to signed/unsigned int 32/64
// Opcode 0x2D = signed, 0x79 = unsigned; REX.W selects the 64-bit
// destination; the {l}/{q} alias suffix mirrors that width.
7407 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7408 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7409 TB, XS, EVEX_CD8<32, CD8VT1>;
7410 defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7411 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7412 TB, XS, REX_W, EVEX_CD8<32, CD8VT1>;
7413 defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7414 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7415 TB, XS, EVEX_CD8<32, CD8VT1>;
7416 defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7417 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7418 TB, XS, REX_W, EVEX_CD8<32, CD8VT1>;
7419 defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7420 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7421 TB, XD, EVEX_CD8<64, CD8VT1>;
7422 defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7423 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7424 TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7425 defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7426 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7427 TB, XD, EVEX_CD8<64, CD8VT1>;
7428 defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7429 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7430 TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
// CodeGen-only FRC-source scalar fp -> int forms (register and load-folded).
// Used below to select lrint/llrint through VCVTSS2SI/VCVTSD2SI.
7432 multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
7433 X86VectorVTInfo DstVT, SDNode OpNode,
7434 X86FoldableSchedWrite sched> {
7435 let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
7436 let isCodeGenOnly = 1 in {
7437 def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
7438 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7439 [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
7440 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7441 def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
7442 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7443 [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
7444 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7446 } // Predicates = [HasAVX512]
// lrint/llrint lowered through cvtss2si/cvtsd2si (round using the current
// rounding mode).  Same mnemonics as the intrinsic defms above; these add
// the FRC-typed isCodeGenOnly variants.
7449 defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
7450 lrint, WriteCvtSS2I>, TB, XS, EVEX_CD8<32, CD8VT1>;
7451 defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
7452 llrint, WriteCvtSS2I>, REX_W, TB, XS, EVEX_CD8<32, CD8VT1>;
7453 defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
7454 lrint, WriteCvtSD2I>, TB, XD, EVEX_CD8<64, CD8VT1>;
7455 defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
7456 llrint, WriteCvtSD2I>, REX_W, TB, XD, EVEX_CD8<64, CD8VT1>;
// i64-result lrint maps onto the 64-bit conversion instructions directly.
7458 let Predicates = [HasAVX512] in {
7459 def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
7460 def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;
7462 def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
7463 def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
7466 // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
7467 // which produce unnecessary vmovs{s,d} instructions
// Each pattern folds Movss/Movsd of a converted scalar into the tied-dest
// *_Int form, so the insert-into-element-0 comes for free.
7468 let Predicates = [HasAVX512] in {
7469 def : Pat<(v4f32 (X86Movss
7470 (v4f32 VR128X:$dst),
7471 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
7472 (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7474 def : Pat<(v4f32 (X86Movss
7475 (v4f32 VR128X:$dst),
7476 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
7477 (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7479 def : Pat<(v4f32 (X86Movss
7480 (v4f32 VR128X:$dst),
7481 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
7482 (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7484 def : Pat<(v4f32 (X86Movss
7485 (v4f32 VR128X:$dst),
7486 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
7487 (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7489 def : Pat<(v2f64 (X86Movsd
7490 (v2f64 VR128X:$dst),
7491 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
7492 (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7494 def : Pat<(v2f64 (X86Movsd
7495 (v2f64 VR128X:$dst),
7496 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
7497 (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7499 def : Pat<(v2f64 (X86Movsd
7500 (v2f64 VR128X:$dst),
7501 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
7502 (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7504 def : Pat<(v2f64 (X86Movsd
7505 (v2f64 VR128X:$dst),
7506 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
7507 (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
// Same folds for the unsigned conversions.
7509 def : Pat<(v4f32 (X86Movss
7510 (v4f32 VR128X:$dst),
7511 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
7512 (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7514 def : Pat<(v4f32 (X86Movss
7515 (v4f32 VR128X:$dst),
7516 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
7517 (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7519 def : Pat<(v4f32 (X86Movss
7520 (v4f32 VR128X:$dst),
7521 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
7522 (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7524 def : Pat<(v4f32 (X86Movss
7525 (v4f32 VR128X:$dst),
7526 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
7527 (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7529 def : Pat<(v2f64 (X86Movsd
7530 (v2f64 VR128X:$dst),
7531 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
7532 (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7534 def : Pat<(v2f64 (X86Movsd
7535 (v2f64 VR128X:$dst),
7536 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
7537 (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7539 def : Pat<(v2f64 (X86Movsd
7540 (v2f64 VR128X:$dst),
7541 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
7542 (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7544 def : Pat<(v2f64 (X86Movsd
7545 (v2f64 VR128X:$dst),
7546 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
7547 (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7548 } // Predicates = [HasAVX512]
7550 // Convert float/double to signed/unsigned int 32/64 with truncation
// Truncating (vcvtt*) scalar conversions: isCodeGenOnly FRC forms for
// OpNode (e.g. any_fp_to_sint), intrinsic forms for OpNodeInt, and a
// suppress-all-exceptions form (rrb_Int, EVEX.b, explicit MXCSR use)
// for OpNodeSAE, plus AT&T aliases.
7551 multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7552 X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
7553 SDNode OpNodeInt, SDNode OpNodeSAE,
7554 X86FoldableSchedWrite sched, string aliasStr,
7555 Predicate prd = HasAVX512> {
7556 let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
7557 let isCodeGenOnly = 1 in {
7558 def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7559 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7560 [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7561 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7562 def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7563 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7564 [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7565 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7568 def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7569 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7570 [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7571 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7572 let Uses = [MXCSR] in
7573 def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7574 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7575 [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7576 EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
7577 def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7578 (ins _SrcRC.IntScalarMemOp:$src),
7579 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7580 [(set _DstRC.RC:$dst,
7581 (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
7582 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7583 } // Predicates = [prd]
7585 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7586 (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7587 def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7588 (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7589 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7590 (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7591 _SrcRC.IntScalarMemOp:$src), 0, "att">;
// Truncating scalar fp->int instantiations. Opcode 0x2C is the signed form
// (vcvtts[sd]2si), 0x78 the unsigned form (vcvtts[sd]2usi). The "{l}"/"{q}"
// alias suffix selects the 32- vs 64-bit GPR destination in AT&T syntax;
// REX_W marks the 64-bit destination variants.
7594 defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7595 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7596 "{l}">, TB, XS, EVEX_CD8<32, CD8VT1>;
7597 defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7598 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7599 "{q}">, REX_W, TB, XS, EVEX_CD8<32, CD8VT1>;
7600 defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7601 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7602 "{l}">, TB, XD, EVEX_CD8<64, CD8VT1>;
7603 defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7604 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7605 "{q}">, REX_W, TB, XD, EVEX_CD8<64, CD8VT1>;
7607 defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7608 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7609 "{l}">, TB, XS, EVEX_CD8<32, CD8VT1>;
7610 defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7611 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7612 "{q}">, TB, XS, REX_W, EVEX_CD8<32, CD8VT1>;
7613 defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7614 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7615 "{l}">, TB, XD, EVEX_CD8<64, CD8VT1>;
7616 defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7617 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7618 "{q}">, TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7620 //===----------------------------------------------------------------------===//
7621 // AVX-512 Convert form float to double and back
7622 //===----------------------------------------------------------------------===//
// Scalar fp->fp conversion (base for vcvtsd2ss/vcvtss2sd and the FP16
// variants): masked intrinsic rr_Int/rm_Int forms on the full vector class,
// plus codegen-only rr/rm forms on FRC with empty patterns
// (hasSideEffects = 0). The whole multiclass reads MXCSR and may raise FP
// exceptions.
7624 let Uses = [MXCSR], mayRaiseFPException = 1 in
7625 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7626 X86VectorVTInfo _Src, SDNode OpNode,
7627 X86FoldableSchedWrite sched> {
7628 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7629 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7630 "$src2, $src1", "$src1, $src2",
7631 (_.VT (OpNode (_.VT _.RC:$src1),
7632 (_Src.VT _Src.RC:$src2)))>,
7633 EVEX, VVVV, VEX_LIG, Sched<[sched]>;
7634 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7635 (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7636 "$src2, $src1", "$src1, $src2",
7637 (_.VT (OpNode (_.VT _.RC:$src1),
7638 (_Src.ScalarIntMemFrags addr:$src2)))>,
7639 EVEX, VVVV, VEX_LIG,
7640 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Codegen-only FRC forms; selected by the plain fpextend/fpround Pats below.
7642 let isCodeGenOnly = 1, hasSideEffects = 0 in {
7643 def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7644 (ins _.FRC:$src1, _Src.FRC:$src2),
7645 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7646 EVEX, VVVV, VEX_LIG, Sched<[sched]>;
7648 def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7649 (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7650 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7651 EVEX, VVVV, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7655 // Scalar Conversion with SAE - suppress all exceptions
// Register-only {sae} intrinsic form (EVEX_B); reads MXCSR only.
7656 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7657 X86VectorVTInfo _Src, SDNode OpNodeSAE,
7658 X86FoldableSchedWrite sched> {
7659 let Uses = [MXCSR] in
7660 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7661 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7662 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7663 (_.VT (OpNodeSAE (_.VT _.RC:$src1),
7664 (_Src.VT _Src.RC:$src2)))>,
7665 EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>;
7668 // Scalar Conversion with rounding control (RC)
// $rc is an immediate static rounding-mode operand (AVX512RC, matched as
// timm and passed through to OpNodeRnd).
7669 multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7670 X86VectorVTInfo _Src, SDNode OpNodeRnd,
7671 X86FoldableSchedWrite sched> {
7672 let Uses = [MXCSR] in
7673 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7674 (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7675 "$rc, $src2, $src1", "$src1, $src2, $rc",
7676 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7677 (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
7678 EVEX, VVVV, VEX_LIG, Sched<[sched]>,
// Narrowing scalar conversion: base scalar forms plus the rounding-control
// variant. The CD8 memory-tuple scale is keyed to the *source* element size.
7681 multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
7682 SDNode OpNode, SDNode OpNodeRnd,
7683 X86FoldableSchedWrite sched,
7684 X86VectorVTInfo _src, X86VectorVTInfo _dst,
7685 Predicate prd = HasAVX512> {
7686 let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7687 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7688 avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7689 OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>;
// Widening scalar conversion: base scalar forms plus the {sae} variant
// (widening is exact, so only exception suppression is offered, not RC).
7693 multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
7694 SDNode OpNode, SDNode OpNodeSAE,
7695 X86FoldableSchedWrite sched,
7696 X86VectorVTInfo _src, X86VectorVTInfo _dst,
7697 Predicate prd = HasAVX512> {
7698 let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7699 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7700 avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
7701 EVEX_CD8<_src.EltSize, CD8VT1>;
// Scalar conversions between f16/f32/f64. The FP16 forms are gated on
// HasFP16 and live in the T_MAP5/T_MAP6 opcode maps.
7704 defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
7705 X86froundsRnd, WriteCvtSD2SS, f64x_info,
7706 f32x_info>, TB, XD, REX_W;
7707 defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
7708 X86fpextsSAE, WriteCvtSS2SD, f32x_info,
7710 defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
7711 X86froundsRnd, WriteCvtSD2SS, f64x_info,
7712 f16x_info, HasFP16>, T_MAP5, XD, REX_W;
7713 defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
7714 X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7715 f64x_info, HasFP16>, T_MAP5, XS;
7716 defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
7717 X86froundsRnd, WriteCvtSD2SS, f32x_info,
7718 f16x_info, HasFP16>, T_MAP5;
7719 defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
7720 X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7721 f32x_info, HasFP16>, T_MAP6;
// Select the codegen-only scalar convert instructions for plain
// fpextend/fpround nodes. IMPLICIT_DEF supplies the unused pass-through
// source operand; the load-folding forms are restricted to OptForSize.
7723 def : Pat<(f64 (any_fpextend FR32X:$src)),
7724 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7725 Requires<[HasAVX512]>;
7726 def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
7727 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7728 Requires<[HasAVX512, OptForSize]>;
7730 def : Pat<(f32 (any_fpround FR64X:$src)),
7731 (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7732 Requires<[HasAVX512]>;
7734 def : Pat<(f32 (any_fpextend FR16X:$src)),
7735 (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
7736 Requires<[HasFP16]>;
7737 def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
7738 (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
7739 Requires<[HasFP16, OptForSize]>;
7741 def : Pat<(f64 (any_fpextend FR16X:$src)),
7742 (VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
7743 Requires<[HasFP16]>;
7744 def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
7745 (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7746 Requires<[HasFP16, OptForSize]>;
7748 def : Pat<(f16 (any_fpround FR32X:$src)),
7749 (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
7750 Requires<[HasFP16]>;
7751 def : Pat<(f16 (any_fpround FR64X:$src)),
7752 (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
7753 Requires<[HasFP16]>;
// _Int forms: the round/extend result is blended into the low element of an
// existing vector, so the destination's upper lanes are preserved.
7755 def : Pat<(v4f32 (X86Movss
7756 (v4f32 VR128X:$dst),
7757 (v4f32 (scalar_to_vector
7758 (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
7759 (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
7760 Requires<[HasAVX512]>;
7762 def : Pat<(v2f64 (X86Movsd
7763 (v2f64 VR128X:$dst),
7764 (v2f64 (scalar_to_vector
7765 (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
7766 (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
7767 Requires<[HasAVX512]>;
7769 //===----------------------------------------------------------------------===//
7770 // AVX-512 Vector convert from signed/unsigned integer to float/double
7771 // and from float/double to signed/unsigned integer
7772 //===----------------------------------------------------------------------===//
// Generic packed-conversion multiclass: emits rr, rm, and rmb (embedded
// broadcast, EVEX_B) forms with full merge/zero masking via vselect_mask.
// OpNode drives the unmasked patterns and MaskOpNode the masked ones;
// LdDAG/MaskLdDAG let callers override the memory-form pattern (see
// avx512_vcvt_fpextend below).
7774 multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7775 X86VectorVTInfo _Src, SDPatternOperator OpNode, SDPatternOperator MaskOpNode,
7776 X86FoldableSchedWrite sched,
7777 string Broadcast = _.BroadcastStr,
7778 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7779 RegisterClass MaskRC = _.KRCWM,
7780 dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
7781 dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
7782 let Uses = [MXCSR], mayRaiseFPException = 1 in {
7783 defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
7785 (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
7786 (ins MaskRC:$mask, _Src.RC:$src),
7787 OpcodeStr, "$src", "$src",
7788 (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7789 (vselect_mask MaskRC:$mask,
7790 (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7792 (vselect_mask MaskRC:$mask,
7793 (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7795 EVEX, Sched<[sched]>;
7797 defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7799 (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
7800 (ins MaskRC:$mask, MemOp:$src),
7801 OpcodeStr#Alias, "$src", "$src",
7803 (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
7804 (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
7805 EVEX, Sched<[sched.Folded]>;
7807 defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7808 (ins _Src.ScalarMemOp:$src),
7809 (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
7810 (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
7812 "${src}"#Broadcast, "${src}"#Broadcast,
7813 (_.VT (OpNode (_Src.VT
7814 (_Src.BroadcastLdFrag addr:$src))
7816 (vselect_mask MaskRC:$mask,
7820 (_Src.BroadcastLdFrag addr:$src)))),
7822 (vselect_mask MaskRC:$mask,
7826 (_Src.BroadcastLdFrag addr:$src)))),
7828 EVEX, EVEX_B, Sched<[sched.Folded]>;
7831 // Conversion with SAE - suppress all exceptions
// Register-only rrb form (EVEX_B selects {sae} here); reads MXCSR only.
7832 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7833 X86VectorVTInfo _Src, SDNode OpNodeSAE,
7834 X86FoldableSchedWrite sched> {
7835 let Uses = [MXCSR] in
7836 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7837 (ins _Src.RC:$src), OpcodeStr,
7838 "{sae}, $src", "$src, {sae}",
7839 (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
7840 EVEX, EVEX_B, Sched<[sched]>;
7843 // Conversion with rounding control (RC)
// $rc is an immediate static rounding-mode operand (AVX512RC/timm) encoded
// via EVEX_RC.
7844 multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7845 X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd,
7846 X86FoldableSchedWrite sched> {
7847 let Uses = [MXCSR] in
7848 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7849 (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
7850 "$rc, $src", "$src, $rc",
7851 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
7852 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
7855 // Similar to avx512_vcvt_fp, but uses an extload for the memory form.
// The extload PatFrag is looked up by the source VT name; the same DAG is
// used for both the unmasked (LdDAG) and masked (MaskLdDAG) memory patterns.
7856 multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7857 X86VectorVTInfo _Src, SDPatternOperator OpNode,
7859 X86FoldableSchedWrite sched,
7860 string Broadcast = _.BroadcastStr,
7861 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7862 RegisterClass MaskRC = _.KRCWM>
7863 : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
7864 Alias, MemOp, MaskRC,
7865 (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
7866 (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
7868 // Extend [Float to Double, Half to Float]
// Z converts a 256-bit source into a 512-bit result (with a SAE variant);
// the VLX Z128 form reads only half a vector, so it overrides the memory
// operand (f64mem) and broadcast string, and uses the X86-specific
// vfpext nodes instead of the generic fpextend.
7869 multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
7870 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
7871 X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
7872 let Predicates = [prd] in {
7873 defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info512, _src.info256,
7874 any_fpextend, fpextend, sched.ZMM>,
7875 avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
7876 X86vfpextSAE, sched.ZMM>, EVEX_V512;
7878 let Predicates = [prd, HasVLX] in {
7879 defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
7880 X86any_vfpext, X86vfpext, sched.XMM,
7881 _dst.info128.BroadcastStr,
7882 "", f64mem>, EVEX_V128;
7883 defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
7884 any_fpextend, fpextend, sched.YMM>, EVEX_V256;
7888 // Truncate [Double to Float, Float to Half]
// Z128's instruction patterns are disabled (null_frag) and re-added below as
// explicit Pats so X86vmfpround can drive the masked forms. The "{x}"/"{y}"
// asm suffixes (and the AT&T InstAliases at the end) disambiguate the
// 128/256-bit memory forms, which share the same destination type.
7889 multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
7890 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
7891 X86SchedWriteWidths sched, Predicate prd = HasAVX512,
7892 PatFrag bcast128 = _src.info128.BroadcastLdFrag,
7893 PatFrag loadVT128 = _src.info128.LdFrag,
7894 RegisterClass maskRC128 = _src.info128.KRCWM> {
7895 let Predicates = [prd] in {
7896 defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
7897 X86any_vfpround, X86vfpround, sched.ZMM>,
7898 avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
7899 X86vfproundRnd, sched.ZMM>, EVEX_V512;
7901 let Predicates = [prd, HasVLX] in {
7902 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
7903 null_frag, null_frag, sched.XMM,
7904 _src.info128.BroadcastStr, "{x}",
7905 f128mem, maskRC128>, EVEX_V128;
7906 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
7907 X86any_vfpround, X86vfpround,
7908 sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;
7910 // Special patterns to allow use of X86vmfpround for masking. Instruction
7911 // patterns have been disabled with null_frag.
7912 def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
7913 (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
7914 def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
7916 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
7917 def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
7919 (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;
7921 def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
7922 (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
7923 def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
7925 (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
7926 def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
7928 (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;
7930 def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
7931 (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
7932 def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
7933 (_dst.info128.VT VR128X:$src0), maskRC128:$mask),
7934 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
7935 def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
7936 _dst.info128.ImmAllZerosV, maskRC128:$mask),
7937 (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
// AT&T-dialect aliases carrying the explicit "x"/"y" size suffix.
7940 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
7941 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7942 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7943 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7944 VK2WM:$mask, VR128X:$src), 0, "att">;
7945 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
7946 "$dst {${mask}} {z}, $src}",
7947 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7948 VK2WM:$mask, VR128X:$src), 0, "att">;
7949 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7950 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7951 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
7952 "$dst {${mask}}, ${src}{1to2}}",
7953 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7954 VK2WM:$mask, f64mem:$src), 0, "att">;
7955 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7956 "$dst {${mask}} {z}, ${src}{1to2}}",
7957 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7958 VK2WM:$mask, f64mem:$src), 0, "att">;
7960 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
7961 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7962 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7963 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7964 VK4WM:$mask, VR256X:$src), 0, "att">;
7965 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
7966 "$dst {${mask}} {z}, $src}",
7967 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7968 VK4WM:$mask, VR256X:$src), 0, "att">;
7969 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7970 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7971 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
7972 "$dst {${mask}}, ${src}{1to4}}",
7973 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7974 VK4WM:$mask, f64mem:$src), 0, "att">;
7975 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7976 "$dst {${mask}} {z}, ${src}{1to4}}",
7977 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7978 VK4WM:$mask, f64mem:$src), 0, "att">;
// Packed f64<->f32 conversion instantiations across all vector widths.
7981 defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
7982 avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
7983 REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
7984 defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
7985 avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
7986 TB, EVEX_CD8<32, CD8VH>;
7988 // Extend Half to Double
// FP16-only: 512-bit form (with SAE) plus VLX 128/256-bit forms that read
// only part of a v8f16 source, hence the explicit {1to2}/{1to4} broadcast
// strings. The extra Pat selects the Zrm form for a whole-vector extload.
7989 multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
7990 X86SchedWriteWidths sched> {
7991 let Predicates = [HasFP16] in {
7992 defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info,
7993 any_fpextend, fpextend, sched.ZMM>,
7994 avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info,
7995 X86vfpextSAE, sched.ZMM>, EVEX_V512;
7996 def : Pat<(v8f64 (extloadv8f16 addr:$src)),
7997 (!cast<Instruction>(NAME # "Zrm") addr:$src)>;
7999 let Predicates = [HasFP16, HasVLX] in {
8000 defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info,
8001 X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "",
8003 defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info,
8004 X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "",
8009 // Truncate Double to Half
// FP16-only. All widths produce a v8f16 result, so the "{x}"/"{y}"/"{z}"
// asm suffixes (and the AT&T InstAliases below) disambiguate the source
// width. The 128/256-bit instruction patterns are disabled with null_frag;
// explicit selection Pats live in the HasFP16+HasVLX block further down.
8010 multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
8011 let Predicates = [HasFP16] in {
8012 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info,
8013 X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">,
8014 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info,
8015 X86vfproundRnd, sched.ZMM>, EVEX_V512;
8017 let Predicates = [HasFP16, HasVLX] in {
8018 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag,
8019 null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8021 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag,
8022 null_frag, sched.YMM, "{1to4}", "{y}", f256mem,
8025 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8026 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8027 VR128X:$src), 0, "att">;
8028 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8029 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8030 VK2WM:$mask, VR128X:$src), 0, "att">;
8031 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8032 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8033 VK2WM:$mask, VR128X:$src), 0, "att">;
8034 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8035 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8036 i64mem:$src), 0, "att">;
8037 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8038 "$dst {${mask}}, ${src}{1to2}}",
8039 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8040 VK2WM:$mask, i64mem:$src), 0, "att">;
8041 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8042 "$dst {${mask}} {z}, ${src}{1to2}}",
8043 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8044 VK2WM:$mask, i64mem:$src), 0, "att">;
8046 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8047 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8048 VR256X:$src), 0, "att">;
8049 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8050 "$dst {${mask}}, $src}",
8051 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8052 VK4WM:$mask, VR256X:$src), 0, "att">;
8053 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8054 "$dst {${mask}} {z}, $src}",
8055 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8056 VK4WM:$mask, VR256X:$src), 0, "att">;
8057 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8058 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8059 i64mem:$src), 0, "att">;
8060 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8061 "$dst {${mask}}, ${src}{1to4}}",
8062 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8063 VK4WM:$mask, i64mem:$src), 0, "att">;
8064 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8065 "$dst {${mask}} {z}, ${src}{1to4}}",
8066 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8067 VK4WM:$mask, i64mem:$src), 0, "att">;
8069 def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
8070 (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
8071 VR512:$src), 0, "att">;
8072 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
8073 "$dst {${mask}}, $src}",
8074 (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
8075 VK8WM:$mask, VR512:$src), 0, "att">;
8076 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
8077 "$dst {${mask}} {z}, $src}",
8078 (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
8079 VK8WM:$mask, VR512:$src), 0, "att">;
8080 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
8081 (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
8082 i64mem:$src), 0, "att">;
8083 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
8084 "$dst {${mask}}, ${src}{1to8}}",
8085 (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
8086 VK8WM:$mask, i64mem:$src), 0, "att">;
8087 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
8088 "$dst {${mask}} {z}, ${src}{1to8}}",
8089 (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
8090 VK8WM:$mask, i64mem:$src), 0, "att">;
// FP16 packed conversion instantiations (T_MAP5/T_MAP6 opcode maps).
8093 defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info,
8094 avx512vl_f32_info, SchedWriteCvtPD2PS,
8095 HasFP16>, T_MAP5, PD, EVEX_CD8<32, CD8VF>;
8096 defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info,
8097 avx512vl_f16_info, SchedWriteCvtPS2PD,
8098 HasFP16>, T_MAP6, PD, EVEX_CD8<16, CD8VH>;
8099 defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>,
8100 REX_W, T_MAP5, PD, EVEX_CD8<64, CD8VF>;
8101 defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>,
8102 T_MAP5, EVEX_CD8<16, CD8VQ>;
// Explicit selection patterns for the 128/256-bit VCVTPD2PH forms, whose
// instruction patterns were disabled with null_frag in avx512_cvtpd2ph.
8104 let Predicates = [HasFP16, HasVLX] in {
8105 // Special patterns to allow use of X86vmfpround for masking. Instruction
8106 // patterns have been disabled with null_frag.
8107 def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))),
8108 (VCVTPD2PHZ256rr VR256X:$src)>;
8109 def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0),
8111 (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
8112 def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV,
8114 (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
8116 def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))),
8117 (VCVTPD2PHZ256rm addr:$src)>;
8118 def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0),
8120 (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8121 def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV,
8123 (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>;
8125 def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))),
8126 (VCVTPD2PHZ256rmb addr:$src)>;
8127 def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8128 (v8f16 VR128X:$src0), VK4WM:$mask),
8129 (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8130 def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8131 v8f16x_info.ImmAllZerosV, VK4WM:$mask),
8132 (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
// 128-bit source variants (v2f64 -> v8f16).
8134 def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))),
8135 (VCVTPD2PHZ128rr VR128X:$src)>;
8136 def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0),
8138 (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8139 def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV,
8141 (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
8143 def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))),
8144 (VCVTPD2PHZ128rm addr:$src)>;
8145 def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0),
8147 (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8148 def : Pat<(X86vmfpround (loadv2f64 addr:$src), v8f16x_info.ImmAllZerosV,
8150 (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>;
8152 def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))),
8153 (VCVTPD2PHZ128rmb addr:$src)>;
8154 def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8155 (v8f16 VR128X:$src0), VK2WM:$mask),
8156 (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8157 def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8158 v8f16x_info.ImmAllZerosV, VK2WM:$mask),
8159 (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
8162 // Convert Signed/Unsigned Doubleword to Double
// i32->f64 is exact, so these forms read no MXCSR and cannot raise FP
// exceptions (Uses cleared, mayRaiseFPException = 0). The Z128 form reads
// only 64 bits of source, so it overrides the memory-form DAGs with a
// scalar_to_vector of a 64-bit load.
8163 let Uses = []<Register>, mayRaiseFPException = 0 in
8164 multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8165 SDNode MaskOpNode, SDPatternOperator OpNode128,
8166 SDNode MaskOpNode128,
8167 X86SchedWriteWidths sched> {
8168 // No rounding in this op
8169 let Predicates = [HasAVX512] in
8170 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
8171 MaskOpNode, sched.ZMM>, EVEX_V512;
8173 let Predicates = [HasVLX] in {
8174 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
8175 OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
8177 (v2f64 (OpNode128 (bc_v4i32
8179 (scalar_to_vector (loadi64 addr:$src)))))),
8180 (v2f64 (MaskOpNode128 (bc_v4i32
8182 (scalar_to_vector (loadi64 addr:$src))))))>,
8184 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
8185 MaskOpNode, sched.YMM>, EVEX_V256;
8189 // Convert Signed/Unsigned Doubleword to Float
// i32->f32 across all widths; the 512-bit form adds a rounding-control
// variant (OpNodeRnd).
8190 multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8191 SDNode MaskOpNode, SDNode OpNodeRnd,
8192 X86SchedWriteWidths sched> {
8193 let Predicates = [HasAVX512] in
8194 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
8195 MaskOpNode, sched.ZMM>,
8196 avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
8197 OpNodeRnd, sched.ZMM>, EVEX_V512;
8199 let Predicates = [HasVLX] in {
8200 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
8201 MaskOpNode, sched.XMM>, EVEX_V128;
8202 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
8203 MaskOpNode, sched.YMM>, EVEX_V256;
8207 // Convert Float to Signed/Unsigned Doubleword with truncation
// Truncating conversions offer a {sae} variant (OpNodeSAE) on the 512-bit
// form instead of rounding control.
8208 multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8210 SDNode OpNodeSAE, X86SchedWriteWidths sched> {
8211 let Predicates = [HasAVX512] in {
8212 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8213 MaskOpNode, sched.ZMM>,
8214 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
8215 OpNodeSAE, sched.ZMM>, EVEX_V512;
8217 let Predicates = [HasVLX] in {
8218 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8219 MaskOpNode, sched.XMM>, EVEX_V128;
8220 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8221 MaskOpNode, sched.YMM>, EVEX_V256;
8225 // Convert Float to Signed/Unsigned Doubleword
// Non-truncating counterpart of avx512_cvttps2dq: the 512-bit form offers
// rounding control (OpNodeRnd) rather than {sae}.
8226 multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8227 SDNode MaskOpNode, SDNode OpNodeRnd,
8228 X86SchedWriteWidths sched> {
8229 let Predicates = [HasAVX512] in {
8230 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8231 MaskOpNode, sched.ZMM>,
8232 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
8233 OpNodeRnd, sched.ZMM>, EVEX_V512;
8235 let Predicates = [HasVLX] in {
8236 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8237 MaskOpNode, sched.XMM>, EVEX_V128;
8238 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8239 MaskOpNode, sched.YMM>, EVEX_V256;
8243 // Convert Double to Signed/Unsigned Doubleword with truncation
// All widths narrow into a v4i32-class destination; Z128's instruction
// patterns are disabled (null_frag) and the "{x}"/"{y}" suffixes plus the
// AT&T InstAliases below disambiguate the memory forms.
8244 multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8245 SDNode MaskOpNode, SDNode OpNodeSAE,
8246 X86SchedWriteWidths sched> {
8247 let Predicates = [HasAVX512] in {
8248 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8249 MaskOpNode, sched.ZMM>,
8250 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
8251 OpNodeSAE, sched.ZMM>, EVEX_V512;
8253 let Predicates = [HasVLX] in {
8254 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8255 // memory forms of these instructions in Asm Parser. They have the same
8256 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8257 // due to the same reason.
8258 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8259 null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8261 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8262 MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8265 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8266 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8267 VR128X:$src), 0, "att">;
8268 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8269 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8270 VK2WM:$mask, VR128X:$src), 0, "att">;
8271 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8272 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8273 VK2WM:$mask, VR128X:$src), 0, "att">;
8274 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8275 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8276 f64mem:$src), 0, "att">;
8277 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8278 "$dst {${mask}}, ${src}{1to2}}",
8279 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8280 VK2WM:$mask, f64mem:$src), 0, "att">;
8281 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8282 "$dst {${mask}} {z}, ${src}{1to2}}",
8283 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8284 VK2WM:$mask, f64mem:$src), 0, "att">;
8286 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8287 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8288 VR256X:$src), 0, "att">;
8289 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8290 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8291 VK4WM:$mask, VR256X:$src), 0, "att">;
8292 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8293 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8294 VK4WM:$mask, VR256X:$src), 0, "att">;
8295 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8296 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8297 f64mem:$src), 0, "att">;
8298 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8299 "$dst {${mask}}, ${src}{1to4}}",
8300 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8301 VK4WM:$mask, f64mem:$src), 0, "att">;
8302 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8303 "$dst {${mask}} {z}, ${src}{1to4}}",
8304 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8305 VK4WM:$mask, f64mem:$src), 0, "att">;
8308 // Convert Double to Signed/Unsigned Doubleword
8309 multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8310 SDNode MaskOpNode, SDNode OpNodeRnd,
8311 X86SchedWriteWidths sched> {
8312 let Predicates = [HasAVX512] in {
8313 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8314 MaskOpNode, sched.ZMM>,
8315 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
8316 OpNodeRnd, sched.ZMM>, EVEX_V512;
8318 let Predicates = [HasVLX] in {
8319 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8320 // memory forms of these instructions in Asm Parser. They have the same
8321 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8322 // due to the same reason.
8323 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8324 null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8326 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8327 MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8330 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8331 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
8332 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8333 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8334 VK2WM:$mask, VR128X:$src), 0, "att">;
8335 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8336 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8337 VK2WM:$mask, VR128X:$src), 0, "att">;
8338 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8339 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8340 f64mem:$src), 0, "att">;
8341 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8342 "$dst {${mask}}, ${src}{1to2}}",
8343 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8344 VK2WM:$mask, f64mem:$src), 0, "att">;
8345 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8346 "$dst {${mask}} {z}, ${src}{1to2}}",
8347 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8348 VK2WM:$mask, f64mem:$src), 0, "att">;
8350 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8351 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
8352 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8353 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8354 VK4WM:$mask, VR256X:$src), 0, "att">;
8355 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8356 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8357 VK4WM:$mask, VR256X:$src), 0, "att">;
8358 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8359 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8360 f64mem:$src), 0, "att">;
8361 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8362 "$dst {${mask}}, ${src}{1to4}}",
8363 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8364 VK4WM:$mask, f64mem:$src), 0, "att">;
8365 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8366 "$dst {${mask}} {z}, ${src}{1to4}}",
8367 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8368 VK4WM:$mask, f64mem:$src), 0, "att">;
8371 // Convert Double to Signed/Unsigned Quadword
8372 multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8373 SDNode MaskOpNode, SDNode OpNodeRnd,
8374 X86SchedWriteWidths sched> {
// All widths require DQI; 128/256-bit forms additionally require VLX.
8375 let Predicates = [HasDQI] in {
// 512-bit form also provides a rounding-control variant.
8376 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8377 MaskOpNode, sched.ZMM>,
8378 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
8379 OpNodeRnd, sched.ZMM>, EVEX_V512;
8381 let Predicates = [HasDQI, HasVLX] in {
8382 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8383 MaskOpNode, sched.XMM>, EVEX_V128;
8384 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8385 MaskOpNode, sched.YMM>, EVEX_V256;
8389 // Convert Double to Signed/Unsigned Quadword with truncation
8390 multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8391 SDNode MaskOpNode, SDNode OpNodeRnd,
8392 X86SchedWriteWidths sched> {
// All widths require DQI; 128/256-bit forms additionally require VLX.
8393 let Predicates = [HasDQI] in {
// Truncating conversion: the 512-bit form gets a SAE (suppress-all-exceptions)
// variant rather than a rounding-control one.
8394 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8395 MaskOpNode, sched.ZMM>,
8396 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
8397 OpNodeRnd, sched.ZMM>, EVEX_V512;
8399 let Predicates = [HasDQI, HasVLX] in {
8400 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8401 MaskOpNode, sched.XMM>, EVEX_V128;
8402 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8403 MaskOpNode, sched.YMM>, EVEX_V256;
8407 // Convert Signed/Unsigned Quadword to Double
8408 multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8409 SDNode MaskOpNode, SDNode OpNodeRnd,
8410 X86SchedWriteWidths sched> {
// int64 -> f64 direction (note dest/src VTInfo order is swapped vs. the
// pd2qq multiclasses above). All widths require DQI; 128/256 also need VLX.
8411 let Predicates = [HasDQI] in {
8412 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
8413 MaskOpNode, sched.ZMM>,
8414 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
8415 OpNodeRnd, sched.ZMM>, EVEX_V512;
8417 let Predicates = [HasDQI, HasVLX] in {
8418 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
8419 MaskOpNode, sched.XMM>, EVEX_V128;
8420 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
8421 MaskOpNode, sched.YMM>, EVEX_V256;
8425 // Convert Float to Signed/Unsigned Quadword
8426 multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8427 SDNode MaskOpNode, SDNode OpNodeRnd,
8428 X86SchedWriteWidths sched> {
8429 let Predicates = [HasDQI] in {
8430 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8431 MaskOpNode, sched.ZMM>,
8432 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
8433 OpNodeRnd, sched.ZMM>, EVEX_V512;
8435 let Predicates = [HasDQI, HasVLX] in {
8436 // Explicitly specified broadcast string, since we take only 2 elements
8437 // from v4f32x_info source
8438 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8439 MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8440 (v2i64 (OpNode (bc_v4f32
8442 (scalar_to_vector (loadf64 addr:$src)))))),
8443 (v2i64 (MaskOpNode (bc_v4f32
8445 (scalar_to_vector (loadf64 addr:$src))))))>,
8447 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8448 MaskOpNode, sched.YMM>, EVEX_V256;
8452 // Convert Float to Signed/Unsigned Quadword with truncation
8453 multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8454 SDNode MaskOpNode, SDNode OpNodeRnd,
8455 X86SchedWriteWidths sched> {
8456 let Predicates = [HasDQI] in {
8457 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8458 MaskOpNode, sched.ZMM>,
8459 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
8460 OpNodeRnd, sched.ZMM>, EVEX_V512;
8462 let Predicates = [HasDQI, HasVLX] in {
8463 // Explicitly specified broadcast string, since we take only 2 elements
8464 // from v4f32x_info source
8465 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8466 MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8467 (v2i64 (OpNode (bc_v4f32
8469 (scalar_to_vector (loadf64 addr:$src)))))),
8470 (v2i64 (MaskOpNode (bc_v4f32
8472 (scalar_to_vector (loadf64 addr:$src))))))>,
8474 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8475 MaskOpNode, sched.YMM>, EVEX_V256;
8479 // Convert Signed/Unsigned Quadword to Float
8480 // Also Convert Signed/Unsigned Doubleword to Half
8481 multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8482 SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
8483 SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
8484 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8485 X86SchedWriteWidths sched, Predicate prd = HasDQI> {
8486 let Predicates = [prd] in {
8487 defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
8488 MaskOpNode, sched.ZMM>,
8489 avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
8490 OpNodeRnd, sched.ZMM>, EVEX_V512;
8492 let Predicates = [prd, HasVLX] in {
8493 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8494 // memory forms of these instructions in Asm Parser. They have the same
8495 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8496 // due to the same reason.
8497 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
8498 null_frag, sched.XMM, _src.info128.BroadcastStr,
8499 "{x}", i128mem, _src.info128.KRCWM>,
8501 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
8502 MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
8505 // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
8506 // patterns have been disabled with null_frag.
8507 def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
8508 (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
8509 def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
8510 _src.info128.KRCWM:$mask),
8511 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
8512 def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
8513 _src.info128.KRCWM:$mask),
8514 (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;
8516 def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
8517 (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
8518 def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
8519 _src.info128.KRCWM:$mask),
8520 (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8521 def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
8522 _src.info128.KRCWM:$mask),
8523 (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;
8525 def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))),
8526 (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
8527 def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8528 (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask),
8529 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8530 def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8531 _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask),
8532 (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>;
8535 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8536 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8537 VR128X:$src), 0, "att">;
8538 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8539 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8540 VK2WM:$mask, VR128X:$src), 0, "att">;
8541 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8542 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8543 VK2WM:$mask, VR128X:$src), 0, "att">;
8544 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8545 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8546 i64mem:$src), 0, "att">;
8547 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8548 "$dst {${mask}}, ${src}{1to2}}",
8549 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8550 VK2WM:$mask, i64mem:$src), 0, "att">;
8551 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8552 "$dst {${mask}} {z}, ${src}{1to2}}",
8553 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8554 VK2WM:$mask, i64mem:$src), 0, "att">;
8556 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8557 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8558 VR256X:$src), 0, "att">;
8559 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8560 "$dst {${mask}}, $src}",
8561 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8562 VK4WM:$mask, VR256X:$src), 0, "att">;
8563 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8564 "$dst {${mask}} {z}, $src}",
8565 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8566 VK4WM:$mask, VR256X:$src), 0, "att">;
8567 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8568 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8569 i64mem:$src), 0, "att">;
8570 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8571 "$dst {${mask}}, ${src}{1to4}}",
8572 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8573 VK4WM:$mask, i64mem:$src), 0, "att">;
8574 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8575 "$dst {${mask}} {z}, ${src}{1to4}}",
8576 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8577 VK4WM:$mask, i64mem:$src), 0, "att">;
8580 defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
8581 X86any_VSintToFP, X86VSintToFP,
8582 SchedWriteCvtDQ2PD>, TB, XS, EVEX_CD8<32, CD8VH>;
8584 defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
8585 X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8586 TB, EVEX_CD8<32, CD8VF>;
8588 defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
8589 X86cvttp2si, X86cvttp2siSAE,
8590 SchedWriteCvtPS2DQ>, TB, XS, EVEX_CD8<32, CD8VF>;
8592 defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
8593 X86cvttp2si, X86cvttp2siSAE,
8594 SchedWriteCvtPD2DQ>,
8595 TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
8597 defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
8598 X86cvttp2ui, X86cvttp2uiSAE,
8599 SchedWriteCvtPS2DQ>, TB, EVEX_CD8<32, CD8VF>;
8601 defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
8602 X86cvttp2ui, X86cvttp2uiSAE,
8603 SchedWriteCvtPD2DQ>,
8604 TB, REX_W, EVEX_CD8<64, CD8VF>;
8606 defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
8607 uint_to_fp, X86any_VUintToFP, X86VUintToFP,
8608 SchedWriteCvtDQ2PD>, TB, XS, EVEX_CD8<32, CD8VH>;
8610 defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
8611 uint_to_fp, X86VUintToFpRnd,
8612 SchedWriteCvtDQ2PS>, TB, XD, EVEX_CD8<32, CD8VF>;
8614 defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
8615 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, TB, PD,
8616 EVEX_CD8<32, CD8VF>;
8618 defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
8619 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, TB, XD,
8620 REX_W, EVEX_CD8<64, CD8VF>;
8622 defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
8623 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8624 TB, EVEX_CD8<32, CD8VF>;
8626 defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
8627 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W,
8628 TB, EVEX_CD8<64, CD8VF>;
8630 defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
8631 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, REX_W,
8632 TB, PD, EVEX_CD8<64, CD8VF>;
8634 defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
8635 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, TB, PD,
8636 EVEX_CD8<32, CD8VH>;
8638 defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
8639 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W,
8640 TB, PD, EVEX_CD8<64, CD8VF>;
8642 defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
8643 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, TB, PD,
8644 EVEX_CD8<32, CD8VH>;
8646 defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
8647 X86cvttp2si, X86cvttp2siSAE,
8648 SchedWriteCvtPD2DQ>, REX_W,
8649 TB, PD, EVEX_CD8<64, CD8VF>;
8651 defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
8652 X86cvttp2si, X86cvttp2siSAE,
8653 SchedWriteCvtPS2DQ>, TB, PD,
8654 EVEX_CD8<32, CD8VH>;
8656 defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
8657 X86cvttp2ui, X86cvttp2uiSAE,
8658 SchedWriteCvtPD2DQ>, REX_W,
8659 TB, PD, EVEX_CD8<64, CD8VF>;
8661 defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
8662 X86cvttp2ui, X86cvttp2uiSAE,
8663 SchedWriteCvtPS2DQ>, TB, PD,
8664 EVEX_CD8<32, CD8VH>;
8666 defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
8667 sint_to_fp, X86VSintToFpRnd,
8668 SchedWriteCvtDQ2PD>, REX_W, TB, XS, EVEX_CD8<64, CD8VF>;
8670 defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
8671 uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
8672 REX_W, TB, XS, EVEX_CD8<64, CD8VF>;
8674 defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp,
8675 X86any_VSintToFP, X86VMSintToFP,
8676 X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8677 SchedWriteCvtDQ2PS, HasFP16>,
8678 T_MAP5, EVEX_CD8<32, CD8VF>;
8680 defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp,
8681 X86any_VUintToFP, X86VMUintToFP,
8682 X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8683 SchedWriteCvtDQ2PS, HasFP16>, T_MAP5, XD,
8684 EVEX_CD8<32, CD8VF>;
8686 defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp,
8687 X86any_VSintToFP, X86VMSintToFP,
8688 X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8689 SchedWriteCvtDQ2PS>, REX_W, TB,
8690 EVEX_CD8<64, CD8VF>;
8692 defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp,
8693 X86any_VUintToFP, X86VMUintToFP,
8694 X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8695 SchedWriteCvtDQ2PS>, REX_W, TB, XD,
8696 EVEX_CD8<64, CD8VF>;
8698 let Predicates = [HasVLX] in {
8699 // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8700 // patterns have been disabled with null_frag.
8701 def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8702 (VCVTPD2DQZ128rr VR128X:$src)>;
8703 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8705 (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8706 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8708 (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8710 def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8711 (VCVTPD2DQZ128rm addr:$src)>;
8712 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8714 (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8715 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8717 (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8719 def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
8720 (VCVTPD2DQZ128rmb addr:$src)>;
8721 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8722 (v4i32 VR128X:$src0), VK2WM:$mask),
8723 (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8724 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8725 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8726 (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8728 // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8729 // patterns have been disabled with null_frag.
8730 def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
8731 (VCVTTPD2DQZ128rr VR128X:$src)>;
8732 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8734 (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8735 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8737 (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8739 def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
8740 (VCVTTPD2DQZ128rm addr:$src)>;
8741 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8743 (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8744 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8746 (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8748 def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
8749 (VCVTTPD2DQZ128rmb addr:$src)>;
8750 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8751 (v4i32 VR128X:$src0), VK2WM:$mask),
8752 (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8753 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8754 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8755 (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8757 // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8758 // patterns have been disabled with null_frag.
8759 def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8760 (VCVTPD2UDQZ128rr VR128X:$src)>;
8761 def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8763 (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8764 def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8766 (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8768 def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8769 (VCVTPD2UDQZ128rm addr:$src)>;
8770 def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8772 (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8773 def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8775 (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8777 def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
8778 (VCVTPD2UDQZ128rmb addr:$src)>;
8779 def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8780 (v4i32 VR128X:$src0), VK2WM:$mask),
8781 (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8782 def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8783 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8784 (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8786 // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
8787 // patterns have been disabled with null_frag.
8788 def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
8789 (VCVTTPD2UDQZ128rr VR128X:$src)>;
8790 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8792 (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8793 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8795 (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8797 def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
8798 (VCVTTPD2UDQZ128rm addr:$src)>;
8799 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8801 (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8802 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8804 (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8806 def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
8807 (VCVTTPD2UDQZ128rmb addr:$src)>;
8808 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8809 (v4i32 VR128X:$src0), VK2WM:$mask),
8810 (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8811 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8812 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8813 (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8815 def : Pat<(v4i32 (lrint VR128X:$src)), (VCVTPS2DQZ128rr VR128X:$src)>;
8816 def : Pat<(v4i32 (lrint (loadv4f32 addr:$src))), (VCVTPS2DQZ128rm addr:$src)>;
8817 def : Pat<(v8i32 (lrint VR256X:$src)), (VCVTPS2DQZ256rr VR256X:$src)>;
8818 def : Pat<(v8i32 (lrint (loadv8f32 addr:$src))), (VCVTPS2DQZ256rm addr:$src)>;
8819 def : Pat<(v4i32 (lrint VR256X:$src)), (VCVTPD2DQZ256rr VR256X:$src)>;
8820 def : Pat<(v4i32 (lrint (loadv4f64 addr:$src))), (VCVTPD2DQZ256rm addr:$src)>;
8822 def : Pat<(v16i32 (lrint VR512:$src)), (VCVTPS2DQZrr VR512:$src)>;
8823 def : Pat<(v16i32 (lrint (loadv16f32 addr:$src))), (VCVTPS2DQZrm addr:$src)>;
8824 def : Pat<(v8i32 (lrint VR512:$src)), (VCVTPD2DQZrr VR512:$src)>;
8825 def : Pat<(v8i32 (lrint (loadv8f64 addr:$src))), (VCVTPD2DQZrm addr:$src)>;
8827 let Predicates = [HasDQI, HasVLX] in {
8828 def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8829 (VCVTPS2QQZ128rm addr:$src)>;
8830 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8831 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8833 (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8834 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8835 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8836 v2i64x_info.ImmAllZerosV)),
8837 (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8839 def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8840 (VCVTPS2UQQZ128rm addr:$src)>;
8841 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8842 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8844 (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8845 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8846 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8847 v2i64x_info.ImmAllZerosV)),
8848 (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8850 def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8851 (VCVTTPS2QQZ128rm addr:$src)>;
8852 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8853 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8855 (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8856 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8857 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8858 v2i64x_info.ImmAllZerosV)),
8859 (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8861 def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8862 (VCVTTPS2UQQZ128rm addr:$src)>;
8863 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8864 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8866 (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8867 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8868 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8869 v2i64x_info.ImmAllZerosV)),
8870 (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8872 def : Pat<(v4i64 (lrint VR128X:$src)), (VCVTPS2QQZ256rr VR128X:$src)>;
8873 def : Pat<(v4i64 (lrint (loadv4f32 addr:$src))), (VCVTPS2QQZ256rm addr:$src)>;
8874 def : Pat<(v4i64 (llrint VR128X:$src)), (VCVTPS2QQZ256rr VR128X:$src)>;
8875 def : Pat<(v4i64 (llrint (loadv4f32 addr:$src))), (VCVTPS2QQZ256rm addr:$src)>;
8876 def : Pat<(v2i64 (lrint VR128X:$src)), (VCVTPD2QQZ128rr VR128X:$src)>;
8877 def : Pat<(v2i64 (lrint (loadv2f64 addr:$src))), (VCVTPD2QQZ128rm addr:$src)>;
8878 def : Pat<(v4i64 (lrint VR256X:$src)), (VCVTPD2QQZ256rr VR256X:$src)>;
8879 def : Pat<(v4i64 (lrint (loadv4f64 addr:$src))), (VCVTPD2QQZ256rm addr:$src)>;
8880 def : Pat<(v2i64 (llrint VR128X:$src)), (VCVTPD2QQZ128rr VR128X:$src)>;
8881 def : Pat<(v2i64 (llrint (loadv2f64 addr:$src))), (VCVTPD2QQZ128rm addr:$src)>;
8882 def : Pat<(v4i64 (llrint VR256X:$src)), (VCVTPD2QQZ256rr VR256X:$src)>;
8883 def : Pat<(v4i64 (llrint (loadv4f64 addr:$src))), (VCVTPD2QQZ256rm addr:$src)>;
8886 let Predicates = [HasDQI] in {
8887 def : Pat<(v8i64 (lrint VR256X:$src)), (VCVTPS2QQZrr VR256X:$src)>;
8888 def : Pat<(v8i64 (lrint (loadv8f32 addr:$src))), (VCVTPS2QQZrm addr:$src)>;
8889 def : Pat<(v8i64 (llrint VR256X:$src)), (VCVTPS2QQZrr VR256X:$src)>;
8890 def : Pat<(v8i64 (llrint (loadv8f32 addr:$src))), (VCVTPS2QQZrm addr:$src)>;
8891 def : Pat<(v8i64 (lrint VR512:$src)), (VCVTPD2QQZrr VR512:$src)>;
8892 def : Pat<(v8i64 (lrint (loadv8f64 addr:$src))), (VCVTPD2QQZrm addr:$src)>;
8893 def : Pat<(v8i64 (llrint VR512:$src)), (VCVTPD2QQZrr VR512:$src)>;
8894 def : Pat<(v8i64 (llrint (loadv8f64 addr:$src))), (VCVTPD2QQZrm addr:$src)>;
8897 let Predicates = [HasVLX] in {
8898 def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8899 (VCVTDQ2PDZ128rm addr:$src)>;
8900 def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8901 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8903 (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8904 def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8905 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8906 v2f64x_info.ImmAllZerosV)),
8907 (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8909 def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8910 (VCVTUDQ2PDZ128rm addr:$src)>;
8911 def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8912 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8914 (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8915 def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8916 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8917 v2f64x_info.ImmAllZerosV)),
8918 (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8921 //===----------------------------------------------------------------------===//
8922 // Half precision conversion instructions
8923 //===----------------------------------------------------------------------===//
// Register and memory forms of VCVTPH2PS (half -> single conversion).
// AVX512_maskable_split takes two patterns: the unmasked form uses
// X86any_cvtph2ps, the masked form uses X86cvtph2ps.
8925 let Uses = [MXCSR], mayRaiseFPException = 1 in
8926 multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8927 X86MemOperand x86memop, dag ld_dag,
8928 X86FoldableSchedWrite sched> {
8929 defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
8930 (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
8931 (X86any_cvtph2ps (_src.VT _src.RC:$src)),
8932 (X86cvtph2ps (_src.VT _src.RC:$src))>,
8933 T8, PD, Sched<[sched]>;
// Memory form: the load dag (ld_dag) is supplied by the instantiation site.
8934 defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
8935 (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
8936 (X86any_cvtph2ps (_src.VT ld_dag)),
8937 (X86cvtph2ps (_src.VT ld_dag))>,
8938 T8, PD, Sched<[sched.Folded]>;
// Suppress-all-exceptions ({sae}) register form of VCVTPH2PS, encoded with
// EVEX.b (EVEX_B). Reads MXCSR but, unlike the base multiclass, does not set
// mayRaiseFPException.
8941 multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8942 X86FoldableSchedWrite sched> {
8943 let Uses = [MXCSR] in
8944 defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
8945 (ins _src.RC:$src), "vcvtph2ps",
8946 "{sae}, $src", "$src, {sae}",
8947 (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
8948 T8, PD, EVEX_B, Sched<[sched]>;
8951 let Predicates = [HasAVX512] in
8952 defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
8953 (load addr:$src), WriteCvtPH2PSZ>,
8954 avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
8955 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
// VLX (128/256-bit) variants. The 128-bit form loads only the low 64 bits
// (4 halves) via X86vzload64 rather than a full 128-bit load.
8957 let Predicates = [HasVLX] in {
8958 defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
8959 (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
8960 EVEX_CD8<32, CD8VH>;
8961 defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
8962 (bitconvert (v2i64 (X86vzload64 addr:$src))),
8963 WriteCvtPH2PS>, EVEX, EVEX_V128,
8964 EVEX_CD8<32, CD8VH>;
8966 // Pattern match vcvtph2ps of a scalar i64 load.
8967 def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
8968 (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
8969 (VCVTPH2PSZ128rm addr:$src)>;
// vcvtps2ph: single -> half precision with an immediate rounding-control
// byte ($src2). Register forms have explicit unmasked/merge-masked (rrk)
// and zero-masked (rrkz) defs; store forms (mr/mrk) carry no patterns and
// are marked mayStore with no side effects.
8972 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8973                            X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
8974 let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8975 def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8976 (ins _src.RC:$src1, i32u8imm:$src2),
8977 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8978 [(set _dest.RC:$dst,
8979 (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
// Merge-masking ties $src0 to $dst so unselected elements are preserved.
8981 let Constraints = "$src0 = $dst" in
8982 def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8983 (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8984 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
8985 [(set _dest.RC:$dst,
8986 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8987 _dest.RC:$src0, _src.KRCWM:$mask))]>,
8988 Sched<[RR]>, EVEX_K;
8989 def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8990 (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8991 "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
8992 [(set _dest.RC:$dst,
8993 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8994 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
8995 Sched<[RR]>, EVEX_KZ;
// Store forms: pattern-less (lowered via the Pat<>s below the
// instantiations), so hasSideEffects = 0 keeps them schedulable.
8996 let hasSideEffects = 0, mayStore = 1 in {
8997 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
8998 (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
8999 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9001 def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
9002 (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9003 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
9004 EVEX_K, Sched<[MR]>;
// vcvtps2ph with {sae}: register-only EVEX_B forms, with unmasked (rrb),
// merge-masked (rrbk) and zero-masked (rrbkz) variants.
9009 multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9011 let hasSideEffects = 0, Uses = [MXCSR] in {
9012 def rrb : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9013 (ins _src.RC:$src1, i32u8imm:$src2),
9014 "vcvtps2ph\t{$src2, {sae}, $src1, $dst|$dst, $src1, {sae}, $src2}",
9015 [(set _dest.RC:$dst,
9016 (X86cvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
9017 EVEX_B, Sched<[Sched]>;
9018 let Constraints = "$src0 = $dst" in
9019 def rrbk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9020 (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9021 "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}}|$dst {${mask}}, $src1, {sae}, $src2}",
9022 [(set _dest.RC:$dst,
9023 (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
9024 _dest.RC:$src0, _src.KRCWM:$mask))]>,
9025 EVEX_B, Sched<[Sched]>, EVEX_K;
9026 def rrbkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9027 (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9028 "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, {sae}, $src2}",
9029 [(set _dest.RC:$dst,
9030 (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
9031 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
9032 EVEX_B, Sched<[Sched]>, EVEX_KZ;
// Instantiations of vcvtps2ph for 512-bit (with {sae}) and, under VLX,
// 256/128-bit. The Pat<>s lower store(cvtps2ph) to the pattern-less mr
// defs; the 128-bit result occupies only 64 bits, hence the extractelt
// of the low f64/i64 element for the store.
9036 let Predicates = [HasAVX512] in {
9037 defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
9038 WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
9039 avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
9040 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
9042 def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
9043 (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
9046 let Predicates = [HasVLX] in {
9047 defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
9048 WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
9049 EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
9050 defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
9051 WriteCvtPS2PH, WriteCvtPS2PHSt>,
9052 EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
9054 def : Pat<(store (f64 (extractelt
9055 (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9056 (iPTR 0))), addr:$dst),
9057 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
9058 def : Pat<(store (i64 (extractelt
9059 (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9060 (iPTR 0))), addr:$dst),
9061 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
9062 def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
9063 (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
9066 // Unordered/Ordered scalar fp compare with Sae and set EFLAGS
// Assembler-only (empty pattern list, hasSideEffects = 0): register form
// with {sae}, used by the v(u)comiss/sd/sh instantiations below.
9067 multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
9068                               string OpcodeStr, Domain d,
9069                               X86FoldableSchedWrite sched = WriteFComX> {
9070 let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in
9071 def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
9072 !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
9073 EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
// {sae} compare forms (opcode 0x2E = unordered, 0x2F = ordered).
9076 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9077 defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
9078 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9079 defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
9080 AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
9081 defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
9082 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9083 defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
9084 AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
// Pattern-bearing scalar compares (FR32X/FR64X forms), plus isCodeGenOnly
// intrinsic forms operating on the full VR128X register.
9087 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9088 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
9089 "ucomiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9090 EVEX_CD8<32, CD8VT1>;
9091 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
9092 "ucomisd", SSEPackedDouble>, TB, PD, EVEX,
9093 VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9094 defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
9095 "comiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9096 EVEX_CD8<32, CD8VT1>;
9097 defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
9098 "comisd", SSEPackedDouble>, TB, PD, EVEX,
9099 VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9100 let isCodeGenOnly = 1 in {
9101 defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
9102 sse_load_f32, "ucomiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9103 EVEX_CD8<32, CD8VT1>;
9104 defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
9105 sse_load_f64, "ucomisd", SSEPackedDouble>, TB, PD, EVEX,
9106 VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9108 defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
9109 sse_load_f32, "comiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9110 EVEX_CD8<32, CD8VT1>;
9111 defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
9112 sse_load_f64, "comisd", SSEPackedDouble>, TB, PD, EVEX,
9113 VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
// FP16 variants of the compare-and-set-EFLAGS instructions (map 5).
9117 let Defs = [EFLAGS], Predicates = [HasFP16] in {
9118 defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish",
9119 SSEPackedSingle>, AVX512PSIi8Base, T_MAP5,
9120 EVEX_CD8<16, CD8VT1>;
9121 defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish",
9122 SSEPackedSingle>, AVX512PSIi8Base, T_MAP5,
9123 EVEX_CD8<16, CD8VT1>;
9124 defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16,
9125 "ucomish", SSEPackedSingle>, T_MAP5, EVEX,
9126 VEX_LIG, EVEX_CD8<16, CD8VT1>;
9127 defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16,
9128 "comish", SSEPackedSingle>, T_MAP5, EVEX,
9129 VEX_LIG, EVEX_CD8<16, CD8VT1>;
9130 let isCodeGenOnly = 1 in {
9131 defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem,
9132 sse_load_f16, "ucomish", SSEPackedSingle>,
9133 T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9135 defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem,
9136 sse_load_f16, "comish", SSEPackedSingle>,
9137 T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9141 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh
// Scalar approximate reciprocal / reciprocal-sqrt: register-register and
// register-memory masked forms. The FP16 instantiations reuse the same
// multiclass with prd = HasFP16.
9142 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9143                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
9144                          Predicate prd = HasAVX512> {
9145 let Predicates = [prd], ExeDomain = _.ExeDomain in {
9146 defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9147 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9148 "$src2, $src1", "$src1, $src2",
9149 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9150 EVEX, VVVV, VEX_LIG, Sched<[sched]>;
9151 defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9152 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9153 "$src2, $src1", "$src1, $src2",
9154 (OpNode (_.VT _.RC:$src1),
9155 (_.ScalarIntMemFrags addr:$src2))>, EVEX, VVVV, VEX_LIG,
9156 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiations. Note the FP16 rcpsh/rsqrtsh forms share the X86rcp14s /
// X86rsqrt14s nodes with the *14* variants.
9160 defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl,
9161 f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>,
9163 defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s,
9164 SchedWriteFRsqrt.Scl, f16x_info, HasFP16>,
9165 EVEX_CD8<16, CD8VT1>, T_MAP6, PD;
9166 let Uses = [MXCSR] in {
9167 defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
9168 f32x_info>, EVEX_CD8<32, CD8VT1>,
9170 defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
9171 f64x_info>, REX_W, EVEX_CD8<64, CD8VT1>,
9173 defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
9174 SchedWriteFRsqrt.Scl, f32x_info>,
9175 EVEX_CD8<32, CD8VT1>, T8, PD;
9176 defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
9177 SchedWriteFRsqrt.Scl, f64x_info>, REX_W,
9178 EVEX_CD8<64, CD8VT1>, T8, PD;
9181 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
// Packed approximate reciprocal / reciprocal-sqrt: register, full-vector
// memory, and broadcast-memory (EVEX_B) masked forms.
9182 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
9183                          X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9184 let ExeDomain = _.ExeDomain in {
9185 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9186 (ins _.RC:$src), OpcodeStr, "$src", "$src",
9187 (_.VT (OpNode _.RC:$src))>, EVEX, T8, PD,
9189 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9190 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9192 (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8, PD,
9193 Sched<[sched.Folded, sched.ReadAfterFold]>;
9194 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9195 (ins _.ScalarMemOp:$src), OpcodeStr,
9196 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9198 (_.BroadcastLdFrag addr:$src)))>,
9199 EVEX, T8, PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates all vector widths: 512-bit unconditionally, 128/256-bit
// under VLX, and the FP16 "ph" forms under HasFP16 (map 6, no "14" infix).
9203 multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
9204                                 X86SchedWriteWidths sched> {
9205 let Uses = [MXCSR] in {
9206 defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM,
9207 v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
9208 defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM,
9209 v8f64_info>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
9211 let Predicates = [HasFP16] in
9212 defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
9213 v32f16_info>, EVEX_V512, T_MAP6, EVEX_CD8<16, CD8VF>;
9215 // Define only if AVX512VL feature is present.
9216 let Predicates = [HasVLX], Uses = [MXCSR] in {
9217 defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9218 OpNode, sched.XMM, v4f32x_info>,
9219 EVEX_V128, EVEX_CD8<32, CD8VF>;
9220 defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9221 OpNode, sched.YMM, v8f32x_info>,
9222 EVEX_V256, EVEX_CD8<32, CD8VF>;
9223 defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9224 OpNode, sched.XMM, v2f64x_info>,
9225 EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>;
9226 defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9227 OpNode, sched.YMM, v4f64x_info>,
9228 EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>;
9230 let Predicates = [HasFP16, HasVLX] in {
9231 defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9232 OpNode, sched.XMM, v8f16x_info>,
9233 EVEX_V128, T_MAP6, EVEX_CD8<16, CD8VF>;
9234 defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9235 OpNode, sched.YMM, v16f16x_info>,
9236 EVEX_V256, T_MAP6, EVEX_CD8<16, CD8VF>;
9240 defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>;
9241 defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>;
9243 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
// Scalar 28-bit-accuracy approximations: rr, rr+{sae} (EVEX_B), and rm
// forms with separate default and SAE SDNodes.
9244 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
9245                          SDNode OpNode, SDNode OpNodeSAE,
9246                          X86FoldableSchedWrite sched> {
9247 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
9248 defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9249 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9250 "$src2, $src1", "$src1, $src2",
9251 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9252 Sched<[sched]>, SIMD_EXC;
9254 defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9255 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9256 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
9257 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9258 EVEX_B, Sched<[sched]>;
9260 defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9261 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9262 "$src2, $src1", "$src1, $src2",
9263 (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
9264 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
// Assembler-only ("_ass") variant: identical shapes but null_frag patterns,
// used for vrcp28/vrsqrt28 scalar forms below.
9268 multiclass avx512_fp28_s_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9269                              X86FoldableSchedWrite sched> {
9270 let ExeDomain = _.ExeDomain, Uses = [MXCSR], hasSideEffects = 0 in {
9271 defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9272 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9273 "$src2, $src1", "$src1, $src2",
9274 (null_frag)>, Sched<[sched]>, SIMD_EXC;
9275 defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9276 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9277 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
9278 (null_frag)>, EVEX_B, Sched<[sched]>;
9280 defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9281 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9282 "$src2, $src1", "$src1, $src2",
9284 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9288 multiclass avx512_eri_s_ass<bits<8> opc, string OpcodeStr,
9289                             X86FoldableSchedWrite sched> {
9290 defm SSZ : avx512_fp28_s_ass<opc, OpcodeStr#"ss", f32x_info, sched>,
9291 EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD, EVEX, VVVV;
9292 defm SDZ : avx512_fp28_s_ass<opc, OpcodeStr#"sd", f64x_info, sched>,
9293 EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD, EVEX, VVVV;
9296 defm VRCP28 : avx512_eri_s_ass<0xCB, "vrcp28", SchedWriteFRcp.Scl>;
9297 defm VRSQRT28 : avx512_eri_s_ass<0xCD, "vrsqrt28", SchedWriteFRsqrt.Scl>;
// Pattern-bearing scalar ERI wrapper (ss/sd), plus the FP16 "sh" extension
// used only by vgetexp; instantiated together for VGETEXP below.
9299 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9300                         SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9301 defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
9302 sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD, EVEX, VVVV;
9303 defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
9304 sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD, EVEX, VVVV;
9307 multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
9308                             SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9309 let Predicates = [HasFP16] in
9310 defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode, OpNodeSAE, sched>,
9311 EVEX_CD8<16, CD8VT1>, T_MAP6, PD, EVEX, VVVV;
9314 defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9315 SchedWriteFRnd.Scl>,
9316 avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9317 SchedWriteFRnd.Scl>;
9318 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
// Packed ERI forms: r, m, and broadcast mb with patterns ...
9320 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9321                          SDNode OpNode, X86FoldableSchedWrite sched> {
9322 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9323 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9324 (ins _.RC:$src), OpcodeStr, "$src", "$src",
9325 (OpNode (_.VT _.RC:$src))>,
9328 defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9329 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9331 (bitconvert (_.LdFrag addr:$src))))>,
9332 Sched<[sched.Folded, sched.ReadAfterFold]>;
9334 defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9335 (ins _.ScalarMemOp:$src), OpcodeStr,
9336 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9338 (_.BroadcastLdFrag addr:$src)))>,
9339 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// ... and the register-only {sae} (EVEX_B) companion.
9342 multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9343                              SDNode OpNode, X86FoldableSchedWrite sched> {
9344 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
9345 defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9346 (ins _.RC:$src), OpcodeStr,
9347 "{sae}, $src", "$src, {sae}",
9348 (OpNode (_.VT _.RC:$src))>,
9349 EVEX_B, Sched<[sched]>;
// Assembler-only ("_ass") mirrors of the two multiclasses above
// (null_frag patterns, hasSideEffects = 0).
9352 multiclass avx512_fp28_p_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9353                              X86FoldableSchedWrite sched> {
9354 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1,
9355     hasSideEffects = 0 in {
9356 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9357 (ins _.RC:$src), OpcodeStr, "$src", "$src",
9358 (null_frag)>, Sched<[sched]>;
9360 defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9361 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9363 Sched<[sched.Folded, sched.ReadAfterFold]>;
9365 defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9366 (ins _.ScalarMemOp:$src), OpcodeStr,
9367 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9369 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9372 multiclass avx512_fp28_p_sae_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9373                                  X86FoldableSchedWrite sched> {
9374 let ExeDomain = _.ExeDomain, Uses = [MXCSR], hasSideEffects = 0 in
9375 defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9376 (ins _.RC:$src), OpcodeStr,
9377 "{sae}, $src", "$src, {sae}",
9378 (null_frag)>, Sched<[sched]>, EVEX_B;
// 512-bit ERI wrappers: assembler-only (vrsqrt28/vrcp28/vexp2 packed) and
// pattern-bearing (used by vgetexp).
9381 multiclass avx512_eri_ass<bits<8> opc, string OpcodeStr,
9382                           X86SchedWriteWidths sched> {
9383 defm PSZ : avx512_fp28_p_ass<opc, OpcodeStr#"ps", v16f32_info, sched.ZMM>,
9384 avx512_fp28_p_sae_ass<opc, OpcodeStr#"ps", v16f32_info, sched.ZMM>,
9385 T8, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
9386 defm PDZ : avx512_fp28_p_ass<opc, OpcodeStr#"pd", v8f64_info, sched.ZMM>,
9387 avx512_fp28_p_sae_ass<opc, OpcodeStr#"pd", v8f64_info, sched.ZMM>,
9388 T8, PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
9391 defm VRSQRT28 : avx512_eri_ass<0xCC, "vrsqrt28", SchedWriteFRsqrt>, EVEX;
9392 defm VRCP28 : avx512_eri_ass<0xCA, "vrcp28", SchedWriteFRcp>, EVEX;
9393 defm VEXP2 : avx512_eri_ass<0xC8, "vexp2", SchedWriteFAdd>, EVEX;
9395 multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
9396                       SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9397 defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
9398 avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
9399 T8, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
9400 defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
9401 avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
9402 T8, PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
// VLX (128/256-bit) packed unary-op wrapper reusing avx512_fp28_p, plus the
// FP16 vgetexp extension; all three are combined into the VGETEXP defm.
9405 multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
9406                                     SDNode OpNode, X86SchedWriteWidths sched> {
9407 // Define only if AVX512VL feature is present.
9408 let Predicates = [HasVLX] in {
9409 defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
9411 EVEX_V128, T8, PD, EVEX_CD8<32, CD8VF>;
9412 defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
9414 EVEX_V256, T8, PD, EVEX_CD8<32, CD8VF>;
9415 defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
9417 EVEX_V128, REX_W, T8, PD, EVEX_CD8<64, CD8VF>;
9418 defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
9420 EVEX_V256, REX_W, T8, PD, EVEX_CD8<64, CD8VF>;
9424 multiclass avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
9425                                SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9426 let Predicates = [HasFP16] in
9427 defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>,
9428 avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>,
9429 T_MAP6, PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
9430 let Predicates = [HasFP16, HasVLX] in {
9431 defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>,
9432 EVEX_V128, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
9433 defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>,
9434 EVEX_V256, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
9437 defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9439 avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9441 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
9442 SchedWriteFRnd>, EVEX;
// Packed sqrt with embedded rounding control ($rc, EVEX_B + EVEX_RC).
9444 multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
9445                                     X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9446 let ExeDomain = _.ExeDomain in
9447 defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9448 (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
9449 (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
9450 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
// Packed sqrt: r/m/mb masked forms using _split to separate the
// strict-capable any_fsqrt pattern (unmasked) from plain fsqrt (masked).
9453 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
9454                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9455 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9456 defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
9457 (ins _.RC:$src), OpcodeStr, "$src", "$src",
9458 (_.VT (any_fsqrt _.RC:$src)),
9459 (_.VT (fsqrt _.RC:$src))>, EVEX,
9461 defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9462 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9463 (any_fsqrt (_.VT (_.LdFrag addr:$src))),
9464 (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
9465 Sched<[sched.Folded, sched.ReadAfterFold]>;
9466 defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9467 (ins _.ScalarMemOp:$src), OpcodeStr,
9468 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9469 (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
9470 (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
9471 EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// All-width packed sqrt instantiation helper: FP16 (map 5), PS (TB) and
// PD (TB,PD + REX_W) at 512-bit, plus 128/256-bit under VLX.
9475 let Uses = [MXCSR], mayRaiseFPException = 1 in
9476 multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
9477                                   X86SchedWriteSizes sched> {
9478 let Predicates = [HasFP16] in
9479 defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9480 sched.PH.ZMM, v32f16_info>,
9481 EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
9482 let Predicates = [HasFP16, HasVLX] in {
9483 defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9484 sched.PH.XMM, v8f16x_info>,
9485 EVEX_V128, T_MAP5, EVEX_CD8<16, CD8VF>;
9486 defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9487 sched.PH.YMM, v16f16x_info>,
9488 EVEX_V256, T_MAP5, EVEX_CD8<16, CD8VF>;
9490 defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9491 sched.PS.ZMM, v16f32_info>,
9492 EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
9493 defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9494 sched.PD.ZMM, v8f64_info>,
9495 EVEX_V512, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
9496 // Define only if AVX512VL feature is present.
9497 let Predicates = [HasVLX] in {
9498 defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9499 sched.PS.XMM, v4f32x_info>,
9500 EVEX_V128, TB, EVEX_CD8<32, CD8VF>;
9501 defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9502 sched.PS.YMM, v8f32x_info>,
9503 EVEX_V256, TB, EVEX_CD8<32, CD8VF>;
9504 defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9505 sched.PD.XMM, v2f64x_info>,
9506 EVEX_V128, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
9507 defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9508 sched.PD.YMM, v4f64x_info>,
9509 EVEX_V256, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
// Embedded-rounding variants exist only at 512-bit width.
9513 let Uses = [MXCSR] in
9514 multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
9515                                         X86SchedWriteSizes sched> {
9516 let Predicates = [HasFP16] in
9517 defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
9518 sched.PH.ZMM, v32f16_info>,
9519 EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
9520 defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
9521 sched.PS.ZMM, v16f32_info>,
9522 EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
9523 defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
9524 sched.PD.ZMM, v8f64_info>,
9525 EVEX_V512, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
// Scalar sqrt: intrinsic (_Int) forms with masking and embedded rounding,
// plus isCodeGenOnly FRC-register forms matched by the Pat<>s at the end
// (memory fold only under OptForSize).
9528 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9529                               X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> {
9530 let ExeDomain = _.ExeDomain, Predicates = [prd] in {
9531 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9532 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9533 "$src2, $src1", "$src1, $src2",
9534 (X86fsqrts (_.VT _.RC:$src1),
9535 (_.VT _.RC:$src2))>,
9536 Sched<[sched]>, SIMD_EXC;
9537 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9538 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9539 "$src2, $src1", "$src1, $src2",
9540 (X86fsqrts (_.VT _.RC:$src1),
9541 (_.ScalarIntMemFrags addr:$src2))>,
9542 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9543 let Uses = [MXCSR] in
9544 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9545 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
9546 "$rc, $src2, $src1", "$src1, $src2, $rc",
9547 (X86fsqrtRnds (_.VT _.RC:$src1),
9550 EVEX_B, EVEX_RC, Sched<[sched]>;
9552 let isCodeGenOnly = 1, hasSideEffects = 0 in {
9553 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9554 (ins _.FRC:$src1, _.FRC:$src2),
9555 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9556 Sched<[sched]>, SIMD_EXC;
9558 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9559 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
9560 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9561 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
// Scalar any_fsqrt lowers onto the codegen-only forms with an undef
// first source (the pass-through register).
9565 let Predicates = [prd] in {
9566 def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
9567 (!cast<Instruction>(Name#Zr)
9568 (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
9571 let Predicates = [prd, OptForSize] in {
9572 def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
9573 (!cast<Instruction>(Name#Zm)
9574 (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
9578 multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
9579                                   X86SchedWriteSizes sched> {
9580 defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>,
9581 EVEX_CD8<16, CD8VT1>, EVEX, VVVV, T_MAP5, XS;
9582 defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
9583 EVEX_CD8<32, CD8VT1>, EVEX, VVVV, TB, XS;
9584 defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
9585 EVEX_CD8<64, CD8VT1>, EVEX, VVVV, TB, XD, REX_W;
9588 defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
9589 avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
9591 defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
// Scalar round-to-integral with an immediate control byte ($src3):
// intrinsic forms (plain, {sae}, memory), codegen-only FRC forms, and
// Pat<>s lowering X86any_VRndScale (memory fold only under OptForSize).
9593 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
9594                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9595 let ExeDomain = _.ExeDomain in {
9596 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9597 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9598 "$src3, $src2, $src1", "$src1, $src2, $src3",
9599 (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9600 (i32 timm:$src3)))>,
9601 Sched<[sched]>, SIMD_EXC;
9603 let Uses = [MXCSR] in
9604 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9605 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9606 "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
9607 (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9608 (i32 timm:$src3)))>, EVEX_B,
9611 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9612 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
9614 "$src3, $src2, $src1", "$src1, $src2, $src3",
9615 (_.VT (X86RndScales _.RC:$src1,
9616 (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
9617 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9619 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
9620 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9621 (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
9622 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9623 []>, Sched<[sched]>, SIMD_EXC;
9626 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9627 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9628 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9629 []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9633 let Predicates = [HasAVX512] in {
9634 def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
9635 (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
9636 _.FRC:$src1, timm:$src2))>;
9639 let Predicates = [HasAVX512, OptForSize] in {
9640 def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
9641 (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
9642 addr:$src1, timm:$src2))>;
// Instantiations: sh (FP16, opcode 0x0A, map TA), ss (0x0A), sd (0x0B).
9646 let Predicates = [HasFP16] in
9647 defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh",
9648 SchedWriteFRnd.Scl, f16x_info>,
9649 AVX512PSIi8Base, TA, EVEX, VVVV,
9650 EVEX_CD8<16, CD8VT1>;
9652 defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
9653 SchedWriteFRnd.Scl, f32x_info>,
9654 AVX512AIi8Base, EVEX, VVVV, VEX_LIG,
9655 EVEX_CD8<32, CD8VT1>;
9657 defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
9658 SchedWriteFRnd.Scl, f64x_info>,
9659 REX_W, AVX512AIi8Base, EVEX, VVVV, VEX_LIG,
9660 EVEX_CD8<64, CD8VT1>;
// Pattern-only multiclass: folds a scalar select-under-mask of OpNode's
// result into the masked (_Intk / _Intkz) instruction variants.
9662 multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
9663                                 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
9664                                 dag OutMask, Predicate BasePredicate> {
9665 let Predicates = [BasePredicate] in {
9666 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9667 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9668 (extractelt _.VT:$dst, (iPTR 0))))),
9669 (!cast<Instruction>("V"#OpcPrefix#r_Intk)
9670 _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
9672 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9673 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9675 (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
9676 OutMask, _.VT:$src2, _.VT:$src1)>;
// Masked scalar sqrt patterns for sh/ss/sd; the GR32 mask is truncated
// and copied into a VK1WM mask register.
9680 defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh,
9681 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info,
9682 fp16imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasFP16>;
9683 defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
9684 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
9685 fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
9686 defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
9687 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
9688 fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
9691 //-------------------------------------------------
9692 // Integer truncate and extend operations
9693 //-------------------------------------------------
// Register forms (rr/rrk/rrkz) and assembly-only store forms (mr/mrk) of one
// VPMOV* truncating move at a single vector width. OpNode is the unmasked
// truncate node; MaskNode carries the masked (merge/zero) semantics.
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDPatternOperator MaskNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
  let ExeDomain = DestInfo.ExeDomain in {
  // Unmasked truncate, register to register.
  def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
                      (ins SrcInfo.RC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      [(set DestInfo.RC:$dst,
                            (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
           EVEX, Sched<[sched]>;
  // Merge-masking: $src0 supplies pass-through elements and is tied to $dst.
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
                       (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
                       OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                       [(set DestInfo.RC:$dst,
                             (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                                       (DestInfo.VT DestInfo.RC:$src0),
                                       SrcInfo.KRCWM:$mask))]>,
            EVEX, EVEX_K, Sched<[sched]>;
  // Zero-masking: masked-off destination elements come from ImmAllZerosV.
  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
                        (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
                        OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                        [(set DestInfo.RC:$dst,
                              (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                                                     DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
             EVEX, EVEX_KZ, Sched<[sched]>;

  // Store forms carry no ISel patterns ([]); avx512_trunc_mr_lowering below
  // attaches the Pat<>s that select them from (masked) truncating stores.
  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
  def mr : AVX512XS8I<opc, MRMDestMem, (outs),
                      (ins x86memop:$dst, SrcInfo.RC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
           EVEX, Sched<[sched.Folded]>;

  def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
                       (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
                       OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
            EVEX, EVEX_K, Sched<[sched.Folded]>;
  }//mayStore = 1, hasSideEffects = 0
// Selects (masked) truncating stores onto the mr/mrk store forms emitted by
// avx512_trunc_common. Name is the instruction-name stem (passed as NAME by
// the caller); SrcInfo.ZSuffix picks the width-specific record.
multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
                                    PatFrag truncFrag, PatFrag mtruncFrag,
                                    string Name> {
  // Unmasked truncating store.
  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
                                addr:$dst, SrcInfo.RC:$src)>;

  // Masked truncating store.
  def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
                        SrcInfo.KRCWM:$mask),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
                                addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}
// Ties the three vector widths of one VPMOV* truncate together: Z128/Z256
// under [HasVLX, prd], Z under [prd] alone. Separate per-width OpNode /
// MaskNode parameters let narrow widths use the "in-vector" node variants.
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
                        SDNode OpNode256, SDNode OpNode512,
                        SDPatternOperator MaskNode128,
                        SDPatternOperator MaskNode256,
                        SDPatternOperator MaskNode512,
                        X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTSrcInfo,
                        X86VectorVTInfo DestInfoZ128,
                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
                        X86MemOperand x86memopZ, PatFrag truncFrag,
                        PatFrag mtruncFrag, Predicate prd = HasAVX512>{

  let Predicates = [HasVLX, prd] in {
  defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched.XMM,
                                 VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
             avx512_trunc_mr_lowering<VTSrcInfo.info128, truncFrag,
                                      mtruncFrag, NAME>, EVEX_V128;

  defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched.YMM,
                                 VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
             avx512_trunc_mr_lowering<VTSrcInfo.info256, truncFrag,
                                      mtruncFrag, NAME>, EVEX_V256;

  let Predicates = [prd] in
  defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched.ZMM,
                              VTSrcInfo.info512, DestInfoZ, x86memopZ>,
          avx512_trunc_mr_lowering<VTSrcInfo.info512, truncFrag,
                                   mtruncFrag, NAME>, EVEX_V512;
// qword->byte truncate: the result never exceeds 8 bytes, so all three
// widths use the in-vector node and a v16i8 destination; stores are
// 2/4/8 bytes (CD8VO octo-fraction tuple).
multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
                          InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
// qword->word truncate: only the 512-bit form fills a full v8i16, so only it
// uses the plain OpNode/MaskNode; Z128/Z256 use the in-vector variants.
multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
// qword->dword truncate: Z256/Z fill at least a full v4i32 and use the plain
// nodes; only Z128 needs the in-vector variants.
multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
// dword->byte truncate: only the 512-bit form fills a full v16i8; Z128/Z256
// use the in-vector variants.
multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
// dword->word truncate: Z256/Z use the plain nodes, Z128 the in-vector ones.
multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
// word->byte truncate: requires BWI (prd = HasBWI is forwarded); Z256/Z use
// the plain nodes, Z128 the in-vector ones.
multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i16_info, v16i8x_info, v16i8x_info,
                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
// VPMOV{,S,US}{QB,QW,QD,DB,DW,WB}: plain, signed-saturating ("S") and
// unsigned-saturating ("US") truncating moves, paired with their truncating
// store and masked-truncating-store PatFrags.
defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",
                                  SchedWriteVecTruncate, truncstorevi8,
                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",
                                  SchedWriteVecTruncate, truncstore_s_vi8,
                                  masked_truncstore_s_vi8, X86vtruncs, X86vmtruncs>;
defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb",
                                  SchedWriteVecTruncate, truncstore_us_vi8,
                                  masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>;

defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
                                  SchedWriteVecTruncate, truncstorevi16,
                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs,
                                  SchedWriteVecTruncate, truncstore_s_vi16,
                                  masked_truncstore_s_vi16, X86vtruncs, X86vmtruncs>;
defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
                                  select_truncus, SchedWriteVecTruncate,
                                  truncstore_us_vi16, masked_truncstore_us_vi16,
                                  X86vtruncus, X86vmtruncus>;

defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
                                  SchedWriteVecTruncate, truncstorevi32,
                                  masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs,
                                  SchedWriteVecTruncate, truncstore_s_vi32,
                                  masked_truncstore_s_vi32, X86vtruncs, X86vmtruncs>;
defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
                                  select_truncus, SchedWriteVecTruncate,
                                  truncstore_us_vi32, masked_truncstore_us_vi32,
                                  X86vtruncus, X86vmtruncus>;

defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
                                  SchedWriteVecTruncate, truncstorevi8,
                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
                                  SchedWriteVecTruncate, truncstore_s_vi8,
                                  masked_truncstore_s_vi8, X86vtruncs, X86vmtruncs>;
defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
                                  select_truncus, SchedWriteVecTruncate,
                                  truncstore_us_vi8, masked_truncstore_us_vi8,
                                  X86vtruncus, X86vmtruncus>;

defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
                                  SchedWriteVecTruncate, truncstorevi16,
                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
                                  SchedWriteVecTruncate, truncstore_s_vi16,
                                  masked_truncstore_s_vi16, X86vtruncs, X86vmtruncs>;
defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
                                  select_truncus, SchedWriteVecTruncate,
                                  truncstore_us_vi16, masked_truncstore_us_vi16,
                                  X86vtruncus, X86vmtruncus>;

defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
                                  SchedWriteVecTruncate, truncstorevi8,
                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
                                  SchedWriteVecTruncate, truncstore_s_vi8,
                                  masked_truncstore_s_vi8, X86vtruncs, X86vmtruncs>;
defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
                                  select_truncus, SchedWriteVecTruncate,
                                  truncstore_us_vi8, masked_truncstore_us_vi8,
                                  X86vtruncus, X86vmtruncus>;
// With 512-bit EVEX but no VLX, narrow truncates are implemented by widening
// the source into a ZMM register (INSERT_SUBREG over IMPLICIT_DEF),
// truncating at 512 bits, and extracting the low XMM subvector.
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
         (v8i16 (EXTRACT_SUBREG
                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                              VR256X:$src, sub_ymm)))), sub_xmm))>;
def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
         (v4i32 (EXTRACT_SUBREG
                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                            VR256X:$src, sub_ymm)))), sub_xmm))>;
// Same widen-truncate-extract trick for the BWI-only word->byte truncate.
let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR256X:$src, sub_ymm))), sub_xmm))>;
// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
// Selects the masked truncate nodes onto the rrk/rrkz register forms of an
// already-defined VPMOV* instruction (InstrName is the record-name stem).
multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
                           X86VectorVTInfo DestInfo,
                           X86VectorVTInfo SrcInfo> {
  // Merge-masking: $src0 is the pass-through vector.
  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 (DestInfo.VT DestInfo.RC:$src0),
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
                                                 SrcInfo.KRCWM:$mask,
                                                 SrcInfo.RC:$src)>;

  // Zero-masking.
  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.ImmAllZerosV,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
                                                  SrcInfo.RC:$src)>;
}
// 256-bit dword->word masked-truncate lowerings need VLX.
let Predicates = [HasVLX] in {
defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;

// 512-bit d->w, d->b and q->w lowerings only need base AVX-512.
let Predicates = [HasAVX512] in {
defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
// Register and extending-load forms of one VPMOVSX/VPMOVZX width, with the
// standard masking variants supplied by AVX512_maskable.
multiclass avx512_pmovx_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
                               X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
                               X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                            (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
                            (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
            EVEX, Sched<[sched]>;

  // Memory form extends directly from the load via LdFrag.
  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                            (ins x86memop:$src), OpcodeStr ,"$src", "$src",
                            (DestInfo.VT (LdFrag addr:$src))>,
            EVEX, Sched<[sched.Folded]>;
// byte->word extend (BWI-gated). Z128 consumes only the low 8 bytes of a
// v16i8 source, so it selects via InVecNode; Z256/Z use the full-width
// OpNode. ExtTy ("s"/"z") picks the matching extloadvi8 PatFrag.
multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
  defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v8i16x_info,
                                 v16i8x_info, i64mem, LdFrag, InVecNode>,
             EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V128, WIG;

  defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v16i16x_info,
                                 v16i8x_info, i128mem, LdFrag, OpNode>,
             EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V256, WIG;
  let Predicates = [HasBWI] in {
  defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v32i16_info,
                               v32i8x_info, i256mem, LdFrag, OpNode>,
           EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V512, WIG;
// byte->dword extend: Z128/Z256 consume only part of a v16i8 source
// (InVecNode); Z consumes all 16 bytes (OpNode).
multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
  defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
                                 v16i8x_info, i32mem, LdFrag, InVecNode>,
             EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V128, WIG;

  defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
                                 v16i8x_info, i64mem, LdFrag, InVecNode>,
             EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V256, WIG;
  let Predicates = [HasAVX512] in {
  defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
                               v16i8x_info, i128mem, LdFrag, OpNode>,
           EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V512, WIG;
// byte->qword extend: even the 512-bit form only consumes 8 source bytes,
// so every width uses the in-vector node (no full-width OpNode parameter).
multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
                           SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
  defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
                                 v16i8x_info, i16mem, LdFrag, InVecNode>,
             EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V128, WIG;

  defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
                                 v16i8x_info, i32mem, LdFrag, InVecNode>,
             EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V256, WIG;
  let Predicates = [HasAVX512] in {
  defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
                               v16i8x_info, i64mem, LdFrag, InVecNode>,
           EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V512, WIG;
// word->dword extend: Z128 uses the in-vector node; Z256/Z use OpNode.
multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
  defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
                                 v8i16x_info, i64mem, LdFrag, InVecNode>,
             EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V128, WIG;

  defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
                                 v8i16x_info, i128mem, LdFrag, OpNode>,
             EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V256, WIG;
  let Predicates = [HasAVX512] in {
  defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
                               v16i16x_info, i256mem, LdFrag, OpNode>,
           EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V512, WIG;
// word->qword extend: Z128/Z256 use the in-vector node; Z uses OpNode.
multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
  defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
                                 v8i16x_info, i32mem, LdFrag, InVecNode>,
             EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V128, WIG;

  defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
                                 v8i16x_info, i64mem, LdFrag, InVecNode>,
             EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V256, WIG;
  let Predicates = [HasAVX512] in {
  defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
                               v8i16x_info, i128mem, LdFrag, OpNode>,
           EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V512, WIG;
// dword->qword extend: Z128 uses the in-vector node; Z256/Z use OpNode.
// Note: no WIG here, unlike the byte/word-source variants above.
multiclass avx512_pmovx_dq<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {

  let Predicates = [HasVLX, HasAVX512] in {
  defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
                                 v4i32x_info, i64mem, LdFrag, InVecNode>,
             EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V128;

  defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
                                 v4i32x_info, i128mem, LdFrag, OpNode>,
             EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V256;
  let Predicates = [HasAVX512] in {
  defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
                               v8i32x_info, i256mem, LdFrag, OpNode>,
           EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V512;
// VPMOVZX*/VPMOVSX* instantiations; the "z"/"s" ExtTy strings select the
// zero-/sign-extending load PatFrag defaults in the multiclasses above.
defm VPMOVZXBW : avx512_pmovx_bw<0x30, "vpmovzxbw", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXBD : avx512_pmovx_bd<0x31, "vpmovzxbd", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXBQ : avx512_pmovx_bq<0x32, "vpmovzxbq", zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXWD : avx512_pmovx_wd<0x33, "vpmovzxwd", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXWQ : avx512_pmovx_wq<0x34, "vpmovzxwq", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXDQ : avx512_pmovx_dq<0x35, "vpmovzxdq", zext, zext_invec, "z", SchedWriteVecExtend>;

defm VPMOVSXBW: avx512_pmovx_bw<0x20, "vpmovsxbw", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXBD: avx512_pmovx_bd<0x21, "vpmovsxbd", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXBQ: avx512_pmovx_bq<0x22, "vpmovsxbq", sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXWD: avx512_pmovx_wd<0x23, "vpmovsxwd", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXWQ: avx512_pmovx_wq<0x24, "vpmovsxwq", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXDQ: avx512_pmovx_dq<0x25, "vpmovsxdq", sext, sext_invec, "s", SchedWriteVecExtend>;
// Patterns that we also need any extend versions of. aext_vector_inreg
// is currently legalized to zext_vector_inreg.
// Full-vector extending-load patterns (every source element is consumed),
// shared by the VPMOVSX and VPMOVZX instantiations below.
multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
  // 256-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;

  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;

  // 512-bit patterns
  let Predicates = [HasBWI] in {
  def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
            (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  let Predicates = [HasAVX512] in {
  def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
  def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
            (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
            (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
            (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
// Extend-in-reg patterns: only the low part of the (scalar-loaded) source
// vector is consumed, so these use InVecOp (the *_invec node) rather than
// the full-width ExtOp inherited from AVX512_pmovx_patterns_base.
//
// Fix: the loadf64 forms must wrap the scalar in a v2f64 scalar_to_vector
// (element type must match the loaded f64); two patterns below previously
// used v2i64 and could never match, inconsistent with the parallel
// loadf64 patterns in the 128-bit and 512-bit sections.
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> :
    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
  // 128-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  // 256-bit patterns
  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasAVX512] in {
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  }
}
// Instantiate both the full-width (base) and in-reg extension patterns for
// sign- and zero-extension.
defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
// ext+trunc aggressively making it impossible to legalize the DAG to this
// pattern directly.
let Predicates = [HasAVX512, NoBWI] in {
// Zero-extend words to dword elements, then use the dword->byte truncate.
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
10252 //===----------------------------------------------------------------------===//
10253 // GATHER - SCATTER Operations
10255 // FIXME: Improve scheduling of gather/scatter instructions.
// One gather instruction. Outputs both the data result and the updated mask
// ($mask_wb, tied to $mask); $dst is early-clobber and tied to $src1, the
// merge/pass-through source. No ISel pattern is attached here ([]).
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
  def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
                    (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
                    !strconcat(OpcodeStr#_.Suffix,
                               "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
                    []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
           Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
// 64-bit-element gathers: dopc is the dword-index opcode ("...d" mnemonic),
// qopc the qword-index opcode ("...q"). All forms take REX.W.
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
                                    vy512xmem>, EVEX_V512, REX_W;
  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
                                    vz512mem>, EVEX_V512, REX_W;
  let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
                                       vx256xmem>, EVEX_V256, REX_W;
  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
                                       vy256xmem>, EVEX_V256, REX_W;
  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
                                       vx128xmem>, EVEX_V128, REX_W;
  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                       vx128xmem>, EVEX_V128, REX_W;
// 32-bit-element gathers. The qword-index forms pair a wider index vector
// with the next-narrower data info (e.g. Z uses _.info256, Z128 uses a VK2WM
// mask), since qword indices halve the element count.
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
  let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
                                       vy256xmem>, EVEX_V256;
  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                       vy128xmem>, EVEX_V256;
  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
                                       vx128xmem>, EVEX_V128;
  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                       vx64xmem, VK2WM>, EVEX_V128;
// FP (PD/PS) and integer (Q/D) gathers share the same multiclasses; opcode
// pairs are (dword-index, qword-index).
defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
// One scatter instruction. Like gather, the mask is written back ($mask_wb
// tied to $mask); no ISel pattern is attached here ([]).
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {

  let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
      hasSideEffects = 0 in

  def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
                    (ins memop:$dst, MaskRC:$mask, _.RC:$src),
                    !strconcat(OpcodeStr#_.Suffix,
                               "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
                    []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
           Sched<[WriteStore]>;
// 64-bit-element scatters; structure mirrors avx512_gather_q_pd above
// (dword- vs qword-index opcodes, REX.W on every form).
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
                                     vy512xmem>, EVEX_V512, REX_W;
  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
                                     vz512mem>, EVEX_V512, REX_W;
  let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                        vx256xmem>, EVEX_V256, REX_W;
  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
                                        vy256xmem>, EVEX_V256, REX_W;
  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                        vx128xmem>, EVEX_V128, REX_W;
  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                        vx128xmem>, EVEX_V128, REX_W;
// 32-bit-element scatters; structure mirrors avx512_gather_d_ps above
// (qword-index forms use the next-narrower data info / VK2WM at 128 bits).
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
  let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                        vy256xmem>, EVEX_V256;
  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                        vy128xmem>, EVEX_V256;
  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                        vx128xmem>, EVEX_V128;
  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                        vx64xmem, VK2WM>, EVEX_V128;
// FP (PD/PS) and integer (Q/D) scatters; opcode pairs are
// (dword-index, qword-index).
defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
// Gather/scatter prefetch hint: masked, no destination; flagged as both
// mayLoad and mayStore and carries no ISel pattern.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
                                          RegisterClass KRC, X86MemOperand memop> {
  let mayLoad = 1, mayStore = 1 in
  def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
                   !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
          EVEX, EVEX_K, Sched<[WriteLoad]>;
// V{GATHER,SCATTER}PF{0,1}{D,Q}{PS,PD}: the reg field of the ModRM byte
// distinguishes them (gatherpf0/1 = MRM1m/MRM2m, scatterpf0/1 = MRM5m/MRM6m);
// opcode 0xC6 takes dword indices, 0xC7 qword indices.
defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                    VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                    VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                    VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                    VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                    VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                    VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                    VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                    VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
// Mask-to-vector move: sign-extend each bit of a mask register into the
// corresponding vector element (all-ones or all-zeros). Register form only;
// used below to build the VPMOVM2* instructions.
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr, SchedWrite Sched> {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                  !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
                  EVEX, Sched<[Sched]>;
// Instantiate cvt_by_vec_width at all three vector widths: 512-bit under the
// base predicate, 128/256-bit additionally gated on HasVLX.
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr, WriteVecMoveZ>, EVEX_V512;

let Predicates = [prd, HasVLX] in {
  defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr, WriteVecMoveY>, EVEX_V256;
  defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr, WriteVecMoveX>, EVEX_V128;
// VPMOVM2{B,W,D,Q}: mask register -> vector of all-ones/all-zeros elements.
// Byte/word forms require BWI, dword/qword forms require DQI; the wider
// element size within each opcode pair is selected by REX_W.
defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , REX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , REX_W;
// Vector-to-mask move: each mask bit is set when the source element's sign
// bit is set, modeled as the comparison 0 > x (X86pcmpgtm with an
// all-zeros LHS).
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
                   EVEX, Sched<[WriteMove]>;
// Use 512bit version to implement 128/256 bit in case NoVLX.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
  // Widen the narrow source into the low lanes of an undef 512-bit register
  // via INSERT_SUBREG, run the Z-width instruction, then copy the result
  // into the narrow mask register class.
  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(Name#"Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
// All-widths driver: native instructions for 512-bit (and 128/256 with VLX),
// plus NoVLX fallback patterns that widen to the Z-sized instruction.
multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo VTInfo, Predicate prd> {
let Predicates = [prd] in
  defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,

let Predicates = [prd, HasVLX] in {
  defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
  defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,

// Without VLX (but with 512-bit support) lower the narrow forms through the
// 512-bit instruction instead.
let Predicates = [prd, NoVLX, HasEVEX512] in {
  defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
  defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
// VPMOV{B,W,D,Q}2M: vector sign bits -> mask register. Predicates mirror
// the VPMOVM2* direction above: BWI for byte/word, DQI for dword/qword.
defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
                                              avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
                                              avx512vl_i16_info, HasBWI>, REX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
                                              avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
                                              avx512vl_i64_info, HasDQI>, REX_W;
// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
// is available, but BWI is not. We can't handle this in lowering because
// a target independent DAG combine likes to combine sext and trunc.
let Predicates = [HasDQI, NoBWI] in {
  // Sign-extend through i32 elements (VPMOVM2D), then truncate back down
  // with VPMOVDB/VPMOVDW.
  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;

let Predicates = [HasDQI, NoBWI, HasVLX] in {
  // 256-bit variant of the same widen-then-truncate trick.
  def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
10509 //===----------------------------------------------------------------------===//
10510 // AVX-512 - COMPRESS and EXPAND
// COMPRESS: register form plus (masked) store forms. The rr form carries no
// ISel pattern (null_frag) -- selection is done by the explicit patterns in
// compress_by_vec_width_lowering.
multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                        string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>, AVX5128IBase,

  // Plain store form: no pattern, selected via the compressing-store
  // patterns below.
  let mayStore = 1, hasSideEffects = 0 in
  def mr : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.RC:$src),
              OpcodeStr # "\t{$src, $dst|$dst, $src}",
              []>, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;

  // Masked (compressing) store form.
  def mrk : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;
// ISel patterns mapping the compress DAG nodes onto the instructions
// emitted by compress_by_vec_width_common.
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  // Masked compressing store -> mrk form.
  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#mrk)
                            addr:$dst, _.KRCWM:$mask, _.RC:$src)>;

  // Register compress with a pass-through value -> merge-masking (rrk);
  // with a zero pass-through -> zero-masking (rrkz).
  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrk)
                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
                            _.KRCWM:$mask, _.RC:$src)>;
// All-widths driver for COMPRESS: 512-bit under Pred, 128/256-bit under VLX,
// each paired with its lowering patterns.
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
// FIXME: Is there a better scheduler class for VPCOMPRESS?
// Integer (0x8B) and FP (0x8A) compress; 64-bit element forms carry REX_W.
defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
                                          avx512vl_i32_info>, EVEX;
defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
                                          avx512vl_i64_info>, EVEX, REX_W;
defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
                                          avx512vl_f32_info>, EVEX;
defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
                                          avx512vl_f64_info>, EVEX, REX_W;
// EXPAND: register and load forms. No ISel patterns here (null_frag) --
// selection is handled by expand_by_vec_width_lowering.
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                               string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>, AVX5128IBase,

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
              AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
// ISel patterns mapping the expand DAG nodes onto the instructions emitted
// by expand_by_vec_width.
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  // An expanding load with an undef pass-through can reuse the zero-masking
  // load form.
  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
                                _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
                                _.KRCWM:$mask, addr:$src)>;

  // Expanding load with a real pass-through -> merge-masking load form.
  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                                     (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix#rmk)
                                _.RC:$src0, _.KRCWM:$mask, addr:$src)>;

  // Register expand: pass-through -> rrk, zero pass-through -> rrkz.
  def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrk)
                                _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
                                _.KRCWM:$mask, _.RC:$src)>;
// All-widths driver for EXPAND, mirroring compress_by_elt_width above.
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
// FIXME: Is there a better scheduler class for VPEXPAND?
// Integer (0x89) and FP (0x88) expand; 64-bit element forms carry REX_W.
defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
                                      avx512vl_i32_info>, EVEX;
defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
                                      avx512vl_i64_info>, EVEX, REX_W;
defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
                                      avx512vl_f32_info>, EVEX;
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
                                      avx512vl_f64_info>, EVEX, REX_W;
//handle instruction reg_vec1 = op(reg_vec,imm)
//                              op(broadcast(eltVt),imm)
//all instruction created with FROUND_CURRENT
// Three addressing forms: register (rri), full-vector memory (rmi) and
// element broadcast (rmbi, EVEX_B). All may raise FP exceptions and read
// MXCSR. OpNode/MaskOpNode allow a different DAG node for the masked forms.
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
                                      SDPatternOperator OpNode,
                                      SDPatternOperator MaskOpNode,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
                      (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
  defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                    (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                (i32 timm:$src2))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
                    "${src1}"#_.BroadcastStr#", $src2",
                    (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                    (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                                (i32 timm:$src2))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// SAE (suppress-all-exceptions) register-only variant; EVEX_B here encodes
// the {sae} modifier rather than broadcast. Reads MXCSR but does not set
// mayRaiseFPException, since exceptions are suppressed.
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                          SDNode OpNode, X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
                      "$src1, {sae}, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 timm:$src2))>,
                      EVEX_B, Sched<[sched]>;
// Driver: 512-bit forms get both the current-rounding and the SAE variant;
// 128/256-bit (VLX) get only the current-rounding forms, since SAE is
// 512-bit-only.
multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
            SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
  let Predicates = [prd] in {
    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.ZMM, _.info512>,
                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
                                               sched.ZMM, _.info512>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.XMM, _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.YMM, _.info256>, EVEX_V256;
//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                               op(reg_vec2,mem_vec,imm)
//                               op(reg_vec2,broadcast(eltVt),imm)
//all instruction created with FROUND_CURRENT
// Two-source + immediate FP operation: register (rri), memory (rmi) and
// broadcast (rmbi, EVEX_B) forms. May raise FP exceptions; reads MXCSR.
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 timm:$src3))>,
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
                            (i32 timm:$src3))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (_.BroadcastLdFrag addr:$src2)),
                            (i32 timm:$src3))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                               op(reg_vec2,mem_vec,imm)
// Integer two-source + 8-bit immediate operation with possibly different
// source and destination vector types (e.g. vdbpsadbw: i8 sources, i16
// destination). Register (rri) and memory (rmi) forms only.
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
                              X86VectorVTInfo SrcInfo>{
  let ExeDomain = DestInfo.ExeDomain, ImmT = Imm8 in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                                       (SrcInfo.VT SrcInfo.RC:$src2),
                                       (i8 timm:$src3)))>,
  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                                     (SrcInfo.VT (bitconvert
                                                      (SrcInfo.LdFrag addr:$src2))),
                                     (i8 timm:$src3)))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                               op(reg_vec2,mem_vec,imm)
//                               op(reg_vec2,broadcast(eltVt),imm)
// Same-type specialization of avx512_3Op_rm_imm8 that additionally adds the
// element-broadcast form (rmbi, EVEX_B).
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
  let ExeDomain = _.ExeDomain, ImmT = Imm8 in
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                "$src1, ${src2}"#_.BroadcastStr#", $src3",
                (OpNode (_.VT _.RC:$src1),
                        (_.VT (_.BroadcastLdFrag addr:$src2)),
                        (i8 timm:$src3))>, EVEX_B,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                                      op(reg_vec2,mem_scalar,imm)
// Scalar counterpart of avx512_fp_packed_imm: register (rri) and scalar
// memory (rmi) forms; no broadcast form for scalars. May raise FP
// exceptions; reads MXCSR.
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 timm:$src3))>,
  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.ScalarIntMemFrags addr:$src2),
                            (i32 timm:$src3))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Two-source SAE variant; register-only, EVEX_B encodes {sae}. Exceptions
// are suppressed, so only Uses = [MXCSR] is set.
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86FoldableSchedWrite sched,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 timm:$src3))>,
                      EVEX_B, Sched<[sched]>;
//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Scalar SAE variant; note the explicit NAME# prefix on the defm so the
// instruction name matches the non-SAE scalar forms it is grouped with.
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 timm:$src3))>,
                      EVEX_B, Sched<[sched]>;
// Driver for two-source FP+imm ops: 512-bit gets both current-rounding and
// SAE variants; 128/256-bit (VLX) get only the current-rounding forms.
multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
// All-widths driver for avx512_3Op_rm_imm8 (mixed source/dest element
// types). Defaults to HasBWI since its users are byte/word instructions.
multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                    X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
                    AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
  let Predicates = [Pred] in {
    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
                                   SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX, VVVV;
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
                                   SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX, VVVV;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
                                   SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX, VVVV;
// All-widths driver for avx512_3Op_imm8 (same source/dest type, includes
// broadcast forms).
multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                                  bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
                                  Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
// Scalar driver: current-rounding plus SAE scalar forms. Scalar ops always
// use the XMM scheduling class regardless of register width.
multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
            X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
             avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
// Instantiate the unary FP+imm family for all three element types. PH
// (fp16) shares the PS opcode but is gated on HasFP16 and uses the TA map;
// PD gets its own opcode and REX_W.
multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                    bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
                    SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
                    X86SchedWriteWidths sched, Predicate prd>{
  defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info,
                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>,
                            AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
                            AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
                            opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
                            AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, REX_W;
// Packed FP+imm instantiations.
// VREDUCE needs DQI; VRNDSCALE has distinct PS/PD opcodes (0x08/0x09).
defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
                              X86VReduce, X86VReduce, X86VReduceSAE,
                              SchedWriteFRnd, HasDQI>;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
                              X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
                              SchedWriteFRnd, HasAVX512>;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
                              X86VGetMant, X86VGetMant, X86VGetMantSAE,
                              SchedWriteFRnd, HasAVX512>;

// Two-source packed range (DQI only).
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;

// Scalar forms (VEX_LIG: vector length is ignored).
defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
       f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
       AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
       0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
       AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;

defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
       0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
       AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
       0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
       AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
       0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>,
       AVX512PSIi8Base, TA, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>;

defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
       0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
       AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
       0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
       AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info,
       0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>,
       AVX512PSIi8Base, TA, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>;
// VSHUFF/VSHUFI: shuffle of 128-bit lanes. The shuffle node operates on
// CastInfo's type and the result is bitcast back to the instruction type,
// so one X86Shuf128 node can back several element-width spellings.
multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
                                          X86FoldableSchedWrite sched,
                                          X86VectorVTInfo CastInfo> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
                                             (i8 timm:$src3)))))>,
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (CastInfo.VT (X86Shuf128 _.RC:$src1,
                                             (CastInfo.LdFrag addr:$src2),
                                             (i8 timm:$src3)))))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
                       (X86Shuf128 _.RC:$src1,
                                   (_.BroadcastLdFrag addr:$src2),
                                   (i8 timm:$src3)))))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
// Widths driver for the 128-bit-lane shuffles: 512-bit plus a 256-bit VLX
// form. There is no 128-bit form -- a single lane has nothing to shuffle.
multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
                                   AVX512VLVectorVTInfo _,
                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc>{
  let Predicates = [HasAVX512] in
  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                          _.info512, CastInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                             _.info256, CastInfo.info256>, EVEX_V256;
// 32-bit-element spellings cast through the 64-bit info so all four share
// the same X86Shuf128 node; 0x23 = FP forms, 0x43 = integer forms.
defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
      avx512vl_f32_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
      avx512vl_f64_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
      avx512vl_i32_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
      avx512vl_i64_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
// VALIGND/VALIGNQ: concatenate two sources and extract an element-aligned
// window selected by the immediate. Register (rri), memory (rmi) and
// broadcast (rmbi, EVEX_B) forms.
multiclass avx512_valign<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT (X86VAlign _.RC:$src1,
                                 (bitconvert (_.LdFrag addr:$src2)),
                                 (i8 timm:$src3)))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                   OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                   "$src1, ${src2}"#_.BroadcastStr#", $src3",
                   (X86VAlign _.RC:$src1,
                              (_.VT (_.BroadcastLdFrag addr:$src2)),
                              (i8 timm:$src3))>, EVEX_B,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
// All-widths driver for VALIGN; both mnemonics use opcode 0x03.
multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
                                AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
                                AVX512AIi8Base, EVEX, VVVV, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
                                AVX512AIi8Base, EVEX, VVVV, EVEX_V128;
    // We can't really override the 256-bit version so change it back to unset.
    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
                                AVX512AIi8Base, EVEX, VVVV, EVEX_V256;
// VALIGND/VALIGNQ (element-granular) and VPALIGNR (byte-granular, BWI).
defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,

defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
                                         SchedWriteShuffle, avx512vl_i8_info,
                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
// Fragments to help convert valignq into masked valignd. Or valignq/valignd
// into vpalignr.
// valignq shift -> valignd shift: one 64-bit element = two 32-bit elements.
def ValignqImm32XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
// valignq shift -> vpalignr byte shift: 8 bytes per 64-bit element.
def ValignqImm8XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
// valignd shift -> vpalignr byte shift: 4 bytes per 32-bit element.
def ValigndImm8XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
// Rewrite a masked align done at element width 'From' as the equivalent
// instruction at element width 'To' (mask type follows To), scaling the
// immediate with ImmXForm. Covers merge/zero masking for both register and
// load operands.
multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
                                        X86VectorVTInfo From, X86VectorVTInfo To,
                                        SDNodeXForm ImmXForm> {
  // reg-reg, merge masking.
  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                            (From.VT (OpNode From.RC:$src1, From.RC:$src2,
            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, To.RC:$src2,
                                                  (ImmXForm timm:$src3))>;

  // reg-reg, zero masking.
  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                            (From.VT (OpNode From.RC:$src1, From.RC:$src2,
            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
                                                   To.RC:$src1, To.RC:$src2,
                                                   (ImmXForm timm:$src3))>;

  // reg-mem, merge masking.
  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                           (From.VT (OpNode From.RC:$src1,
                                            (From.LdFrag addr:$src2),
            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, addr:$src2,
                                                  (ImmXForm timm:$src3))>;

  // reg-mem, zero masking.
  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                           (From.VT (OpNode From.RC:$src1,
                                            (From.LdFrag addr:$src2),
            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm timm:$src3))>;
// As avx512_vpalign_mask_lowering, plus patterns for the broadcast-memory
// operand forms (only meaningful when the target instruction has an rmbi
// variant, i.e. VALIGND/VALIGNQ, not VPALIGNR).
multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
                                           X86VectorVTInfo From,
                                           X86VectorVTInfo To,
                                           SDNodeXForm ImmXForm> :
      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  // Unmasked broadcast form.
  def : Pat<(From.VT (OpNode From.RC:$src1,
                      (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
                                                  (ImmXForm timm:$src3))>;

  // Broadcast form, merge masking.
  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                           (From.VT (OpNode From.RC:$src1,
                                      (To.VT (To.BroadcastLdFrag addr:$src2))),
            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm timm:$src3))>;

  // Broadcast form, zero masking.
  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                           (From.VT (OpNode From.RC:$src1,
                                      (To.VT (To.BroadcastLdFrag addr:$src2))),
            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
                                                    To.RC:$src1, addr:$src2,
                                                    (ImmXForm timm:$src3))>;
let Predicates = [HasAVX512] in {
  // For 512-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
                                         v16i32_info, ValignqImm32XForm>;

let Predicates = [HasVLX] in {
  // For 128-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
                                         v4i32x_info, ValignqImm32XForm>;
  // For 256-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
                                         v8i32x_info, ValignqImm32XForm>;

let Predicates = [HasVLX, HasBWI] in {
  // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
  // Non-_mb variant: VPALIGNR has no broadcast form.
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
                                      v16i8x_info, ValignqImm8XForm>;
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
                                      v16i8x_info, ValigndImm8XForm>;

// VDBPSADBW: i8 sources producing i16 results, hence the mixed-type driver.
defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
                EVEX_CD8<8, CD8VF>;
// Generic unary vector op: register (rr) and full-vector memory (rm) forms.
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1), OpcodeStr,
                   (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.MemOp:$src1), OpcodeStr,
                  (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
            EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
            Sched<[sched.Folded]>;
// Extends avx512_unary_rm with an embedded-broadcast form (rmb, EVEX.b):
// the operand is a single scalar load splatted across the vector.
multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
           avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.ScalarMemOp:$src1), OpcodeStr,
                             "${src1}"#_.BroadcastStr,
                             "${src1}"#_.BroadcastStr,
                             (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
                             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                             Sched<[sched.Folded]>;
// Instantiates the unary op for all three vector widths: 512-bit under the
// given predicate, 256/128-bit additionally gated on HasVLX.
multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
  defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
// Same as avx512_unary_rm_vl but using the broadcast-capable rmb shell.
multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
  let Predicates = [prd] in
  defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
  defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
// Dword ("d") and qword ("q") element variants; the q form carries REX.W.
// Both element sizes support embedded broadcast, hence the rmb shell.
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
                               avx512vl_i64_info, prd>, REX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
                               avx512vl_i32_info, prd>;
// Byte ("b") and word ("w") element variants. No broadcast form exists for
// sub-dword elements, so the plain rm shell is used; REX.W is ignored (WIG).
multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
                              avx512vl_i16_info, prd>, WIG;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
                              avx512vl_i8_info, prd>, WIG;
// Convenience wrapper producing byte/word/dword/qword variants in one shot.
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched> {
  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,

// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
  // Widen to ZMM via INSERT_SUBREG, run the 512-bit instruction, extract.
  def : Pat<(v4i64 (abs VR256X:$src)),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
  def : Pat<(v2i64 (abs VR128X:$src)),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
// Use 512bit version to implement 128/256 bit.
// Generic form of the widening trick above: insert the narrow vector into an
// undef 512-bit register, run the Z instruction, and take back the original
// subregister. Only applies when the narrow instruction is unavailable
// (NoVLX) but 512-bit EVEX is usable.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX, HasEVEX512] in {
  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
             (!cast<Instruction>(InstrStr # "Zrr")
              (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                             _.info256.RC:$src1,
                             _.info256.SubRegIdx)),
             _.info256.SubRegIdx)>;

  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
             (!cast<Instruction>(InstrStr # "Zrr")
              (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                             _.info128.RC:$src1,
                             _.info128.SubRegIdx)),
             _.info128.SubRegIdx)>;
// Leading-zero count and conflict detection (AVX512CD).
defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                     SchedWriteVecIMul, HasCDI>;

// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SchedWriteVecALU, HasCDI>;

// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;

//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SchedWriteVecALU, HasVPOPCNTDQ>;

// VPOPCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

// Shared shell for the single-precision replicate shuffles; both use the
// TB/XS encoding and only differ in opcode and DAG node.
multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86SchedWriteWidths sched> {
  defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
                                avx512vl_f32_info, HasAVX512>, TB, XS;

defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
                                  SchedWriteFShuffle>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
                                  SchedWriteFShuffle>;
//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//

// 128-bit VMOVDDUP duplicates a single f64, so it is modeled as a broadcast:
// the register form uses X86VBroadcast and the memory form loads one scalar
// (ScalarMemOp, CD8VH displacement scaling) rather than a full vector.
multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src), OpcodeStr, "$src", "$src",
                            (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                            (_.VT (_.BroadcastLdFrag addr:$src))>,
                            EVEX, EVEX_CD8<_.EltSize, CD8VH>,
                            Sched<[sched.Folded]>;
// 512/256-bit VMOVDDUP behave like a normal unary shuffle (X86Movddup);
// only the 128-bit width needs the special broadcast form above.
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
                           VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
                              VTInfo.info256>, EVEX_V256;
  defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
                                 VTInfo.info128>, EVEX_V128;
// Outer shell fixing the element type (f64) and the TB/XD/REX.W encoding.
multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
                          X86SchedWriteWidths sched> {
  defm NAME: avx512_movddup_common<opc, OpcodeStr, sched,
                                   avx512vl_f64_info>, TB, XD, REX_W;

defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;
// Fold a broadcast of a scalar f64 held in a register (not just a load) into
// 128-bit VMOVDDUP, including the merge-masked and zero-masked forms.
let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                        (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
//===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//

// FP unpacks are pure data movement: they read no status registers and
// cannot raise FP exceptions.
let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
                                 SchedWriteFShuffleSizes, 0, 1>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
                                 SchedWriteFShuffleSizes>;

// Integer unpacks: byte/word element forms require BWI; dword/qword forms
// need only AVX512F.
defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;

defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                                        SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SchedWriteShuffle, HasAVX512>;
//===----------------------------------------------------------------------===//
// AVX-512 - Extract & Insert Integer Instructions
//===----------------------------------------------------------------------===//

// Store-to-memory form shared by VPEXTRB/VPEXTRW: extract the element,
// truncate the GPR-sized result to the element type, and store one element
// (CD8VT1 tuple scaling).
multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
                     (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                     OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
                     EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
// VPEXTRB: register form zero-extends the byte into a 32/64-bit GPR, plus
// the shared store form. Requires BWI.
multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
  def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                     (ins _.RC:$src1, u8imm:$src2),
                     OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set GR32orGR64:$dst,
                       (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
                     EVEX, TA, PD, Sched<[WriteVecExtract]>;

  defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TA, PD;
// VPEXTRW: the classic 0xC5 reg-reg form, a disassembly-only 0x15 MRMDestReg
// alias (rr_REV) matching the store-form opcode, and the shared store form.
multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
  def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                     (ins _.RC:$src1, u8imm:$src2),
                     OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set GR32orGR64:$dst,
                       (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
                     EVEX, TB, PD, Sched<[WriteVecExtract]>;

  // Encodable but never selected; exists so the disassembler round-trips.
  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
  def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                         (ins _.RC:$src1, u8imm:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                         EVEX, TA, PD, Sched<[WriteVecExtract]>;

  defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TA, PD;
// VPEXTRD/VPEXTRQ: dword/qword extract to a GPR or to memory. Plain
// extractelt (no truncation), gated on DQI.
multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                 RegisterClass GRC> {
  let Predicates = [HasDQI] in {
  def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                     (ins _.RC:$src1, u8imm:$src2),
                     OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
                     EVEX, TA, PD, Sched<[WriteVecExtract]>;

  def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                     (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                     OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(store (extractelt (_.VT _.RC:$src1),
                              imm:$src2),addr:$dst)]>,
                     EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TA, PD,
                     Sched<[WriteVecExtractSt]>;
// EVEX-encoded element extracts. D and Q share opcode 0x16; REX.W selects Q.
defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, REX_W;
// Memory form shared by all element inserts: load one scalar and insert it
// at the immediate index. immoperator is timm for b/w and imm for d/q.
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, PatFrag LdFrag,
                               SDPatternOperator immoperator> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                     OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                     (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
                     EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
// VPINSRB/VPINSRW: insert a byte/word from a GPR (or memory via the shared
// rm form). Requires BWI.
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
  def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
                     (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
                     OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                      (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX, VVVV,
                     Sched<[WriteVecInsert]>;

  defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
// VPINSRD/VPINSRQ: plain insertelt from a 32/64-bit GPR, gated on DQI.
multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
  def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
                     (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
                     OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                      (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
                     EVEX, VVVV, TA, PD, Sched<[WriteVecInsert]>;

  defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                  _.ScalarLdFrag, imm>, TA, PD;
defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>, TA, PD, WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>, TB, PD, WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
// VPINSRQ shares opcode 0x22 with VPINSRD; REX.W selects the 64-bit form.
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, REX_W;

// Insert an 8-bit mask register (bitcast to i8, any-extended to i32) as a
// vector byte. Without BWI, fall back to the legacy-AVX VPINSRB; with BWI,
// use the EVEX-encoded form.
let Predicates = [HasAVX512, NoBWI] in {
  def : Pat<(X86pinsrb VR128:$src1,
                       (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
            (VPINSRBrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),

let Predicates = [HasBWI] in {
  def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3),
            (VPINSRBZrr VR128:$src1, (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                      GR8:$src2, sub_8bit), timm:$src3)>;
  def : Pat<(X86pinsrb VR128:$src1,
                       (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
            (VPINSRBZrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
// Always select FP16 instructions if available.
// These low-priority patterns (AddedComplexity = -10) move f16 values via
// VPINSRW/VPEXTRW so that, when AVX512FP16 is present, its native
// instructions win instead.
let Predicates = [HasBWI], AddedComplexity = -10 in {
  def : Pat<(f16 (load addr:$src)), (COPY_TO_REGCLASS (VPINSRWZrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16X)>;
  // NOTE(review): this pattern names FR16/VR128 while the neighboring
  // patterns use the EVEX classes FR16X/VR128X -- confirm the non-EVEX
  // register classes are intended here.
  def : Pat<(store f16:$src, addr:$dst), (VPEXTRWZmr addr:$dst, (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>;
  def : Pat<(i16 (bitconvert f16:$src)), (EXTRACT_SUBREG (VPEXTRWZrr (v8i16 (COPY_TO_REGCLASS FR16X:$src, VR128X)), 0), sub_16bit)>;
  def : Pat<(f16 (bitconvert i16:$src)), (COPY_TO_REGCLASS (VPINSRWZrr (v8i16 (IMPLICIT_DEF)), (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit), 0), FR16X)>;
//===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations
//===----------------------------------------------------------------------===//

// Immediate-controlled FP shuffles; PS/PD differ only in prefix and REX.W.
multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP>{
  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                    SchedWriteFShuffle>,
             EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,

defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, TB;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, TB, PD, REX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - Byte shift Left/Right
//===----------------------------------------------------------------------===//

// Whole-register byte shifts (VPSLLDQ/VPSRLDQ). Register-immediate (ri) and
// memory-immediate (mi) forms; the shift amount is an 8-bit immediate and
// there is no masking.
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  def ri : AVX512<opc, MRMr,
                  (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
  def mi : AVX512<opc, MRMm,
                  (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.RC:$dst,(_.VT (OpNode
                                         (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                         (i8 timm:$src2))))]>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates the byte shift for all three widths; 128/256-bit need VLX.
multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in
  defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                               sched.ZMM, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                  sched.YMM, v32i8x_info>, EVEX_V256;
  defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                  sched.XMM, v16i8x_info>, EVEX_V128;

// Both use opcode 0x73; the ModRM reg field (/7 vs /3) selects left/right.
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX, VVVV, WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX, VVVV, WIG;
// VPSADBW shell: byte-element sources, qword-element destination (the sums
// of absolute differences accumulate per 64-bit lane). rr is commutable.
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  let isCommutable = 1 in
  def rr : AVX512BI<opc, MRMSrcReg,
                    (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
                    !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set _dst.RC:$dst,(_dst.VT
                                        (OpNode (_src.VT _src.RC:$src1),
                                                (_src.VT _src.RC:$src2))))]>,
  def rm : AVX512BI<opc, MRMSrcMem,
                    (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
                    !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set _dst.RC:$dst,(_dst.VT
                                        (OpNode (_src.VT _src.RC:$src1),
                                                (_src.VT (bitconvert
                                                          (_src.LdFrag addr:$src2))))))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates VPSADBW for all widths; narrow forms require VLX.
multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, X86SchedWriteWidths sched,
  let Predicates = [prd] in
  defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
                                v8i64_info, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
                                   v4i64x_info, v32i8x_info>, EVEX_V256;
  defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
                                   v2i64x_info, v16i8x_info>, EVEX_V128;

defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                                        SchedWritePSADBW, HasBWI>, EVEX, VVVV, WIG;
// Transforms to swizzle an immediate to enable better matching when
// memory operand isn't in the right place.
// The VPTERNLOG immediate is an 8-entry truth table indexed by the three
// operand bits, so permuting the operands permutes the table bits.
def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/4 and 3/6 (the table entries where operands 0 and 2 differ).
  uint8_t NewImm = Imm & 0xa5;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
  // (The previous comment said "operand 1 and operand 2", which describes
  // the 132 transform below; the bit swap here exchanges exactly the
  // truth-table entries where operands 0 and 1 differ.)
  uint8_t Imm = N->getZExtValue();
  // Swap bits 2/4 and 3/5.
  uint8_t NewImm = Imm & 0xc3;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x20) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/2 and 5/6 (the table entries where operands 1 and 2 differ).
  uint8_t NewImm = Imm & 0x99;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 1 to the end.
  uint8_t Imm = N->getZExtValue();
  // Rotate the three operand roles: each table bit migrates per
  // 1->2, 2->4, 3->6, 4->1, 5->3, 6->5 (entries 0 and 7 are fixed points).
  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
  // This is the inverse rotation of VPTERNLOG231_imm8 above.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
// VPTERNLOG: three-source bitwise ternary logic selected by an 8-bit
// truth-table immediate. Besides the instructions (rri/rmi/rmbi), this
// multiclass emits many extra patterns that re-match the op when the
// passthru, the load, or the broadcast sits in a non-canonical operand
// position, compensating with the immediate swizzles defined above.
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                  (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                                  OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                                  (OpNode (_.VT _.RC:$src1),
                                          (i8 timm:$src4)), 1, 1>,
                                  AVX512AIi8Base, EVEX, VVVV, Sched<[sched]>;
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                  (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                                  OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                                  (OpNode (_.VT _.RC:$src1),
                                          (_.VT (bitconvert (_.LdFrag addr:$src3))),
                                          (i8 timm:$src4)), 1, 0>,
                                  AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
                                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                   (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                                   OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
                                   "$src2, ${src3}"#_.BroadcastStr#", $src4",
                                   (OpNode (_.VT _.RC:$src1),
                                           (_.VT (_.BroadcastLdFrag addr:$src3)),
                                           (i8 timm:$src4)), 1, 0>, EVEX_B,
                                   AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
                                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }// Constraints = "$src1 = $dst"

  // Additional patterns for matching passthru operand in other positions.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;

  // Additional patterns for matching zero masking with loads in other
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 timm:$src4)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching masked loads with different
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 timm:$src4)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, (i8 timm:$src4)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;

  // Additional patterns for matching zero masking with broadcasts in other
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, (i8 timm:$src4)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching masked broadcasts with different
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, (i8 timm:$src4)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (_.BroadcastLdFrag addr:$src3),
                    (i8 timm:$src4)), _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2,
                    (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src1, (i8 timm:$src4)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
// Instantiates VPTERNLOG for all widths; 128/256-bit require VLX.
multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
                          _.info512, NAME>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
  defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
                             _.info128, NAME>, EVEX_V128;
  defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
                             _.info256, NAME>, EVEX_V256;

defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
                                        avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
                                        avx512vl_i64_info>, REX_W;
// Patterns to implement vnot using vpternlog instead of creating all ones
// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
// so that the result is only dependent on src0. But we use the same source
// for all operands to prevent a false dependency.
// TODO: We should maybe have a more generalized algorithm for folding to
let Predicates = [HasAVX512] in {
  // 512-bit NOT of any integer element type via VPTERNLOGQ imm=0x0F.
  def : Pat<(v64i8 (vnot VR512:$src)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(v32i16 (vnot VR512:$src)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(v16i32 (vnot VR512:$src)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(v8i64 (vnot VR512:$src)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
// Without VLX, widen 128/256-bit NOT to the 512-bit VPTERNLOGQ and extract
// the original subregister afterwards.
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
  def : Pat<(v16i8 (vnot VR128X:$src)),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(v8i16 (vnot VR128X:$src)),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(v4i32 (vnot VR128X:$src)),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(v2i64 (vnot VR128X:$src)),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;

  def : Pat<(v32i8 (vnot VR256X:$src)),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(v16i16 (vnot VR256X:$src)),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(v8i32 (vnot VR256X:$src)),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(v4i64 (vnot VR256X:$src)),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
11961 let Predicates = [HasVLX] in {
11962 def : Pat<(v16i8 (vnot VR128X:$src)),
11963 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11964 def : Pat<(v8i16 (vnot VR128X:$src)),
11965 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11966 def : Pat<(v4i32 (vnot VR128X:$src)),
11967 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11968 def : Pat<(v2i64 (vnot VR128X:$src)),
11969 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11971 def : Pat<(v32i8 (vnot VR256X:$src)),
11972 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11973 def : Pat<(v16i16 (vnot VR256X:$src)),
11974 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11975 def : Pat<(v8i32 (vnot VR256X:$src)),
11976 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11977 def : Pat<(v4i64 (vnot VR256X:$src)),
11978 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11981 //===----------------------------------------------------------------------===//
11982 // AVX-512 - FixupImm
11983 //===----------------------------------------------------------------------===//
11985 multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
11986 X86FoldableSchedWrite sched, X86VectorVTInfo _,
11987 X86VectorVTInfo TblVT>{
11988 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
11989 Uses = [MXCSR], mayRaiseFPException = 1 in {
11990 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11991 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11992 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11993 (X86VFixupimm (_.VT _.RC:$src1),
11995 (TblVT.VT _.RC:$src3),
11996 (i32 timm:$src4))>, Sched<[sched]>;
11997 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11998 (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
11999 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12000 (X86VFixupimm (_.VT _.RC:$src1),
12002 (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
12003 (i32 timm:$src4))>,
12004 Sched<[sched.Folded, sched.ReadAfterFold]>;
12005 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12006 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
12007 OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
12008 "$src2, ${src3}"#_.BroadcastStr#", $src4",
12009 (X86VFixupimm (_.VT _.RC:$src1),
12011 (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
12012 (i32 timm:$src4))>,
12013 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
12014 } // Constraints = "$src1 = $dst"
12017 multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
12018 X86FoldableSchedWrite sched,
12019 X86VectorVTInfo _, X86VectorVTInfo TblVT>
12020 : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
12021 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
12022 defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12023 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12024 OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
12025 "$src2, $src3, {sae}, $src4",
12026 (X86VFixupimmSAE (_.VT _.RC:$src1),
12028 (TblVT.VT _.RC:$src3),
12029 (i32 timm:$src4))>,
12030 EVEX_B, Sched<[sched]>;
12034 multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
12035 X86FoldableSchedWrite sched, X86VectorVTInfo _,
12036 X86VectorVTInfo _src3VT> {
12037 let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
12038 ExeDomain = _.ExeDomain in {
12039 defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
12040 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12041 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12042 (X86VFixupimms (_.VT _.RC:$src1),
12044 (_src3VT.VT _src3VT.RC:$src3),
12045 (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
12046 let Uses = [MXCSR] in
12047 defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
12048 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12049 OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
12050 "$src2, $src3, {sae}, $src4",
12051 (X86VFixupimmSAEs (_.VT _.RC:$src1),
12053 (_src3VT.VT _src3VT.RC:$src3),
12054 (i32 timm:$src4))>,
12055 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
12056 defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
12057 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
12058 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12059 (X86VFixupimms (_.VT _.RC:$src1),
12061 (_src3VT.VT (scalar_to_vector
12062 (_src3VT.ScalarLdFrag addr:$src3))),
12063 (i32 timm:$src4))>,
12064 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
12068 multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
12069 AVX512VLVectorVTInfo _Vec,
12070 AVX512VLVectorVTInfo _Tbl> {
12071 let Predicates = [HasAVX512] in
12072 defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
12073 _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
12074 EVEX, VVVV, EVEX_V512;
12075 let Predicates = [HasAVX512, HasVLX] in {
12076 defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
12077 _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
12078 EVEX, VVVV, EVEX_V128;
12079 defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
12080 _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
12081 EVEX, VVVV, EVEX_V256;
// Scalar FIXUPIMM instantiations (opcode 0x55): SS pairs an f32 element with
// a v4i32 table vector, SD pairs f64 with v2i64. Both are EVEX-encoded with
// VEX_LIG and tuple-1 scalar (CD8VT1) memory scaling; the SD form also sets
// REX_W for 64-bit element handling.
12085 defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
12086 SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
12087 AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
12088 defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
12089 SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
12090 AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
// Packed FIXUPIMM instantiations: each FP data-vector family is paired with
// the same-width integer table-vector family (the TblVT operand of the
// fixupimm multiclasses). The PD form sets REX_W and uses 64-bit CD8 scaling.
12091 defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
12092 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12093 defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
12094 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W;
12096 // Patterns used to select SSE scalar fp arithmetic instructions from
12099 // (1) a scalar fp operation followed by a blend
12101 // The effect is that the backend no longer emits unnecessary vector
12102 // insert instructions immediately after SSE scalar fp instructions
12103 // like addss or mulss.
12105 // For example, given the following code:
12106 // __m128 foo(__m128 A, __m128 B) {
12111 // Previously we generated:
12112 // addss %xmm0, %xmm1
12113 // movss %xmm1, %xmm0
12115 // We now generate:
12116 // addss %xmm1, %xmm0
12118 // (2) a vector packed single/double fp operation followed by a vector insert
12120 // The effect is that the backend converts the packed fp instruction
12121 // followed by a vector insert into a single SSE scalar fp instruction.
12123 // For example, given the following code:
12124 // __m128 foo(__m128 A, __m128 B) {
12125 // __m128 C = A + B;
12126 // return (__m128) {C[0], A[1], A[2], A[3]};
12129 // Previously we generated:
12130 // addps %xmm0, %xmm1
12131 // movss %xmm1, %xmm0
12133 // We now generate:
12134 // addss %xmm1, %xmm0
12136 // TODO: Some canonicalization in lowering would simplify the number of
12137 // patterns we have to try to match.
12138 multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
12139 string OpcPrefix, SDNode MoveNode,
12140 X86VectorVTInfo _, PatLeaf ZeroFP> {
12141 let Predicates = [HasAVX512] in {
12142 // extracted scalar math op with insert via movss
12143 def : Pat<(MoveNode
12144 (_.VT VR128X:$dst),
12145 (_.VT (scalar_to_vector
12146 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12148 (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
12149 (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
12150 def : Pat<(MoveNode
12151 (_.VT VR128X:$dst),
12152 (_.VT (scalar_to_vector
12153 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12154 (_.ScalarLdFrag addr:$src))))),
12155 (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;
12157 // extracted masked scalar math op with insert via movss
12158 def : Pat<(MoveNode (_.VT VR128X:$src1),
12160 (X86selects_mask VK1WM:$mask,
12162 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12165 (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
12166 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12167 VK1WM:$mask, _.VT:$src1,
12168 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12169 def : Pat<(MoveNode (_.VT VR128X:$src1),
12171 (X86selects_mask VK1WM:$mask,
12173 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12174 (_.ScalarLdFrag addr:$src2)),
12176 (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
12177 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12178 VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12180 // extracted masked scalar math op with insert via movss
12181 def : Pat<(MoveNode (_.VT VR128X:$src1),
12183 (X86selects_mask VK1WM:$mask,
12185 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12186 _.FRC:$src2), (_.EltVT ZeroFP)))),
12187 (!cast<I>("V"#OpcPrefix#"Zrr_Intkz")
12188 VK1WM:$mask, _.VT:$src1,
12189 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12190 def : Pat<(MoveNode (_.VT VR128X:$src1),
12192 (X86selects_mask VK1WM:$mask,
12194 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12195 (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
12196 (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
// Instantiate the scalar-math blend-folding patterns for every scalar FP
// size: SS (f32/X86Movss), SD (f64/X86Movsd) and SH (f16/X86Movsh). The
// 'any_*' operator matches both strict and non-strict FP nodes for the
// unmasked patterns, while the plain node (fadd, fsub, ...) is used as the
// MaskedOp for the X86selects_mask-based masked patterns; the ZeroFP leaf
// selects the zero-masking variants.
12200 defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
12201 defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
12202 defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
12203 defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
12205 defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
12206 defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
12207 defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
12208 defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
12210 defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>;
12211 defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>;
12212 defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>;
12213 defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>;
12215 multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix,
12216 SDNode Move, X86VectorVTInfo _> {
12217 let Predicates = [HasAVX512] in {
12218 def : Pat<(_.VT (Move _.VT:$dst,
12219 (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
12220 (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
// Same blend-folding idea for the unary scalar op (sqrt): fold an extract +
// scalar any_fsqrt + move-blend back into the *Zr_Int instruction, for each
// scalar FP width.
12224 defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
12225 defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
12226 defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>;
12228 //===----------------------------------------------------------------------===//
12229 // AES instructions
12230 //===----------------------------------------------------------------------===//
12232 multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
12233 let Predicates = [HasVLX, HasVAES] in {
12234 defm Z128 : AESI_binop_rm_int<Op, OpStr,
12235 !cast<Intrinsic>(IntPrefix),
12236 loadv2i64, 0, VR128X, i128mem>,
12237 EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V128, WIG;
12238 defm Z256 : AESI_binop_rm_int<Op, OpStr,
12239 !cast<Intrinsic>(IntPrefix#"_256"),
12240 loadv4i64, 0, VR256X, i256mem>,
12241 EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V256, WIG;
12243 let Predicates = [HasAVX512, HasVAES] in
12244 defm Z : AESI_binop_rm_int<Op, OpStr,
12245 !cast<Intrinsic>(IntPrefix#"_512"),
12246 loadv8i64, 0, VR512, i512mem>,
12247 EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V512, WIG;
// VAES instantiations (opcodes 0xDC-0xDF): EVEX-encoded widened forms of the
// AES-NI instructions. The avx512_vaes multiclass wires each mnemonic to the
// matching int_x86_aesni_* intrinsic at 128/256/512-bit width, gated on
// HasVAES (plus HasVLX or HasAVX512 per width).
12250 defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
12251 defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
12252 defm VAESDEC : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
12253 defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
12255 //===----------------------------------------------------------------------===//
12256 // PCLMUL instructions - Carry less multiplication
12257 //===----------------------------------------------------------------------===//
// 512-bit carry-less multiply: requires both AVX512F and the VPCLMULQDQ
// feature; selected via the 512-bit pclmulqdq intrinsic.
12259 let Predicates = [HasAVX512, HasVPCLMULQDQ] in
12260 defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
12261 EVEX, VVVV, EVEX_V512, EVEX_CD8<64, CD8VF>, WIG;
12263 let Predicates = [HasVLX, HasVPCLMULQDQ] in {
12264 defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
12265 EVEX, VVVV, EVEX_V128, EVEX_CD8<64, CD8VF>, WIG;
12267 defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
12268 int_x86_pclmulqdq_256>, EVEX, VVVV, EVEX_V256,
12269 EVEX_CD8<64, CD8VF>, WIG;
// Assembler aliases (e.g. pclmullqlqdq-style mnemonics) for each EVEX
// VPCLMULQDQ width.
12273 defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
12274 defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
12275 defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
12277 //===----------------------------------------------------------------------===//
12279 //===----------------------------------------------------------------------===//
12281 multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
12282 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12283 let Constraints = "$src1 = $dst",
12284 ExeDomain = VTI.ExeDomain in {
12285 defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12286 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12287 "$src3, $src2", "$src2, $src3",
12288 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
12289 T8, PD, EVEX, VVVV, Sched<[sched]>;
12290 defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12291 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12292 "$src3, $src2", "$src2, $src3",
12293 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12294 (VTI.VT (VTI.LdFrag addr:$src3))))>,
12295 T8, PD, EVEX, VVVV,
12296 Sched<[sched.Folded, sched.ReadAfterFold]>;
12300 multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12301 X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
12302 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
12303 let Constraints = "$src1 = $dst",
12304 ExeDomain = VTI.ExeDomain in
12305 defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12306 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
12307 "${src3}"#VTI.BroadcastStr#", $src2",
12308 "$src2, ${src3}"#VTI.BroadcastStr,
12309 (OpNode VTI.RC:$src1, VTI.RC:$src2,
12310 (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12311 T8, PD, EVEX, VVVV, EVEX_B,
12312 Sched<[sched.Folded, sched.ReadAfterFold]>;
12315 multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
12316 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12317 let Predicates = [HasVBMI2] in
12318 defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12320 let Predicates = [HasVBMI2, HasVLX] in {
12321 defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12323 defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12328 multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
12329 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12330 let Predicates = [HasVBMI2] in
12331 defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12333 let Predicates = [HasVBMI2, HasVLX] in {
12334 defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12336 defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12340 multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
12341 SDNode OpNode, X86SchedWriteWidths sched> {
12342 defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
12343 avx512vl_i16_info>, REX_W, EVEX_CD8<16, CD8VF>;
12344 defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
12345 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12346 defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
12347 avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
12350 multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
12351 SDNode OpNode, X86SchedWriteWidths sched> {
12352 defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
12353 avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
12354 REX_W, EVEX_CD8<16, CD8VF>;
12355 defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
12356 OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
12357 defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
12358 sched, HasVBMI2>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
// VBMI2 funnel-shift instantiations: variable-count forms (VPSHLDV/VPSHRDV,
// three-source with $src1 tied to $dst) and immediate-count forms
// (VPSHLD/VPSHRD). Opcode pairs are word/dq: 0x70-0x71 for left,
// 0x72-0x73 for right.
12362 defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
12363 defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
12364 defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
12365 defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
// VBMI2 byte/word compress (opcode 0x63) and expand (opcode 0x62). The word
// variants add REX_W; all are gated on HasVBMI2 inside the element-width
// multiclasses.
12368 defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
12369 avx512vl_i8_info, HasVBMI2>, EVEX;
12370 defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
12371 avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;
12373 defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
12374 avx512vl_i8_info, HasVBMI2>, EVEX;
12375 defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
12376 avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;
12378 //===----------------------------------------------------------------------===//
12380 //===----------------------------------------------------------------------===//
12382 let Constraints = "$src1 = $dst" in
12383 multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12384 X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12385 bit IsCommutable> {
12386 let ExeDomain = VTI.ExeDomain in {
12387 defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12388 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12389 "$src3, $src2", "$src2, $src3",
12390 (VTI.VT (OpNode VTI.RC:$src1,
12391 VTI.RC:$src2, VTI.RC:$src3)),
12392 IsCommutable, IsCommutable>,
12393 EVEX, VVVV, T8, PD, Sched<[sched]>;
12394 defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12395 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12396 "$src3, $src2", "$src2, $src3",
12397 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12398 (VTI.VT (VTI.LdFrag addr:$src3))))>,
12399 EVEX, VVVV, EVEX_CD8<32, CD8VF>, T8, PD,
12400 Sched<[sched.Folded, sched.ReadAfterFold,
12401 sched.ReadAfterFold]>;
12402 defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12403 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
12404 OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
12405 "$src2, ${src3}"#VTI.BroadcastStr,
12406 (OpNode VTI.RC:$src1, VTI.RC:$src2,
12407 (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12408 EVEX, VVVV, EVEX_CD8<32, CD8VF>, EVEX_B,
12409 T8, PD, Sched<[sched.Folded, sched.ReadAfterFold,
12410 sched.ReadAfterFold]>;
12414 multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
12415 X86SchedWriteWidths sched, bit IsCommutable> {
12416 let Predicates = [HasVNNI] in
12417 defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
12418 IsCommutable>, EVEX_V512;
12419 let Predicates = [HasVNNI, HasVLX] in {
12420 defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
12421 IsCommutable>, EVEX_V256;
12422 defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
12423 IsCommutable>, EVEX_V128;
12427 // FIXME: Is there a better scheduler class for VPDP?
// VNNI dot-product instantiations (opcodes 0x50-0x53). The byte-input forms
// pass IsCommutable = 0 while the word-input forms pass 1 — presumably the
// byte multiplicands are not interchangeable (mixed-signedness operands);
// confirm against the ISA reference before relying on that.
12428 defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
12429 defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
12430 defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
12431 defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
12433 // Patterns to match VPDPWSSD from existing instructions/intrinsics.
12434 let Predicates = [HasVNNI] in {
12435 def : Pat<(v16i32 (add VR512:$src1,
12436 (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
12437 (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
12438 def : Pat<(v16i32 (add VR512:$src1,
12439 (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
12440 (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
12442 let Predicates = [HasVNNI,HasVLX] in {
12443 def : Pat<(v8i32 (add VR256X:$src1,
12444 (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
12445 (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
12446 def : Pat<(v8i32 (add VR256X:$src1,
12447 (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
12448 (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
12449 def : Pat<(v4i32 (add VR128X:$src1,
12450 (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
12451 (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
12452 def : Pat<(v4i32 (add VR128X:$src1,
12453 (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
12454 (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
12457 //===----------------------------------------------------------------------===//
12459 //===----------------------------------------------------------------------===//
12461 // FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
// BITALG per-element population count for byte and word elements, selected
// from the generic ctpop node; the avx512_unary_lowering defms add the
// extra lowering patterns for the same instructions.
12462 defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
12463 avx512vl_i8_info, HasBITALG>;
12464 defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
12465 avx512vl_i16_info, HasBITALG>, REX_W;
12467 defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
12468 defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
12470 multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12471 defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
12472 (ins VTI.RC:$src1, VTI.RC:$src2),
12474 "$src2, $src1", "$src1, $src2",
12475 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12476 (VTI.VT VTI.RC:$src2)),
12477 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12478 (VTI.VT VTI.RC:$src2))>, EVEX, VVVV, T8, PD,
12480 defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
12481 (ins VTI.RC:$src1, VTI.MemOp:$src2),
12483 "$src2, $src1", "$src1, $src2",
12484 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12485 (VTI.VT (VTI.LdFrag addr:$src2))),
12486 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12487 (VTI.VT (VTI.LdFrag addr:$src2)))>,
12488 EVEX, VVVV, EVEX_CD8<8, CD8VF>, T8, PD,
12489 Sched<[sched.Folded, sched.ReadAfterFold]>;
12492 multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12493 let Predicates = [HasBITALG] in
12494 defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
12495 let Predicates = [HasBITALG, HasVLX] in {
12496 defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
12497 defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
12501 // FIXME: Is there a better scheduler class for VPSHUFBITQMB?
12502 defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
12504 //===----------------------------------------------------------------------===//
12506 //===----------------------------------------------------------------------===//
12508 multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12509 X86SchedWriteWidths sched> {
12510 let Predicates = [HasGFNI, HasAVX512] in
12511 defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
12513 let Predicates = [HasGFNI, HasVLX] in {
12514 defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
12516 defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
12521 defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
12523 EVEX_CD8<8, CD8VF>, T8;
12525 multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
12526 X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12527 X86VectorVTInfo BcstVTI>
12528 : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
12529 let ExeDomain = VTI.ExeDomain in
12530 defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12531 (ins VTI.RC:$src1, BcstVTI.ScalarMemOp:$src2, u8imm:$src3),
12532 OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
12533 "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
12534 (OpNode (VTI.VT VTI.RC:$src1),
12535 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
12536 (i8 timm:$src3))>, EVEX_B,
12537 Sched<[sched.Folded, sched.ReadAfterFold]>;
12540 multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12541 X86SchedWriteWidths sched> {
12542 let Predicates = [HasGFNI, HasAVX512] in
12543 defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
12544 v64i8_info, v8i64_info>, EVEX_V512;
12545 let Predicates = [HasGFNI, HasVLX] in {
12546 defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
12547 v32i8x_info, v4i64x_info>, EVEX_V256;
12548 defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
12549 v16i8x_info, v2i64x_info>, EVEX_V128;
// GFNI affine transforms with an 8-bit immediate: 0xCF is the inverse
// variant, 0xCE the plain one. Both use byte-granular CD8 scaling with
// REX_W and include the broadcast-memory form via
// GF2P8AFFINE_avx512_rmb_imm.
12553 defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
12554 X86GF2P8affineinvqb, SchedWriteVecIMul>,
12555 EVEX, VVVV, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base;
12556 defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
12557 X86GF2P8affineqb, SchedWriteVecIMul>,
12558 EVEX, VVVV, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base;
12561 //===----------------------------------------------------------------------===//
12563 //===----------------------------------------------------------------------===//
12565 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
12566 Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
12567 defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
12568 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12569 "v4fmaddps", "$src3, $src2", "$src2, $src3",
12570 []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12571 Sched<[SchedWriteFMA.ZMM.Folded]>;
12573 defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
12574 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12575 "v4fnmaddps", "$src3, $src2", "$src2, $src3",
12576 []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12577 Sched<[SchedWriteFMA.ZMM.Folded]>;
12579 defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
12580 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12581 "v4fmaddss", "$src3, $src2", "$src2, $src3",
12582 []>, VEX_LIG, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VF>,
12583 Sched<[SchedWriteFMA.Scl.Folded]>;
12585 defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
12586 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12587 "v4fnmaddss", "$src3, $src2", "$src2, $src3",
12588 []>, VEX_LIG, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VF>,
12589 Sched<[SchedWriteFMA.Scl.Folded]>;
12592 //===----------------------------------------------------------------------===//
12594 //===----------------------------------------------------------------------===//
12596 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
12597 Constraints = "$src1 = $dst" in {
12598 defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
12599 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12600 "vp4dpwssd", "$src3, $src2", "$src2, $src3",
12601 []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12602 Sched<[SchedWriteFMA.ZMM.Folded]>;
12604 defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
12605 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12606 "vp4dpwssds", "$src3, $src2", "$src2, $src3",
12607 []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12608 Sched<[SchedWriteFMA.ZMM.Folded]>;
12611 let hasSideEffects = 0 in {
12612 let mayStore = 1, SchedRW = [WriteFStoreX] in
12613 def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
12614 let mayLoad = 1, SchedRW = [WriteFLoadX] in
12615 def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
12618 //===----------------------------------------------------------------------===//
12620 //===----------------------------------------------------------------------===//
12622 multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
12623 def rr : I<0x68, MRMSrcReg,
12624 (outs _.KRPC:$dst),
12625 (ins _.RC:$src1, _.RC:$src2),
12626 !strconcat("vp2intersect", _.Suffix,
12627 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12628 [(set _.KRPC:$dst, (X86vp2intersect
12629 _.RC:$src1, (_.VT _.RC:$src2)))]>,
12630 EVEX, VVVV, T8, XD, Sched<[sched]>;
12632 def rm : I<0x68, MRMSrcMem,
12633 (outs _.KRPC:$dst),
12634 (ins _.RC:$src1, _.MemOp:$src2),
12635 !strconcat("vp2intersect", _.Suffix,
12636 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12637 [(set _.KRPC:$dst, (X86vp2intersect
12638 _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12639 EVEX, VVVV, T8, XD, EVEX_CD8<_.EltSize, CD8VF>,
12640 Sched<[sched.Folded, sched.ReadAfterFold]>;
12642 def rmb : I<0x68, MRMSrcMem,
12643 (outs _.KRPC:$dst),
12644 (ins _.RC:$src1, _.ScalarMemOp:$src2),
12645 !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12646 ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12647 [(set _.KRPC:$dst, (X86vp2intersect
12648 _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
12649 EVEX, VVVV, T8, XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
12650 Sched<[sched.Folded, sched.ReadAfterFold]>;
12653 multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
12654 let Predicates = [HasAVX512, HasVP2INTERSECT] in
12655 defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;
12657 let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
12658 defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
12659 defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
12663 let ExeDomain = SSEPackedInt in {
12664 defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
12665 defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, REX_W;
12668 multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
12669 X86SchedWriteWidths sched,
12670 AVX512VLVectorVTInfo _SrcVTInfo,
12671 AVX512VLVectorVTInfo _DstVTInfo,
12672 SDNode OpNode, Predicate prd,
12673 bit IsCommutable = 0> {
12674 let Predicates = [prd] in
12675 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
12676 _SrcVTInfo.info512, _DstVTInfo.info512,
12677 _SrcVTInfo.info512, IsCommutable>,
12678 EVEX_V512, EVEX_CD8<32, CD8VF>;
12679 let Predicates = [HasVLX, prd] in {
12680 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
12681 _SrcVTInfo.info256, _DstVTInfo.info256,
12682 _SrcVTInfo.info256, IsCommutable>,
12683 EVEX_V256, EVEX_CD8<32, CD8VF>;
12684 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
12685 _SrcVTInfo.info128, _DstVTInfo.info128,
12686 _SrcVTInfo.info128, IsCommutable>,
12687 EVEX_V128, EVEX_CD8<32, CD8VF>;
// Two-source f32 -> bf16 conversion (opcode 0x72), gated on HasBF16 and
// marked non-commutable; packs results from two f32 source vectors into one
// bf16 destination via avx512_binop_rm2 inside avx512_binop_all2.
12691 let ExeDomain = SSEPackedSingle in
12692 defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
12693 SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
12694 avx512vl_f32_info, avx512vl_bf16_info,
12695 X86cvtne2ps2bf16, HasBF16, 0>, T8, XD;
12697 // Truncate Float to BFloat16
// The Z128 form uses null_frag so its ISel patterns can be provided
// separately (see the HasBF16+HasVLX pattern block below); the "{x}"/"{y}"
// mnemonic suffixes disambiguate the 128/256-bit memory forms for the
// assembler, and the trailing InstAliases accept the suffixed spellings.
12698 multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
12699 X86SchedWriteWidths sched> {
12700 let ExeDomain = SSEPackedSingle in {
12701 let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
12702 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16bf16x_info, v16f32_info,
12703 X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
12705 let Predicates = [HasBF16, HasVLX] in {
12706 let Uses = []<Register>, mayRaiseFPException = 0 in {
12707 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v4f32x_info,
12708 null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
12710 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v8f32x_info,
12711 X86cvtneps2bf16, X86cvtneps2bf16,
12712 sched.YMM, "{1to8}", "{y}">, EVEX_V256;
12714 } // Predicates = [HasBF16, HasVLX]
12715 } // ExeDomain = SSEPackedSingle
12717 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12718 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
12720 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12721 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
12722 f128mem:$src), 0, "intel">;
12723 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12724 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
12726 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12727 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
12728 f256mem:$src), 0, "intel">;
// Single-source FP32 -> BF16 truncating convert (AVX512-BF16).
12731 defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
12732 SchedWriteCvtPD2PS>, T8, XS,
12733 EVEX_CD8<32, CD8VF>;
12735 let Predicates = [HasBF16, HasVLX] in {
12736 // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
12737 // patterns have been disabled with null_frag.
// Unmasked / merge-masked / zero-masked register forms of the Z128 convert.
12738 def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
12739 (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12740 def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8bf16 VR128X:$src0),
12742 (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
12743 def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8bf16x_info.ImmAllZerosV,
12745 (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
// Same three variants folding a full-width vector load.
12747 def : Pat<(v8bf16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
12748 (VCVTNEPS2BF16Z128rm addr:$src)>;
12749 def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8bf16 VR128X:$src0),
12751 (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12752 def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8bf16x_info.ImmAllZerosV,
12754 (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
// Same three variants folding a 32-bit broadcast load.
12756 def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32
12757 (X86VBroadcastld32 addr:$src)))),
12758 (VCVTNEPS2BF16Z128rmb addr:$src)>;
12759 def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12760 (v8bf16 VR128X:$src0), VK4WM:$mask),
12761 (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12762 def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12763 v8bf16x_info.ImmAllZerosV, VK4WM:$mask),
12764 (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
// Map the 128/256-bit intrinsics directly onto the instructions.
12766 def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (v4f32 VR128X:$src))),
12767 (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12768 def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (loadv4f32 addr:$src))),
12769 (VCVTNEPS2BF16Z128rm addr:$src)>;
12771 def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (v8f32 VR256X:$src))),
12772 (VCVTNEPS2BF16Z256rr VR256X:$src)>;
12773 def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src))),
12774 (VCVTNEPS2BF16Z256rm addr:$src)>;
// bf16 element broadcasts reuse the 16-bit integer broadcast instructions.
12776 def : Pat<(v8bf16 (X86VBroadcastld16 addr:$src)),
12777 (VPBROADCASTWZ128rm addr:$src)>;
12778 def : Pat<(v16bf16 (X86VBroadcastld16 addr:$src)),
12779 (VPBROADCASTWZ256rm addr:$src)>;
12781 def : Pat<(v8bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12782 (VPBROADCASTWZ128rr VR128X:$src)>;
12783 def : Pat<(v16bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12784 (VPBROADCASTWZ256rr VR128X:$src)>;
// Generic fpround of f32 vectors to bf16 also selects the convert.
12786 def : Pat<(v8bf16 (X86vfpround (v8f32 VR256X:$src))),
12787 (VCVTNEPS2BF16Z256rr VR256X:$src)>;
12788 def : Pat<(v8bf16 (X86vfpround (loadv8f32 addr:$src))),
12789 (VCVTNEPS2BF16Z256rm addr:$src)>;
12791 // TODO: No scalar broadcast yet, since scalar bf16 is not a legal type so far.
// 512-bit bf16 broadcast and fpround patterns (no VLX requirement).
12794 let Predicates = [HasBF16] in {
12795 def : Pat<(v32bf16 (X86VBroadcastld16 addr:$src)),
12796 (VPBROADCASTWZrm addr:$src)>;
12798 def : Pat<(v32bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12799 (VPBROADCASTWZrr VR128X:$src)>;
12801 def : Pat<(v16bf16 (X86vfpround (v16f32 VR512:$src))),
12802 (VCVTNEPS2BF16Zrr VR512:$src)>;
12803 def : Pat<(v16bf16 (X86vfpround (loadv16f32 addr:$src))),
12804 (VCVTNEPS2BF16Zrm addr:$src)>;
12805 // TODO: No scalar broadcast yet, since scalar bf16 is not a legal type so far.
// BF16 dot-product accumulate: FMA-style three-source op where the
// accumulator is tied ($src1 = $dst). Provides register, memory, and
// broadcast-memory forms; the broadcast form has no ISel pattern here.
12808 let Constraints = "$src1 = $dst" in {
12809 multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
12810 X86FoldableSchedWrite sched,
12811 X86VectorVTInfo _, X86VectorVTInfo src_v> {
12812 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12813 (ins src_v.RC:$src2, src_v.RC:$src3),
12814 OpcodeStr, "$src3, $src2", "$src2, $src3",
12815 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
12816 EVEX, VVVV, Sched<[sched]>;
12818 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12819 (ins src_v.RC:$src2, src_v.MemOp:$src3),
12820 OpcodeStr, "$src3, $src2", "$src2, $src3",
12821 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12822 (src_v.LdFrag addr:$src3)))>, EVEX, VVVV,
12823 Sched<[sched.Folded, sched.ReadAfterFold]>;
12825 let mayLoad = 1, hasSideEffects = 0 in
12826 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12827 (ins src_v.RC:$src2, f32mem:$src3),
12829 !strconcat("${src3}", _.BroadcastStr,", $src2"),
12830 !strconcat("$src2, ${src3}", _.BroadcastStr),
12832 EVEX_B, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
12835 } // Constraints = "$src1 = $dst"
// Instantiate the dot-product multiclass at ZMM (under prd) and at
// YMM/XMM when VLX is also available.
12837 multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
12838 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
12839 AVX512VLVectorVTInfo src_v, Predicate prd> {
12840 let Predicates = [prd] in {
12841 defm Z : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
12842 src_v.info512>, EVEX_V512;
12844 let Predicates = [HasVLX, prd] in {
12845 defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
12846 src_v.info256>, EVEX_V256;
12847 defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
12848 src_v.info128>, EVEX_V128;
// VDPBF16PS: bf16 pair dot-product accumulating into f32 lanes.
12852 let ExeDomain = SSEPackedSingle in
12853 defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
12854 avx512vl_f32_info, avx512vl_bf16_info,
12855 HasBF16>, T8, XS, EVEX_CD8<32, CD8VF>;
12857 //===----------------------------------------------------------------------===//
12859 //===----------------------------------------------------------------------===//
// VMOVW: 16-bit moves between GPRs/memory and the low word of an XMM
// register, plus the zero-extending load/scalar_to_vector patterns that
// reuse them for f16/i16 scalars.
12861 let Predicates = [HasFP16] in {
12862 // Move word ( r/m16) to Packed word
12863 def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
12864 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, Sched<[WriteVecMoveFromGpr]>;
12865 def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src),
12866 "vmovw\t{$src, $dst|$dst, $src}",
12868 (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>,
12869 T_MAP5, PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>;
// GR16 sources are widened to GR32 via INSERT_SUBREG before the move.
12871 def : Pat<(f16 (bitconvert GR16:$src)),
12872 (f16 (COPY_TO_REGCLASS
12874 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
12876 def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))),
12877 (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
12878 def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))),
12879 (VMOVW2SHrr GR32:$src)>;
12880 // FIXME: We should really find a way to improve these patterns.
12881 def : Pat<(v8i32 (X86vzmovl
12882 (insert_subvector undef,
12883 (v4i32 (scalar_to_vector
12884 (and GR32:$src, 0xffff))),
12886 (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
12887 def : Pat<(v16i32 (X86vzmovl
12888 (insert_subvector undef,
12889 (v4i32 (scalar_to_vector
12890 (and GR32:$src, 0xffff))),
12892 (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
12894 def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))),
12895 (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
12897 // AVX 128-bit movw instruction write zeros in the high 128-bit part.
12898 def : Pat<(v8i16 (X86vzload16 addr:$src)),
12899 (VMOVWrm addr:$src)>;
12900 def : Pat<(v16i16 (X86vzload16 addr:$src)),
12901 (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
12903 // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
12904 def : Pat<(v32i16 (X86vzload16 addr:$src)),
12905 (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
// Fold 16-bit extending/zero-extending loads into the word load.
12907 def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))),
12908 (VMOVWrm addr:$src)>;
12909 def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (i32 (zextloadi16 addr:$src))))),
12910 (VMOVWrm addr:$src)>;
12911 def : Pat<(v8i32 (X86vzmovl
12912 (insert_subvector undef,
12913 (v4i32 (scalar_to_vector
12914 (i32 (zextloadi16 addr:$src)))),
12916 (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
12917 def : Pat<(v16i32 (X86vzmovl
12918 (insert_subvector undef,
12919 (v4i32 (scalar_to_vector
12920 (i32 (zextloadi16 addr:$src)))),
12922 (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
12924 // Move word from xmm register to r/m16
12925 def VMOVSH2Wrr : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
12926 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, Sched<[WriteVecMoveToGpr]>;
12927 def VMOVWmr : AVX512<0x7E, MRMDestMem, (outs),
12928 (ins i16mem:$dst, VR128X:$src),
12929 "vmovw\t{$src, $dst|$dst, $src}",
12930 [(store (i16 (extractelt (v8i16 VR128X:$src),
12931 (iPTR 0))), addr:$dst)]>,
12932 T_MAP5, PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>;
// Extract the low word back out through GR32 and a 16-bit subreg copy.
12934 def : Pat<(i16 (bitconvert FR16X:$src)),
12935 (i16 (EXTRACT_SUBREG
12936 (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)),
12938 def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
12939 (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>;
12941 // Allow "vmovw" to use GR64
12942 let hasSideEffects = 0 in {
12943 def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
12944 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
12945 def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
12946 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>;
12950 // Convert 16-bit float to i16/u16
// ZMM adds a rounding-control (rc) variant; YMM/XMM forms require VLX.
12951 multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12952 SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12953 AVX512VLVectorVTInfo _Dst,
12954 AVX512VLVectorVTInfo _Src,
12955 X86SchedWriteWidths sched> {
12956 let Predicates = [HasFP16] in {
12957 defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
12958 OpNode, MaskOpNode, sched.ZMM>,
12959 avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512,
12960 OpNodeRnd, sched.ZMM>, EVEX_V512;
12962 let Predicates = [HasFP16, HasVLX] in {
12963 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
12964 OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
12965 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
12966 OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
12970 // Convert 16-bit float to i16/u16 truncate
// Same structure as avx512_cvtph2w but the ZMM form takes SAE instead of
// a rounding-control operand (truncating converts have no rounding mode).
12971 multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12972 SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12973 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src,
12974 X86SchedWriteWidths sched> {
12975 let Predicates = [HasFP16] in {
12976 defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
12977 OpNode, MaskOpNode, sched.ZMM>,
12978 avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512,
12979 OpNodeRnd, sched.ZMM>, EVEX_V512;
12981 let Predicates = [HasFP16, HasVLX] in {
12982 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
12983 OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
12984 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
12985 OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
// FP16 <-> 16-bit integer converts: signed/unsigned, rounding and
// truncating directions. XS/PD/no-prefix select the distinct encodings.
12989 defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt,
12990 X86cvtp2UIntRnd, avx512vl_i16_info,
12991 avx512vl_f16_info, SchedWriteCvtPD2DQ>,
12992 T_MAP5, EVEX_CD8<16, CD8VF>;
12993 defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp,
12994 X86VUintToFpRnd, avx512vl_f16_info,
12995 avx512vl_i16_info, SchedWriteCvtPD2DQ>,
12996 T_MAP5, XD, EVEX_CD8<16, CD8VF>;
12997 defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si,
12998 X86cvttp2si, X86cvttp2siSAE,
12999 avx512vl_i16_info, avx512vl_f16_info,
13000 SchedWriteCvtPD2DQ>, T_MAP5, PD, EVEX_CD8<16, CD8VF>;
13001 defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui,
13002 X86cvttp2ui, X86cvttp2uiSAE,
13003 avx512vl_i16_info, avx512vl_f16_info,
13004 SchedWriteCvtPD2DQ>, T_MAP5, EVEX_CD8<16, CD8VF>;
13005 defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int,
13006 X86cvtp2IntRnd, avx512vl_i16_info,
13007 avx512vl_f16_info, SchedWriteCvtPD2DQ>,
13008 T_MAP5, PD, EVEX_CD8<16, CD8VF>;
13009 defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp,
13010 X86VSintToFpRnd, avx512vl_f16_info,
13011 avx512vl_i16_info, SchedWriteCvtPD2DQ>,
13012 T_MAP5, XS, EVEX_CD8<16, CD8VF>;
13014 // Convert Half to Signed/Unsigned Doubleword
// Z128 reads only the low 4 of 8 f16 elements, hence the explicit
// "{1to4}" broadcast string and f64mem memory operand.
13015 multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13016 SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13017 X86SchedWriteWidths sched> {
13018 let Predicates = [HasFP16] in {
13019 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
13020 MaskOpNode, sched.ZMM>,
13021 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info,
13022 OpNodeRnd, sched.ZMM>, EVEX_V512;
13024 let Predicates = [HasFP16, HasVLX] in {
13025 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
13026 MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
13027 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
13028 MaskOpNode, sched.YMM>, EVEX_V256;
13032 // Convert Half to Signed/Unsigned Doubleword with truncation
// Truncating counterpart of avx512_cvtph2dq: SAE on the ZMM form instead
// of rounding control.
13033 multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13034 SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13035 X86SchedWriteWidths sched> {
13036 let Predicates = [HasFP16] in {
13037 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
13038 MaskOpNode, sched.ZMM>,
13039 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info,
13040 OpNodeRnd, sched.ZMM>, EVEX_V512;
13042 let Predicates = [HasFP16, HasVLX] in {
13043 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
13044 MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
13045 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
13046 MaskOpNode, sched.YMM>, EVEX_V256;
// PH -> DQ/UDQ instantiations (rounding and truncating variants).
13051 defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
13052 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
13053 EVEX_CD8<16, CD8VH>;
13054 defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt,
13055 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5,
13056 EVEX_CD8<16, CD8VH>;
13058 defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si,
13059 X86cvttp2si, X86cvttp2siSAE,
13060 SchedWriteCvtPS2DQ>, T_MAP5, XS,
13061 EVEX_CD8<16, CD8VH>;
13063 defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
13064 X86cvttp2ui, X86cvttp2uiSAE,
13065 SchedWriteCvtPS2DQ>, T_MAP5,
13066 EVEX_CD8<16, CD8VH>;
13068 // Convert Half to Signed/Unsigned Quadword
13069 multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13070 SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13071 X86SchedWriteWidths sched> {
13072 let Predicates = [HasFP16] in {
13073 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
13074 MaskOpNode, sched.ZMM>,
13075 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
13076 OpNodeRnd, sched.ZMM>, EVEX_V512;
13078 let Predicates = [HasFP16, HasVLX] in {
13079 // Explicitly specified broadcast string, since we take only 2 elements
13080 // from v8f16x_info source
13081 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
13082 MaskOpNode, sched.XMM, "{1to2}", "", f32mem>,
13084 // Explicitly specified broadcast string, since we take only 4 elements
13085 // from v8f16x_info source
13086 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
13087 MaskOpNode, sched.YMM, "{1to4}", "", f64mem>,
13092 // Convert Half to Signed/Unsigned Quadword with truncation
// Truncating counterpart of avx512_cvtph2qq (SAE instead of rounding).
13093 multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13094 SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13095 X86SchedWriteWidths sched> {
13096 let Predicates = [HasFP16] in {
13097 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
13098 MaskOpNode, sched.ZMM>,
13099 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
13100 OpNodeRnd, sched.ZMM>, EVEX_V512;
13102 let Predicates = [HasFP16, HasVLX] in {
13103 // Explicitly specified broadcast string, since we take only 2 elements
13104 // from v8f16x_info source
13105 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
13106 MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128;
13107 // Explicitly specified broadcast string, since we take only 4 elements
13108 // from v8f16x_info source
13109 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
13110 MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256;
// PH -> QQ/UQQ instantiations (rounding and truncating variants).
13114 defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
13115 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
13116 EVEX_CD8<16, CD8VQ>;
13118 defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
13119 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
13120 EVEX_CD8<16, CD8VQ>;
13122 defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
13123 X86cvttp2si, X86cvttp2siSAE,
13124 SchedWriteCvtPS2DQ>, T_MAP5, PD,
13125 EVEX_CD8<16, CD8VQ>;
13127 defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
13128 X86cvttp2ui, X86cvttp2uiSAE,
13129 SchedWriteCvtPS2DQ>, T_MAP5, PD,
13130 EVEX_CD8<16, CD8VQ>;
13132 // Convert Signed/Unsigned Quadword to Half
13133 multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13134 SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13135 X86SchedWriteWidths sched> {
13136 // we need "x"/"y"/"z" suffixes in order to distinguish between 128, 256 and
13137 // 512 memory forms of these instructions in Asm Parser. They have the same
13138 // dest type - 'v8f16x_info'. We also specify the broadcast string explicitly
13139 // for the same reason.
13140 let Predicates = [HasFP16] in {
13141 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode,
13142 MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
13143 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
13144 OpNodeRnd, sched.ZMM>, EVEX_V512;
13146 let Predicates = [HasFP16, HasVLX] in {
// null_frag: masked ISel patterns are supplied separately (see the
// HasFP16+HasVLX pattern block later in the file).
13147 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
13148 null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
13149 i128mem, VK2WM>, EVEX_V128;
13150 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
13151 null_frag, null_frag, sched.YMM, "{1to4}", "{y}",
13152 i256mem, VK4WM>, EVEX_V256;
// AT&T aliases for the suffixed 128-bit forms: plain, merge-masked,
// zero-masked, and the {1toN} broadcast-memory variants of each.
13155 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
13156 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
13157 VR128X:$src), 0, "att">;
13158 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
13159 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
13160 VK2WM:$mask, VR128X:$src), 0, "att">;
13161 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
13162 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
13163 VK2WM:$mask, VR128X:$src), 0, "att">;
13164 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
13165 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
13166 i64mem:$src), 0, "att">;
13167 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
13168 "$dst {${mask}}, ${src}{1to2}}",
13169 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
13170 VK2WM:$mask, i64mem:$src), 0, "att">;
13171 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
13172 "$dst {${mask}} {z}, ${src}{1to2}}",
13173 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
13174 VK2WM:$mask, i64mem:$src), 0, "att">;
// Same set of aliases for the 256-bit "y"-suffixed forms.
13176 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
13177 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
13178 VR256X:$src), 0, "att">;
13179 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
13180 "$dst {${mask}}, $src}",
13181 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
13182 VK4WM:$mask, VR256X:$src), 0, "att">;
13183 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
13184 "$dst {${mask}} {z}, $src}",
13185 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
13186 VK4WM:$mask, VR256X:$src), 0, "att">;
13187 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
13188 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
13189 i64mem:$src), 0, "att">;
13190 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
13191 "$dst {${mask}}, ${src}{1to4}}",
13192 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
13193 VK4WM:$mask, i64mem:$src), 0, "att">;
13194 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
13195 "$dst {${mask}} {z}, ${src}{1to4}}",
13196 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
13197 VK4WM:$mask, i64mem:$src), 0, "att">;
// Same set of aliases for the 512-bit "z"-suffixed forms.
13199 def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
13200 (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
13201 VR512:$src), 0, "att">;
13202 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
13203 "$dst {${mask}}, $src}",
13204 (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
13205 VK8WM:$mask, VR512:$src), 0, "att">;
13206 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
13207 "$dst {${mask}} {z}, $src}",
13208 (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
13209 VK8WM:$mask, VR512:$src), 0, "att">;
13210 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
13211 (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
13212 i64mem:$src), 0, "att">;
13213 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
13214 "$dst {${mask}}, ${src}{1to8}}",
13215 (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
13216 VK8WM:$mask, i64mem:$src), 0, "att">;
13217 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
13218 "$dst {${mask}} {z}, ${src}{1to8}}",
13219 (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
13220 VK8WM:$mask, i64mem:$src), 0, "att">;
// QQ/UQQ -> PH instantiations; REX_W (EVEX.W=1) marks 64-bit elements.
13223 defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
13224 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5,
13225 EVEX_CD8<64, CD8VF>;
13227 defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
13228 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5, XD,
13229 EVEX_CD8<64, CD8VF>;
13231 // Convert half to signed/unsigned int 32/64
// "{l}"/"{q}" select the AT&T operand-size suffix; REX_W marks the
// 64-bit GPR destination forms. The VCVTT* group is the truncating variant.
13232 defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
13233 X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>,
13234 T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13235 defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
13236 X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>,
13237 T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
13238 defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
13239 X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>,
13240 T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13241 defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
13242 X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>,
13243 T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
13245 defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
13246 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13247 "{l}", HasFP16>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13248 defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
13249 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13250 "{q}", HasFP16>, REX_W, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13251 defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
13252 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13253 "{l}", HasFP16>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13254 defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
13255 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13256 "{q}", HasFP16>, T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
// Scalar signed/unsigned 32/64-bit integer -> fp16 converts, plus the
// ISel patterns that select them for plain f16 int-to-fp and for the
// X86Movsh-wrapped sequences clang emits for the intrinsics.
13258 let Predicates = [HasFP16] in {
13259 defm VCVTSI2SHZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
13260 v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
13261 T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
13262 defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
13263 v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
13264 T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VT1>;
13265 defm VCVTUSI2SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
13266 v8f16x_info, i32mem, loadi32,
13267 "cvtusi2sh","l">, T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
13268 defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
13269 v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
13270 T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VT1>;
// Suffix-less AT&T spellings default to the 32-bit memory forms.
13271 def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13272 (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
13274 def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13275 (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
// f16 (u)int-to-fp from loads and GPRs; the first operand is an
// IMPLICIT_DEF pass-through for the upper destination elements.
13278 def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
13279 (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13280 def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
13281 (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13283 def : Pat<(f16 (any_sint_to_fp GR32:$src)),
13284 (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13285 def : Pat<(f16 (any_sint_to_fp GR64:$src)),
13286 (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13288 def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
13289 (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13290 def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
13291 (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13293 def : Pat<(f16 (any_uint_to_fp GR32:$src)),
13294 (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13295 def : Pat<(f16 (any_uint_to_fp GR64:$src)),
13296 (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13298 // Patterns used for matching vcvtsi2sh intrinsic sequences from clang
13299 // which produce unnecessary vmovsh instructions
13300 def : Pat<(v8f16 (X86Movsh
13301 (v8f16 VR128X:$dst),
13302 (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
13303 (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13305 def : Pat<(v8f16 (X86Movsh
13306 (v8f16 VR128X:$dst),
13307 (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
13308 (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13310 def : Pat<(v8f16 (X86Movsh
13311 (v8f16 VR128X:$dst),
13312 (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
13313 (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13315 def : Pat<(v8f16 (X86Movsh
13316 (v8f16 VR128X:$dst),
13317 (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
13318 (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13320 def : Pat<(v8f16 (X86Movsh
13321 (v8f16 VR128X:$dst),
13322 (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
13323 (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13325 def : Pat<(v8f16 (X86Movsh
13326 (v8f16 VR128X:$dst),
13327 (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
13328 (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13330 def : Pat<(v8f16 (X86Movsh
13331 (v8f16 VR128X:$dst),
13332 (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
13333 (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13335 def : Pat<(v8f16 (X86Movsh
13336 (v8f16 VR128X:$dst),
13337 (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
13338 (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13339 } // Predicates = [HasFP16]
13341 let Predicates = [HasFP16, HasVLX] in {
13342 // Special patterns to allow use of X86VMSintToFP for masking. Instruction
13343 // patterns have been disabled with null_frag.
13344 def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
13345 (VCVTQQ2PHZ256rr VR256X:$src)>;
13346 def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13348 (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13349 def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13351 (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13353 def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
13354 (VCVTQQ2PHZ256rm addr:$src)>;
13355 def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13357 (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13358 def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13360 (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13362 def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13363 (VCVTQQ2PHZ256rmb addr:$src)>;
13364 def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13365 (v8f16 VR128X:$src0), VK4WM:$mask),
13366 (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13367 def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13368 v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13369 (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13371 def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
13372 (VCVTQQ2PHZ128rr VR128X:$src)>;
13373 def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13375 (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13376 def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13378 (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13380 def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
13381 (VCVTQQ2PHZ128rm addr:$src)>;
13382 def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13384 (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13385 def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13387 (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13389 def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13390 (VCVTQQ2PHZ128rmb addr:$src)>;
13391 def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13392 (v8f16 VR128X:$src0), VK2WM:$mask),
13393 (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13394 def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13395 v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13396 (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
// VCVTUQQ2PH (unsigned i64 -> f16, VLX forms). Special patterns to allow use
// of X86VMUintToFP for masking; the instruction multiclass patterns have been
// disabled with null_frag. As above, each form has unmasked, merge-masked
// ($src0 passthrough), and zero-masked (all-zeros passthrough) variants.
// 256-bit op, register source.
13400 def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
13401 (VCVTUQQ2PHZ256rr VR256X:$src)>;
13402 def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13404 (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13405 def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13407 (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
// 256-bit op, full-vector load source.
13409 def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
13410 (VCVTUQQ2PHZ256rm addr:$src)>;
13411 def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13413 (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13414 def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13416 (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
// 256-bit op, broadcast 64-bit load source.
13418 def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13419 (VCVTUQQ2PHZ256rmb addr:$src)>;
13420 def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13421 (v8f16 VR128X:$src0), VK4WM:$mask),
13422 (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13423 def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13424 v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13425 (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
// 128-bit op, register source.
13427 def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
13428 (VCVTUQQ2PHZ128rr VR128X:$src)>;
13429 def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13431 (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13432 def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13434 (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
// 128-bit op, full-vector load source.
13436 def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
13437 (VCVTUQQ2PHZ128rm addr:$src)>;
13438 def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13440 (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13441 def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13443 (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
// 128-bit op, broadcast 64-bit load source.
13445 def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13446 (VCVTUQQ2PHZ128rmb addr:$src)>;
13447 def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13448 (v8f16 VR128X:$src0), VK2WM:$mask),
13449 (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13450 def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13451 v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13452 (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
// Complex FMA packed ops: reg/reg, reg/mem, and reg/broadcast-mem forms.
// $src1 is tied to $dst (the accumulator), and $dst is @earlyclobber so the
// register allocator keeps it disjoint from the untied sources.
// NOTE(review): presumably an AVX512-FP16 encoding restriction on complex
// FMA destination/source overlap — confirm against the Intel spec.
13455 let Constraints = "@earlyclobber $dst, $src1 = $dst" in {
13456 multiclass avx512_cfmaop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> {
// Register-register form; commutability is caller-controlled.
13457 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13458 (ins _.RC:$src2, _.RC:$src3),
13459 OpcodeStr, "$src3, $src2", "$src2, $src3",
13460 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX, VVVV;
// Register-memory form (never commutable: no IsCommutable argument passed).
13462 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13463 (ins _.RC:$src2, _.MemOp:$src3),
13464 OpcodeStr, "$src3, $src2", "$src2, $src3",
13465 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX, VVVV;
// Broadcast-memory form (EVEX.b set).
13467 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13468 (ins _.RC:$src2, _.ScalarMemOp:$src3),
13469 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr),
13470 (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX, VVVV;
13472 } // Constraints = "@earlyclobber $dst, $src1 = $dst"
// Static-rounding (embedded RC) variant of the complex FMA op: register form
// only, with an AVX512RC rounding-mode operand passed through as (i32 timm:$rc).
// Same accumulator tie and earlyclobber as avx512_cfmaop_rm above.
13474 multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
13475 X86VectorVTInfo _> {
13476 let Constraints = "@earlyclobber $dst, $src1 = $dst" in
13477 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13478 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
13479 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
13480 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>,
13481 EVEX, VVVV, EVEX_B, EVEX_RC;
// Instantiates the packed complex FMA at all three vector widths:
// 512-bit (plus the rounding variant) under HasFP16, and 256/128-bit under
// HasVLX+HasFP16. NOTE(review): the VT infos are f32-based (v16f32/v8f32/
// v4f32) even though these are FP16 instructions — presumably each 32-bit
// lane carries one complex (real, imag) f16 pair; confirm against the spec.
13485 multiclass avx512_cfmaop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, bit IsCommutable> {
13486 let Predicates = [HasFP16] in {
13487 defm Z : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>,
13488 avx512_cfmaop_round<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
13489 EVEX_V512, Sched<[WriteFMAZ]>;
13491 let Predicates = [HasVLX, HasFP16] in {
13492 defm Z256 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v8f32x_info, IsCommutable>, EVEX_V256, Sched<[WriteFMAY]>;
13493 defm Z128 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v4f32x_info, IsCommutable>, EVEX_V128, Sched<[WriteFMAX]>;
// Instantiates the packed complex multiply at all three vector widths via the
// generic avx512_fp_packed helper (512-bit plus rounding variant under
// HasFP16; 256/128-bit under HasVLX+HasFP16). The "@earlyclobber $dst"
// string is threaded through as the clobber constraint, and the trailing 0
// argument differs from ordinary FP ops — see avx512_fp_packed's parameter
// list for its meaning.
13497 multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13498 SDNode MaskOpNode, SDNode OpNodeRnd, bit IsCommutable> {
13499 let Predicates = [HasFP16] in {
13500 defm Z : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
13501 WriteFMAZ, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>,
13502 avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, WriteFMAZ, v16f32_info,
13503 "", "@earlyclobber $dst">, EVEX_V512;
13505 let Predicates = [HasVLX, HasFP16] in {
13506 defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
13507 WriteFMAY, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V256;
13508 defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
13509 WriteFMAX, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V128;
// Packed complex FP16 instruction definitions. All read MXCSR (rounding mode
// / exception state). VFMADDCPH and VFMULCPH are commutable (1); the
// conjugating forms VFCMADDCPH/VFCMULCPH are not (0). The pairs share opcodes
// (0x56 / 0xD6) and are distinguished by prefix: XS vs. XD in map 6.
13514 let Uses = [MXCSR] in {
13515 defm VFMADDCPH : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>,
13516 T_MAP6, XS, EVEX_CD8<32, CD8VF>;
13517 defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>,
13518 T_MAP6, XD, EVEX_CD8<32, CD8VF>;
13520 defm VFMULCPH : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc,
13521 x86vfmulcRnd, 1>, T_MAP6, XS, EVEX_CD8<32, CD8VF>;
13522 defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc,
13523 x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6, XD, EVEX_CD8<32, CD8VF>;
// Scalar complex-half FMA (*SH forms): register, ssmem (f32-sized scalar
// load), and embedded-rounding variants, all over v4f32x_info with $src1 tied
// to $dst and an earlyclobber destination, mirroring the packed ops above.
13527 multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
13528 bit IsCommutable> {
13529 let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
// Register form; commutability is caller-controlled.
13530 defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13531 (ins VR128X:$src2, VR128X:$src3), OpcodeStr,
13532 "$src3, $src2", "$src2, $src3",
13533 (v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
13534 Sched<[WriteFMAX]>;
// Memory form: loads one f32-sized scalar (one complex f16 pair).
13535 defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst),
13536 (ins VR128X:$src2, ssmem:$src3), OpcodeStr,
13537 "$src3, $src2", "$src2, $src3",
13538 (v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
13539 Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
// Embedded static-rounding form (EVEX.b + RC).
13540 defm rb : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13541 (ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
13542 "$rc, $src3, $src2", "$src2, $src3, $rc",
13543 (v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,
13544 EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
// Scalar complex-half binary op (multiply, *SH forms): built on the generic
// two-source AVX512_maskable with X86selects for masking and an
// "@earlyclobber $dst" clobber constraint. The memory and rounding variants
// pass 0, 0, 0 for the commutability flags (never commutable); only the
// register-register form honors IsCommutable.
13548 multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13549 SDNode OpNodeRnd, bit IsCommutable> {
13550 let Predicates = [HasFP16] in {
13551 defm rr : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13552 (ins VR128X:$src1, VR128X:$src2), OpcodeStr,
13553 "$src2, $src1", "$src1, $src2",
13554 (v4f32 (OpNode VR128X:$src1, VR128X:$src2)),
13555 IsCommutable, IsCommutable, IsCommutable,
13556 X86selects, "@earlyclobber $dst">, Sched<[WriteFMAX]>;
// Memory form: loads one f32-sized scalar (one complex f16 pair).
13557 defm rm : AVX512_maskable<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
13558 (ins VR128X:$src1, ssmem:$src2), OpcodeStr,
13559 "$src2, $src1", "$src1, $src2",
13560 (v4f32 (OpNode VR128X:$src1, (sse_load_f32 addr:$src2))),
13561 0, 0, 0, X86selects, "@earlyclobber $dst">,
13562 Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
// Embedded static-rounding form (EVEX.b + RC).
13563 defm rrb : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13564 (ins VR128X:$src1, VR128X:$src2, AVX512RC:$rc), OpcodeStr,
13565 "$rc, $src2, $src1", "$src1, $src2, $rc",
13566 (OpNodeRnd (v4f32 VR128X:$src1), (v4f32 VR128X:$src2), (i32 timm:$rc)),
13567 0, 0, 0, X86selects, "@earlyclobber $dst">,
13568 EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
// Scalar complex FP16 instruction definitions; all read MXCSR. As with the
// packed forms, each opcode (0x57 / 0xD7) is shared by a commutable (XS, 1)
// and a non-commutable conjugating (XD, 0) variant in map 6.
13572 let Uses = [MXCSR] in {
13573 defm VFMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>,
13574 T_MAP6, XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX, VVVV;
13575 defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>,
13576 T_MAP6, XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX, VVVV;
13578 defm VFMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>,
13579 T_MAP6, XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX, VVVV;
13580 defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>,
13581 T_MAP6, XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX, VVVV;