1 //===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file describes the X86 AVX512 instruction set, defining the
10 // instructions, and properties of the instructions which are needed for code
11 // generation, machine code emission, and analysis.
13 //===----------------------------------------------------------------------===//
15 // This multiclass generates the masking variants from the non-masking
16 // variant. It only provides the assembly pieces for the masking variants.
17 // It assumes custom ISel patterns for masking which can be provided as
18 // template arguments.
//
// Three records are produced:
//   NAME    - unmasked form
//   NAME#k  - merge-masking form ("$dst {${mask}}" asm syntax)
//   NAME#kz - zero-masking form ("$dst {${mask}} {z}" asm syntax)
// NOTE(review): this view of the file appears to be missing some parameter
// lines (e.g. Outs/OpcodeStr/Pattern) and trailing arguments — confirm
// against the full source before editing.
19 multiclass AVX512_maskable_custom<bits<8> O, Format F,
21 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
23 string AttSrcAsm, string IntelSrcAsm,
25 list<dag> MaskingPattern,
26 list<dag> ZeroMaskingPattern,
27 string MaskingConstraint = "",
29 bit IsKCommutable = 0,
30 bit IsKZCommutable = IsCommutable,
31 string ClobberConstraint = ""> {
// Unmasked variant: commutability and constraints come straight from the
// template arguments.
32 let isCommutable = IsCommutable, Constraints = ClobberConstraint in
33 def NAME: AVX512<O, F, Outs, Ins,
34 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
35 "$dst, "#IntelSrcAsm#"}",
// Merge-masking variant.
38 // Prefer over VMOV*rrk Pat<>
39 let isCommutable = IsKCommutable in
40 def NAME#k: AVX512<O, F, Outs, MaskingIns,
41 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
42 "$dst {${mask}}, "#IntelSrcAsm#"}",
// Combine the tie constraint for the merge-masking form with any clobber
// constraint; either may be empty.
45 // In case of the 3src subclass this is overridden with a let.
46 string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint,
47 !if(!eq(MaskingConstraint, ""), ClobberConstraint,
48 !strconcat(ClobberConstraint, ", ", MaskingConstraint)));
// Zero-masking variant: no $src0 tie, so only the clobber constraint applies.
51 // Zero mask does not add any restrictions to commute operands transformation.
52 // So, it is Ok to use IsCommutable instead of IsKCommutable.
53 let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<>
54 Constraints = ClobberConstraint in
55 def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
56 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
57 "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
63 // Common base class of AVX512_maskable and AVX512_maskable_3src.
//
// Builds the unmasked, merge-masking and zero-masking set-patterns from RHS /
// MaskingRHS and forwards them to AVX512_maskable_custom. `Select` is the
// mask-select operator (vselect_mask for vectors, X86selects_mask for
// scalars).
64 multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
66 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
68 string AttSrcAsm, string IntelSrcAsm,
69 dag RHS, dag MaskingRHS,
70 SDPatternOperator Select = vselect_mask,
71 string MaskingConstraint = "",
73 bit IsKCommutable = 0,
74 bit IsKZCommutable = IsCommutable,
75 string ClobberConstraint = ""> :
76 AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
77 AttSrcAsm, IntelSrcAsm,
78 [(set _.RC:$dst, RHS)],
79 [(set _.RC:$dst, MaskingRHS)],
// Zero-masking pattern: masked-off elements become all-zeros.
81 (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
82 MaskingConstraint, IsCommutable,
83 IsKCommutable, IsKZCommutable, ClobberConstraint>;
85 // This multiclass generates the unconditional/non-masking, the masking and
86 // the zero-masking variant of the vector instruction. In the masking case, the
87 // preserved vector elements come from a new dummy input operand tied to $dst.
88 // This version uses a separate dag for non-masking and masking.
// NOTE(review): the RHS/MaskRHS parameter lines appear elided in this view of
// the file — confirm against the full source.
89 multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
90 dag Outs, dag Ins, string OpcodeStr,
91 string AttSrcAsm, string IntelSrcAsm,
93 string ClobberConstraint = "",
94 bit IsCommutable = 0, bit IsKCommutable = 0,
95 bit IsKZCommutable = IsCommutable> :
96 AVX512_maskable_custom<O, F, Outs, Ins,
// Merge-masking ins: prepend tied passthru ($src0) and the mask.
97 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
// Zero-masking ins: prepend only the mask.
98 !con((ins _.KRCWM:$mask), Ins),
99 OpcodeStr, AttSrcAsm, IntelSrcAsm,
100 [(set _.RC:$dst, RHS)],
102 (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
104 (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
// Tie the passthru operand to the destination for the merge-masking form.
105 "$src0 = $dst", IsCommutable, IsKCommutable,
106 IsKZCommutable, ClobberConstraint>;
108 // This multiclass generates the unconditional/non-masking, the masking and
109 // the zero-masking variant of the vector instruction. In the masking case, the
110 // preserved vector elements come from a new dummy input operand tied to $dst.
// Like AVX512_maskable_split but a single RHS dag serves both the masked and
// unmasked patterns.
111 multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
112 dag Outs, dag Ins, string OpcodeStr,
113 string AttSrcAsm, string IntelSrcAsm,
115 bit IsCommutable = 0, bit IsKCommutable = 0,
116 bit IsKZCommutable = IsCommutable,
117 SDPatternOperator Select = vselect_mask,
118 string ClobberConstraint = ""> :
119 AVX512_maskable_common<O, F, _, Outs, Ins,
// Merge-masking ins: tied passthru ($src0) plus mask in front of Ins.
120 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
121 !con((ins _.KRCWM:$mask), Ins),
122 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
// Merge-masking result: select RHS where the mask is set, else keep $src0.
123 (Select _.KRCWM:$mask, RHS, _.RC:$src0),
124 Select, "$src0 = $dst", IsCommutable, IsKCommutable,
125 IsKZCommutable, ClobberConstraint>;
127 // This multiclass generates the unconditional/non-masking, the masking and
128 // the zero-masking variant of the scalar instruction.
// Scalar ops use X86selects_mask (bit 0 of the mask) instead of vselect_mask
// and are never commutable through the masked forms.
129 multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
130 dag Outs, dag Ins, string OpcodeStr,
131 string AttSrcAsm, string IntelSrcAsm,
133 AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
134 RHS, 0, 0, 0, X86selects_mask>;
136 // Similar to AVX512_maskable but in this case one of the source operands
137 // ($src1) is already tied to $dst so we just use that for the preserved
138 // vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
// the tied $src1 operand; it is prepended here.
140 multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
141 dag Outs, dag NonTiedIns, string OpcodeStr,
142 string AttSrcAsm, string IntelSrcAsm,
144 bit IsCommutable = 0,
145 bit IsKCommutable = 0,
146 SDPatternOperator Select = vselect_mask,
148 AVX512_maskable_common<O, F, _, Outs,
149 !con((ins _.RC:$src1), NonTiedIns),
150 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
151 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
152 OpcodeStr, AttSrcAsm, IntelSrcAsm,
// With MaskOnly set the unmasked pattern is suppressed via null_frag.
153 !if(MaskOnly, (null_frag), RHS),
// Masked-off elements are preserved from the tied $src1 operand.
154 (Select _.KRCWM:$mask, RHS, _.RC:$src1),
155 Select, "", IsCommutable, IsKCommutable>;
157 // Similar to AVX512_maskable_3src but in this case the input VT for the tied
158 // operand differs from the output VT. This requires a bitconvert on
159 // the preserved vector going into the vselect.
160 // NOTE: The unmasked pattern is disabled.
161 multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
162 X86VectorVTInfo InVT,
163 dag Outs, dag NonTiedIns, string OpcodeStr,
164 string AttSrcAsm, string IntelSrcAsm,
165 dag RHS, bit IsCommutable = 0> :
166 AVX512_maskable_common<O, F, OutVT, Outs,
167 !con((ins InVT.RC:$src1), NonTiedIns),
168 !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
169 !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
// null_frag here is what disables the unmasked pattern.
170 OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
// bitconvert reconciles the tied input's VT with the output VT.
171 (vselect_mask InVT.KRCWM:$mask, RHS,
172 (bitconvert InVT.RC:$src1)),
173 vselect_mask, "", IsCommutable>;
// Scalar flavor of AVX512_maskable_3src: same tied-$src1 handling but with
// X86selects_mask as the mask-select operator.
175 multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
176 dag Outs, dag NonTiedIns, string OpcodeStr,
177 string AttSrcAsm, string IntelSrcAsm,
179 bit IsCommutable = 0,
180 bit IsKCommutable = 0,
182 AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
183 IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
184 X86selects_mask, MaskOnly>;
// Assembler-only maskable instruction: takes a raw Pattern for the unmasked
// form and provides no patterns for the masked forms.
186 multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
189 string AttSrcAsm, string IntelSrcAsm,
191 AVX512_maskable_custom<O, F, Outs, Ins,
192 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
193 !con((ins _.KRCWM:$mask), Ins),
// Empty masking/zero-masking pattern lists: asm variants only.
194 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
// Assembler-only variant of AVX512_maskable_3src: $src1 is tied to $dst and
// no ISel patterns are attached to the masked forms.
197 multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
198 dag Outs, dag NonTiedIns,
200 string AttSrcAsm, string IntelSrcAsm,
202 AVX512_maskable_custom<O, F, Outs,
203 !con((ins _.RC:$src1), NonTiedIns),
204 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
205 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
206 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
209 // Instruction with mask that puts result in mask register,
210 // like "compare" and "vptest"
// Only two variants exist here (no zero-masking): NAME and NAME#k, the
// latter with EVEX_K encoding.
211 multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
213 dag Ins, dag MaskingIns,
215 string AttSrcAsm, string IntelSrcAsm,
217 list<dag> MaskingPattern,
218 bit IsCommutable = 0> {
219 let isCommutable = IsCommutable in {
220 def NAME: AVX512<O, F, Outs, Ins,
221 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
222 "$dst, "#IntelSrcAsm#"}",
225 def NAME#k: AVX512<O, F, Outs, MaskingIns,
226 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
227 "$dst {${mask}}, "#IntelSrcAsm#"}",
228 MaskingPattern>, EVEX_K;
// Builds the set-patterns for a compare-style instruction whose result lands
// in a mask register (_.KRC:$dst) and forwards to AVX512_maskable_custom_cmp.
232 multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
234 dag Ins, dag MaskingIns,
236 string AttSrcAsm, string IntelSrcAsm,
237 dag RHS, dag MaskingRHS,
238 bit IsCommutable = 0> :
239 AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
240 AttSrcAsm, IntelSrcAsm,
241 [(set _.KRC:$dst, RHS)],
242 [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
// Compare-style maskable instruction. RHS_su is the single-use form of the
// comparison used in the masked pattern, where the mask is ANDed in.
244 multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
245 dag Outs, dag Ins, string OpcodeStr,
246 string AttSrcAsm, string IntelSrcAsm,
247 dag RHS, dag RHS_su, bit IsCommutable = 0> :
248 AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
249 !con((ins _.KRCWM:$mask), Ins),
250 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
// Masked compare result = mask AND compare-result.
251 (and _.KRCWM:$mask, RHS_su), IsCommutable>;
253 // Used by conversion instructions.
// Unlike AVX512_maskable_common, the caller supplies all three result dags
// (unmasked, merge-masked, zero-masked) explicitly.
254 multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
256 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
258 string AttSrcAsm, string IntelSrcAsm,
259 dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
260 AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
261 AttSrcAsm, IntelSrcAsm,
262 [(set _.RC:$dst, RHS)],
263 [(set _.RC:$dst, MaskingRHS)],
264 [(set _.RC:$dst, ZeroMaskingRHS)],
// FMA-style maskable instruction: $src1 is tied to $dst and serves as the
// passthru for merge-masking; zero-masking uses an all-zeros passthru.
267 multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
268 dag Outs, dag NonTiedIns, string OpcodeStr,
269 string AttSrcAsm, string IntelSrcAsm,
270 dag RHS, dag MaskingRHS, bit IsCommutable,
272 AVX512_maskable_custom<O, F, Outs,
273 !con((ins _.RC:$src1), NonTiedIns),
274 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
275 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
276 OpcodeStr, AttSrcAsm, IntelSrcAsm,
277 [(set _.RC:$dst, RHS)],
279 (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
281 (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
282 "", IsCommutable, IsKCommutable>;
284 // Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
285 // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
286 // swizzled by ExecutionDomainFix to pxor.
287 // We set canFoldAsLoad because this can be converted to a constant-pool
288 // load of an all-zeros value if folding it would be beneficial.
289 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
290 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
291 def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
292 [(set VR512:$dst, (v16i32 immAllZerosV))]>;
293 def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
294 [(set VR512:$dst, (v16i32 immAllOnesV))]>;
// Map the remaining 512-bit element types' all-zeros vectors onto the single
// v16i32 pseudo above.
297 let Predicates = [HasAVX512] in {
298 def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
299 def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
300 def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
301 def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
302 def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
303 def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
306 // Alias instructions that allow VPTERNLOG to be used with a mask to create
307 // a mix of all ones and all zeros elements. This is done this way to force
308 // the same register to be used as input for all three sources.
309 let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
// 32-bit elements: mask bit set -> all-ones element, clear -> zero element.
310 def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
311 (ins VK16WM:$mask), "",
312 [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
313 (v16i32 immAllOnesV),
314 (v16i32 immAllZerosV)))]>;
// 64-bit element counterpart (all-ones operand elided in this view).
315 def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
316 (ins VK8WM:$mask), "",
317 [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
319 (v8i64 immAllZerosV)))]>;
// 128-bit and 256-bit zero-vector pseudos, mirroring AVX512_512_SET0 above.
322 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
323 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
324 def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
325 [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
326 def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
327 [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
// Map the remaining 128/256-bit element types' all-zeros vectors onto the
// v4i32 / v8i32 pseudos.
330 let Predicates = [HasAVX512] in {
331 def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
332 def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
333 def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
334 def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
335 def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
336 def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
337 def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
338 def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
339 def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
340 def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
341 def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
342 def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
345 // Alias instructions that map fld0 to xorps for sse or vxorps for avx.
346 // This is expanded by ExpandPostRAPseudos.
// Scalar fp zero pseudos for f16/f32/f64/f128 under AVX-512.
347 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
348 isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
349 def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
350 [(set FR16X:$dst, fp16imm0)]>;
351 def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
352 [(set FR32X:$dst, fp32imm0)]>;
353 def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
354 [(set FR64X:$dst, fp64imm0)]>;
355 def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
356 [(set VR128X:$dst, fp128imm0)]>;
359 //===----------------------------------------------------------------------===//
360 // AVX-512 - VECTOR INSERT
363 // Supports two different pattern operators for mask and unmasked ops. Allows
364 // null_frag to be passed for one.
// Emits register (rr) and memory (rm) forms of a vinsert{f,i}{32x4,64x2,...}
// instruction via AVX512_maskable_split.
365 multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
367 SDPatternOperator vinsert_insert,
368 SDPatternOperator vinsert_for_mask,
369 X86FoldableSchedWrite sched> {
370 let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
371 defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
372 (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
// Mnemonic is derived from the source vector's element type and width,
// e.g. "vinsertf32x4".
373 "vinsert" # From.EltTypeName # "x" # From.NumElts,
374 "$src3, $src2, $src1", "$src1, $src2, $src3",
375 (vinsert_insert:$src3 (To.VT To.RC:$src1),
376 (From.VT From.RC:$src2),
378 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
379 (From.VT From.RC:$src2),
381 AVX512AIi8Base, EVEX, VVVV, Sched<[sched]>;
// Memory form: $src2 is loaded with the source-type's load fragment.
383 defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
384 (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
385 "vinsert" # From.EltTypeName # "x" # From.NumElts,
386 "$src3, $src2, $src1", "$src1, $src2, $src3",
387 (vinsert_insert:$src3 (To.VT To.RC:$src1),
388 (From.VT (From.LdFrag addr:$src2)),
390 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
391 (From.VT (From.LdFrag addr:$src2)),
392 (iPTR imm))>, AVX512AIi8Base, EVEX, VVVV,
393 EVEX_CD8<From.EltSize, From.CD8TupleForm>,
394 Sched<[sched.Folded, sched.ReadAfterFold]>;
398 // Passes the same pattern operator for masked and unmasked ops.
399 multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
401 SDPatternOperator vinsert_insert,
402 X86FoldableSchedWrite sched> :
403 vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;
// Codegen-only patterns that lower an insert_subvector of alternative types
// onto an existing vinsert instruction (register and memory forms).
405 multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
406 X86VectorVTInfo To, PatFrag vinsert_insert,
407 SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
408 let Predicates = p in {
409 def : Pat<(vinsert_insert:$ins
410 (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
411 (To.VT (!cast<Instruction>(InstrStr#"rr")
412 To.RC:$src1, From.RC:$src2,
// The XForm converts the captured insert index into the instruction's imm8.
413 (INSERT_get_vinsert_imm To.RC:$ins)))>;
415 def : Pat<(vinsert_insert:$ins
417 (From.VT (From.LdFrag addr:$src2)),
419 (To.VT (!cast<Instruction>(InstrStr#"rm")
420 To.RC:$src1, addr:$src2,
421 (INSERT_get_vinsert_imm To.RC:$ins)))>;
// Instantiates the full family of vector-insert instructions for one
// element-type pair: 32xN forms at Opcode128/Opcode256 widths plus the
// DQI-only 64x2/32x8 forms (the latter kept for masking only).
425 multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
426 ValueType EltVT64, int Opcode256,
427 X86FoldableSchedWrite sched> {
429 let Predicates = [HasVLX] in
430 defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
431 X86VectorVTInfo< 4, EltVT32, VR128X>,
432 X86VectorVTInfo< 8, EltVT32, VR256X>,
433 vinsert128_insert, sched>, EVEX_V256;
435 defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
436 X86VectorVTInfo< 4, EltVT32, VR128X>,
437 X86VectorVTInfo<16, EltVT32, VR512>,
438 vinsert128_insert, sched>, EVEX_V512;
440 defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
441 X86VectorVTInfo< 4, EltVT64, VR256X>,
442 X86VectorVTInfo< 8, EltVT64, VR512>,
443 vinsert256_insert, sched>, REX_W, EVEX_V512;
445 // Even with DQI we'd like to only use these instructions for masking.
446 let Predicates = [HasVLX, HasDQI] in
447 defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
448 X86VectorVTInfo< 2, EltVT64, VR128X>,
449 X86VectorVTInfo< 4, EltVT64, VR256X>,
// null_frag for the unmasked op keeps ISel from choosing these forms
// when no masking is involved.
450 null_frag, vinsert128_insert, sched>,
453 // Even with DQI we'd like to only use these instructions for masking.
454 let Predicates = [HasDQI] in {
455 defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
456 X86VectorVTInfo< 2, EltVT64, VR128X>,
457 X86VectorVTInfo< 8, EltVT64, VR512>,
458 null_frag, vinsert128_insert, sched>,
461 defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
462 X86VectorVTInfo< 8, EltVT32, VR256X>,
463 X86VectorVTInfo<16, EltVT32, VR512>,
464 null_frag, vinsert256_insert, sched>,
469 // FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
470 defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
471 defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
473 // Codegen pattern with the alternative types,
474 // Even with AVX512DQ we'll still use these for unmasked operations.
475 defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
476 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
477 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
478 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
480 defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
481 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
482 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
483 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
485 defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
486 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
487 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
488 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
490 // Codegen pattern with the alternative types insert VEC128 into VEC256
491 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
492 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
493 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
494 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
495 defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info,
496 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
497 // Codegen pattern with the alternative types insert VEC128 into VEC512
498 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
499 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
500 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
501 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
502 defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info,
503 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
504 // Codegen pattern with the alternative types insert VEC256 into VEC512
505 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
506 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
507 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
508 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
509 defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info,
510 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
// Masked-insert patterns where the mask's element type (Cast) differs from
// the insert's operand types (From/To); the result is viewed through Cast
// via bitcasts. Covers rrk/rmk (merge) and rrkz/rmkz (zero) forms.
// NOTE(review): several bitcast/imm lines appear elided in this view of the
// file — confirm against the full source before editing.
513 multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
514 X86VectorVTInfo To, X86VectorVTInfo Cast,
515 PatFrag vinsert_insert,
516 SDNodeXForm INSERT_get_vinsert_imm,
518 let Predicates = p in {
// Merge-masking, register source.
520 (vselect_mask Cast.KRCWM:$mask,
522 (vinsert_insert:$ins (To.VT To.RC:$src1),
523 (From.VT From.RC:$src2),
526 (!cast<Instruction>(InstrStr#"rrk")
527 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
528 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Merge-masking, memory source.
530 (vselect_mask Cast.KRCWM:$mask,
532 (vinsert_insert:$ins (To.VT To.RC:$src1),
535 (From.LdFrag addr:$src2))),
538 (!cast<Instruction>(InstrStr#"rmk")
539 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
540 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Zero-masking, register source.
543 (vselect_mask Cast.KRCWM:$mask,
545 (vinsert_insert:$ins (To.VT To.RC:$src1),
546 (From.VT From.RC:$src2),
549 (!cast<Instruction>(InstrStr#"rrkz")
550 Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
551 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Zero-masking, memory source.
553 (vselect_mask Cast.KRCWM:$mask,
555 (vinsert_insert:$ins (To.VT To.RC:$src1),
556 (From.VT (From.LdFrag addr:$src2)),
559 (!cast<Instruction>(InstrStr#"rmkz")
560 Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
561 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Instantiations of vinsert_for_mask_cast for every supported combination of
// insert width (128/256 into 256/512), element type, and mask cast type.
// DQI-gated entries target the 64x2/32x8 instruction forms.
565 defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
566 v8f32x_info, vinsert128_insert,
567 INSERT_get_vinsert128_imm, [HasVLX]>;
568 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
569 v4f64x_info, vinsert128_insert,
570 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
572 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
573 v8i32x_info, vinsert128_insert,
574 INSERT_get_vinsert128_imm, [HasVLX]>;
575 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
576 v8i32x_info, vinsert128_insert,
577 INSERT_get_vinsert128_imm, [HasVLX]>;
578 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
579 v8i32x_info, vinsert128_insert,
580 INSERT_get_vinsert128_imm, [HasVLX]>;
581 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
582 v4i64x_info, vinsert128_insert,
583 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
584 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
585 v4i64x_info, vinsert128_insert,
586 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
587 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
588 v4i64x_info, vinsert128_insert,
589 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
591 defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
592 v16f32_info, vinsert128_insert,
593 INSERT_get_vinsert128_imm, [HasAVX512]>;
594 defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
595 v8f64_info, vinsert128_insert,
596 INSERT_get_vinsert128_imm, [HasDQI]>;
598 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
599 v16i32_info, vinsert128_insert,
600 INSERT_get_vinsert128_imm, [HasAVX512]>;
601 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
602 v16i32_info, vinsert128_insert,
603 INSERT_get_vinsert128_imm, [HasAVX512]>;
604 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
605 v16i32_info, vinsert128_insert,
606 INSERT_get_vinsert128_imm, [HasAVX512]>;
607 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
608 v8i64_info, vinsert128_insert,
609 INSERT_get_vinsert128_imm, [HasDQI]>;
610 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
611 v8i64_info, vinsert128_insert,
612 INSERT_get_vinsert128_imm, [HasDQI]>;
613 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
614 v8i64_info, vinsert128_insert,
615 INSERT_get_vinsert128_imm, [HasDQI]>;
617 defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
618 v16f32_info, vinsert256_insert,
619 INSERT_get_vinsert256_imm, [HasDQI]>;
620 defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
621 v8f64_info, vinsert256_insert,
622 INSERT_get_vinsert256_imm, [HasAVX512]>;
624 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
625 v16i32_info, vinsert256_insert,
626 INSERT_get_vinsert256_imm, [HasDQI]>;
627 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
628 v16i32_info, vinsert256_insert,
629 INSERT_get_vinsert256_imm, [HasDQI]>;
630 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
631 v16i32_info, vinsert256_insert,
632 INSERT_get_vinsert256_imm, [HasDQI]>;
633 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
634 v8i64_info, vinsert256_insert,
635 INSERT_get_vinsert256_imm, [HasAVX512]>;
636 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
637 v8i64_info, vinsert256_insert,
638 INSERT_get_vinsert256_imm, [HasAVX512]>;
639 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
640 v8i64_info, vinsert256_insert,
641 INSERT_get_vinsert256_imm, [HasAVX512]>;
643 // vinsertps - insert f32 to XMM
644 let ExeDomain = SSEPackedSingle in {
// Register form: imm8 selects source/destination lanes and a zero mask,
// per the X86insertps node semantics.
645 let isCommutable = 1 in
646 def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
647 (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
648 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
649 [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
650 EVEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>;
// Memory form: the f32 operand is loaded and widened to v4f32 for the node.
651 def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
652 (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
653 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
654 [(set VR128X:$dst, (X86insertps VR128X:$src1,
655 (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
657 EVEX, VVVV, EVEX_CD8<32, CD8VT1>,
658 Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
661 //===----------------------------------------------------------------------===//
662 // AVX-512 VECTOR EXTRACT
665 // Supports two different pattern operators for mask and unmasked ops. Allows
666 // null_frag to be passed for one.
// Emits register (rr), store (mr) and masked-store (mrk) forms of a
// vextract{f,i}{32x4,64x2,...} instruction.
667 multiclass vextract_for_size_split<int Opcode,
668 X86VectorVTInfo From, X86VectorVTInfo To,
669 SDPatternOperator vextract_extract,
670 SDPatternOperator vextract_for_mask,
671 SchedWrite SchedRR, SchedWrite SchedMR> {
673 let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
674 defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
675 (ins From.RC:$src1, u8imm:$idx),
// Mnemonic is derived from the destination's element type and width,
// e.g. "vextractf32x4".
676 "vextract" # To.EltTypeName # "x" # To.NumElts,
677 "$idx, $src1", "$src1, $idx",
678 (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
679 (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
680 AVX512AIi8Base, EVEX, Sched<[SchedRR]>;
// Store form: extract directly to memory.
682 def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
683 (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
684 "vextract" # To.EltTypeName # "x" # To.NumElts #
685 "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
686 [(store (To.VT (vextract_extract:$idx
687 (From.VT From.RC:$src1), (iPTR imm))),
// Masked store form is asm-only (no ISel pattern).
691 let mayStore = 1, hasSideEffects = 0 in
692 def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
693 (ins To.MemOp:$dst, To.KRCWM:$mask,
694 From.RC:$src1, u8imm:$idx),
695 "vextract" # To.EltTypeName # "x" # To.NumElts #
696 "\t{$idx, $src1, $dst {${mask}}|"
697 "$dst {${mask}}, $src1, $idx}", []>,
698 EVEX_K, EVEX, Sched<[SchedMR]>;
702 // Passes the same pattern operator for masked and unmasked ops.
703 multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
705 SDPatternOperator vextract_extract,
706 SchedWrite SchedRR, SchedWrite SchedMR> :
707 vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;
709 // Codegen pattern for the alternative types
// Lowers extract_subvector of alternative types onto an existing vextract
// instruction: register-to-register and extract-to-store forms.
710 multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
711 X86VectorVTInfo To, PatFrag vextract_extract,
712 SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
713 let Predicates = p in {
714 def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
715 (To.VT (!cast<Instruction>(InstrStr#"rr")
// The XForm converts the captured extract index into the imm8 operand.
717 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
718 def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
719 (iPTR imm))), addr:$dst),
720 (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
721 (EXTRACT_get_vextract_imm To.RC:$ext))>;
// Instantiates the full family of vector-extract instructions for one
// element-type pair: 32x4/64x4 baseline forms plus the DQI-only 64x2/32x8
// forms (the latter kept for masking only).
725 multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
726 ValueType EltVT64, int Opcode256,
727 SchedWrite SchedRR, SchedWrite SchedMR> {
728 let Predicates = [HasAVX512] in {
729 defm NAME # "32x4Z" : vextract_for_size<Opcode128,
730 X86VectorVTInfo<16, EltVT32, VR512>,
731 X86VectorVTInfo< 4, EltVT32, VR128X>,
732 vextract128_extract, SchedRR, SchedMR>,
733 EVEX_V512, EVEX_CD8<32, CD8VT4>;
734 defm NAME # "64x4Z" : vextract_for_size<Opcode256,
735 X86VectorVTInfo< 8, EltVT64, VR512>,
736 X86VectorVTInfo< 4, EltVT64, VR256X>,
737 vextract256_extract, SchedRR, SchedMR>,
738 REX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
740 let Predicates = [HasVLX] in
741 defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
742 X86VectorVTInfo< 8, EltVT32, VR256X>,
743 X86VectorVTInfo< 4, EltVT32, VR128X>,
744 vextract128_extract, SchedRR, SchedMR>,
745 EVEX_V256, EVEX_CD8<32, CD8VT4>;
747 // Even with DQI we'd like to only use these instructions for masking.
// null_frag for the unmasked op keeps ISel from choosing these DQI-only
// forms when no masking is involved.
748 let Predicates = [HasVLX, HasDQI] in
749 defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
750 X86VectorVTInfo< 4, EltVT64, VR256X>,
751 X86VectorVTInfo< 2, EltVT64, VR128X>,
752 null_frag, vextract128_extract, SchedRR, SchedMR>,
753 EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
755 // Even with DQI we'd like to only use these instructions for masking.
756 let Predicates = [HasDQI] in {
757 defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
758 X86VectorVTInfo< 8, EltVT64, VR512>,
759 X86VectorVTInfo< 2, EltVT64, VR128X>,
760 null_frag, vextract128_extract, SchedRR, SchedMR>,
761 REX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
762 defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
763 X86VectorVTInfo<16, EltVT32, VR512>,
764 X86VectorVTInfo< 8, EltVT32, VR256X>,
765 null_frag, vextract256_extract, SchedRR, SchedMR>,
766 EVEX_V512, EVEX_CD8<32, CD8VT8>;
770 // TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
771 defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
772 defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
774 // extract_subvector codegen patterns with the alternative types.
775 // Even with AVX512DQ we'll still use these for unmasked operations.
776 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
777 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
778 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
779 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
781 defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
782 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
783 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
784 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
786 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
787 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
788 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
789 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
791 // Codegen pattern with the alternative types extract VEC128 from VEC256
792 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
793 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
794 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
795 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
796 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16f16x_info, v8f16x_info,
797 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
799 // Codegen pattern with the alternative types extract VEC128 from VEC512
800 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
801 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
802 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
803 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
804 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32f16_info, v8f16x_info,
805 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
806 // Codegen pattern with the alternative types extract VEC256 from VEC512
807 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
808 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
809 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
810 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
811 defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info,
812 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
815 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
816 // smaller extract to enable EVEX->VEX.
// Without VLX, first narrow the 512-bit source to its low YMM via
// EXTRACT_SUBREG, then use the VEX-encoded VEXTRACT{I,F}128 so the result
// can be compressed from EVEX to VEX encoding.
// NOTE(review): the final immediate-operand line of each Pat (the lines
// between the original numbers shown, e.g. 821, 825, ...) appears to be
// elided in this listing — verify against upstream before editing.
817 let Predicates = [NoVLX] in {
818 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
819 (v2i64 (VEXTRACTI128rr
820 (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
822 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
823 (v2f64 (VEXTRACTF128rr
824 (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
826 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
827 (v4i32 (VEXTRACTI128rr
828 (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
830 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
831 (v4f32 (VEXTRACTF128rr
832 (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
834 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
835 (v8i16 (VEXTRACTI128rr
836 (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
838 def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
839 (v8f16 (VEXTRACTF128rr
840 (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
842 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
843 (v16i8 (VEXTRACTI128rr
844 (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
848 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
849 // smaller extract to enable EVEX->VEX.
// Same strategy as the NoVLX block above, but with VLX available the
// 256-bit VEXTRACT{I,F}32x4Z256 instructions are used instead.
850 let Predicates = [HasVLX] in {
851 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
852 (v2i64 (VEXTRACTI32x4Z256rr
853 (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
855 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
856 (v2f64 (VEXTRACTF32x4Z256rr
857 (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
859 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
860 (v4i32 (VEXTRACTI32x4Z256rr
861 (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
863 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
864 (v4f32 (VEXTRACTF32x4Z256rr
865 (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
867 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
868 (v8i16 (VEXTRACTI32x4Z256rr
869 (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
871 def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
872 (v8f16 (VEXTRACTF32x4Z256rr
873 (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
875 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
876 (v16i8 (VEXTRACTI32x4Z256rr
877 (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
882 // Additional patterns for handling a bitcast between the vselect and the
883 // extract_subvector.
// Selects the masked (rrk) / zero-masked (rrkz) extract instruction when the
// extracted subvector (type To, taken from From) is bitcast to type Cast
// before the mask select is applied. The mask therefore operates in Cast's
// element granularity while the extract instruction is named by InstrStr.
884 multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
885 X86VectorVTInfo To, X86VectorVTInfo Cast,
886 PatFrag vextract_extract,
887 SDNodeXForm EXTRACT_get_vextract_imm,
// NOTE(review): the parameter declaring `p` (presumably list<Predicate> p)
// is not visible in this listing — confirm against upstream.
889 let Predicates = p in {
890 def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
892 (To.VT (vextract_extract:$ext
893 (From.VT From.RC:$src), (iPTR imm)))),
895 (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
896 Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
897 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
899 def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
901 (To.VT (vextract_extract:$ext
902 (From.VT From.RC:$src), (iPTR imm)))),
904 (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
905 Cast.KRCWM:$mask, From.RC:$src,
906 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
// Instantiations of vextract_for_mask_cast: every combination of source
// vector type, extracted width (128/256), and mask-element granularity
// (32x4/64x2/32x8/64x4), gated on the feature set that provides each
// instruction (DQI for the 64x2/32x8 forms).
910 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
911 v4f32x_info, vextract128_extract,
912 EXTRACT_get_vextract128_imm, [HasVLX]>;
913 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
914 v2f64x_info, vextract128_extract,
915 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
917 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
918 v4i32x_info, vextract128_extract,
919 EXTRACT_get_vextract128_imm, [HasVLX]>;
920 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
921 v4i32x_info, vextract128_extract,
922 EXTRACT_get_vextract128_imm, [HasVLX]>;
923 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
924 v4i32x_info, vextract128_extract,
925 EXTRACT_get_vextract128_imm, [HasVLX]>;
926 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
927 v2i64x_info, vextract128_extract,
928 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
929 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
930 v2i64x_info, vextract128_extract,
931 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
932 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
933 v2i64x_info, vextract128_extract,
934 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
936 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
937 v4f32x_info, vextract128_extract,
938 EXTRACT_get_vextract128_imm, [HasAVX512]>;
939 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
940 v2f64x_info, vextract128_extract,
941 EXTRACT_get_vextract128_imm, [HasDQI]>;
943 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
944 v4i32x_info, vextract128_extract,
945 EXTRACT_get_vextract128_imm, [HasAVX512]>;
946 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
947 v4i32x_info, vextract128_extract,
948 EXTRACT_get_vextract128_imm, [HasAVX512]>;
949 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
950 v4i32x_info, vextract128_extract,
951 EXTRACT_get_vextract128_imm, [HasAVX512]>;
952 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
953 v2i64x_info, vextract128_extract,
954 EXTRACT_get_vextract128_imm, [HasDQI]>;
955 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
956 v2i64x_info, vextract128_extract,
957 EXTRACT_get_vextract128_imm, [HasDQI]>;
958 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
959 v2i64x_info, vextract128_extract,
960 EXTRACT_get_vextract128_imm, [HasDQI]>;
962 defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
963 v8f32x_info, vextract256_extract,
964 EXTRACT_get_vextract256_imm, [HasDQI]>;
965 defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
966 v4f64x_info, vextract256_extract,
967 EXTRACT_get_vextract256_imm, [HasAVX512]>;
969 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
970 v8i32x_info, vextract256_extract,
971 EXTRACT_get_vextract256_imm, [HasDQI]>;
972 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
973 v8i32x_info, vextract256_extract,
974 EXTRACT_get_vextract256_imm, [HasDQI]>;
975 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
976 v8i32x_info, vextract256_extract,
977 EXTRACT_get_vextract256_imm, [HasDQI]>;
978 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
979 v4i64x_info, vextract256_extract,
980 EXTRACT_get_vextract256_imm, [HasAVX512]>;
981 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
982 v4i64x_info, vextract256_extract,
983 EXTRACT_get_vextract256_imm, [HasAVX512]>;
984 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
985 v4i64x_info, vextract256_extract,
986 EXTRACT_get_vextract256_imm, [HasAVX512]>;
988 // vextractps - extract 32 bits from XMM
// Register form: extract dword $src2 of an XMM into a 32/64-bit GPR.
989 def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
990 (ins VR128X:$src1, u8imm:$src2),
991 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
992 [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
993 EVEX, WIG, Sched<[WriteVecExtract]>;
// Memory form: extract dword $src2 and store it directly to memory.
995 def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
996 (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
997 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
998 [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1000 EVEX, WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1002 //===---------------------------------------------------------------------===//
1003 // AVX-512 BROADCAST
1005 // broadcast with a scalar argument.
// Patterns selecting the register-form broadcast (rr / rrk / rrkz) when the
// broadcast source is a scalar FR register: the scalar is first moved into
// the vector register class via COPY_TO_REGCLASS, then broadcast.
1006 multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo,
1007 X86VectorVTInfo SrcInfo> {
// Unmasked broadcast of a scalar register.
1008 def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1009 (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
1010 (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
// Merge-masked: preserved lanes come from $src0.
1011 def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1012 (X86VBroadcast SrcInfo.FRC:$src),
1013 DestInfo.RC:$src0)),
1014 (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
1015 DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1016 (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
// Zero-masked: masked-off lanes are zeroed.
1017 def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1018 (X86VBroadcast SrcInfo.FRC:$src),
1019 DestInfo.ImmAllZerosV)),
1020 (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
1021 DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1024 // Split version to allow mask and broadcast node to be different types. This
1025 // helps support the 32x2 broadcasts.
// Defines the six broadcast instruction variants: register (rr/rrk/rrkz) and
// memory (rm/rmk/rmkz) forms, each with unmasked, merge-masked (EVEX_K) and
// zero-masked (EVEX_KZ) encodings. MaskInfo controls the mask register class
// and result type; DestInfo controls the broadcast node type; they may differ
// (see the 32x2 broadcasts below). UnmaskedOp/UnmaskedBcastOp can be
// null_frag to disable the unmasked patterns (used by the DQ-only forms).
// NOTE(review): several interior pattern lines (the MaskInfo bitcast
// wrappers) appear to be elided in this listing — verify against upstream.
1026 multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1027 SchedWrite SchedRR, SchedWrite SchedRM,
1028 X86VectorVTInfo MaskInfo,
1029 X86VectorVTInfo DestInfo,
1030 X86VectorVTInfo SrcInfo,
1031 bit IsConvertibleToThreeAddress,
1032 SDPatternOperator UnmaskedOp = X86VBroadcast,
1033 SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
// Register source, unmasked.
1034 let hasSideEffects = 0 in
1035 def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
1036 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1037 [(set MaskInfo.RC:$dst,
1041 (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
1042 DestInfo.ExeDomain>, T8, PD, EVEX, Sched<[SchedRR]>;
// Register source, zero-masked.
1043 def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1044 (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
1045 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1046 "${dst} {${mask}} {z}, $src}"),
1047 [(set MaskInfo.RC:$dst,
1048 (vselect_mask MaskInfo.KRCWM:$mask,
1052 (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1053 MaskInfo.ImmAllZerosV))],
1054 DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
// Register source, merge-masked; $src0 is tied to $dst.
1055 let Constraints = "$src0 = $dst" in
1056 def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1057 (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1059 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1060 "${dst} {${mask}}, $src}"),
1061 [(set MaskInfo.RC:$dst,
1062 (vselect_mask MaskInfo.KRCWM:$mask,
1066 (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1067 MaskInfo.RC:$src0))],
1068 DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_K, Sched<[SchedRR]>;
// Memory (scalar-load) source, unmasked.
1070 let hasSideEffects = 0, mayLoad = 1 in
1071 def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1072 (ins SrcInfo.ScalarMemOp:$src),
1073 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1074 [(set MaskInfo.RC:$dst,
1078 (UnmaskedBcastOp addr:$src)))))],
1079 DestInfo.ExeDomain>, T8, PD, EVEX,
1080 EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
// Memory source, zero-masked.
1082 def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1083 (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
1084 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1085 "${dst} {${mask}} {z}, $src}"),
1086 [(set MaskInfo.RC:$dst,
1087 (vselect_mask MaskInfo.KRCWM:$mask,
1091 (SrcInfo.BroadcastLdFrag addr:$src)))),
1092 MaskInfo.ImmAllZerosV))],
1093 DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_KZ,
1094 EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
// Memory source, merge-masked.
1096 let Constraints = "$src0 = $dst",
1097 isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
1098 def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1099 (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1100 SrcInfo.ScalarMemOp:$src),
1101 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1102 "${dst} {${mask}}, $src}"),
1103 [(set MaskInfo.RC:$dst,
1104 (vselect_mask MaskInfo.KRCWM:$mask,
1108 (SrcInfo.BroadcastLdFrag addr:$src)))),
1109 MaskInfo.RC:$src0))],
1110 DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_K,
1111 EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1114 // Helper class to force mask and broadcast result to same type.
// Convenience wrapper over avx512_broadcast_rm_split that passes DestInfo
// for both the mask type and the broadcast type (the common case).
1115 multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
1116 SchedWrite SchedRR, SchedWrite SchedRM,
1117 X86VectorVTInfo DestInfo,
1118 X86VectorVTInfo SrcInfo,
1119 bit IsConvertibleToThreeAddress> :
1120 avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
1121 DestInfo, DestInfo, SrcInfo,
1122 IsConvertibleToThreeAddress>;
// vbroadcastsd: 512-bit form under AVX512, 256-bit form under VLX.
// Note there is no Z128 variant here — vbroadcastsd has no 128-bit form
// (contrast with avx512_fp_broadcast_ss below).
1124 multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1125 AVX512VLVectorVTInfo _> {
1126 let Predicates = [HasAVX512] in {
1127 defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1128 WriteFShuffle256Ld, _.info512, _.info128, 1>,
1129 avx512_broadcast_scalar<NAME, _.info512, _.info128>,
1133 let Predicates = [HasVLX] in {
1134 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1135 WriteFShuffle256Ld, _.info256, _.info128, 1>,
1136 avx512_broadcast_scalar<NAME, _.info256, _.info128>,
// vbroadcastss: 512-bit form under AVX512; 256-bit and 128-bit forms
// under VLX. Each width also gets the scalar-register patterns.
1141 multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1142 AVX512VLVectorVTInfo _> {
1143 let Predicates = [HasAVX512] in {
1144 defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1145 WriteFShuffle256Ld, _.info512, _.info128, 1>,
1146 avx512_broadcast_scalar<NAME, _.info512, _.info128>,
1150 let Predicates = [HasVLX] in {
1151 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1152 WriteFShuffle256Ld, _.info256, _.info128, 1>,
1153 avx512_broadcast_scalar<NAME, _.info256, _.info128>,
1155 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1156 WriteFShuffle256Ld, _.info128, _.info128, 1>,
1157 avx512_broadcast_scalar<NAME, _.info128, _.info128>,
// Instantiate the FP broadcasts; vbroadcastsd carries REX_W (64-bit element).
1161 defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1163 defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1164 avx512vl_f64_info>, REX_W;
// Broadcast from a general-purpose register (vpbroadcast{d,q} reg form).
// Uses the generic AVX512_maskable machinery; explicitly non-commutable
// and selects with plain vselect rather than vselect_mask.
1166 multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1167 X86VectorVTInfo _, SDPatternOperator OpNode,
1168 RegisterClass SrcRC> {
1169 // Fold with a mask even if it has multiple uses since it is cheap.
1170 let ExeDomain = _.ExeDomain in
1171 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1173 "vpbroadcast"#_.Suffix, "$src", "$src",
1174 (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
1175 /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
1176 T8, PD, EVEX, Sched<[SchedRR]>;
// Broadcast from a GR8/GR16 register (vpbroadcast{b,w} reg form). The
// instruction itself takes a GR32 operand, so the instruction defs carry no
// patterns; the Pats below widen the narrow source into a GR32 via
// INSERT_SUBREG on an IMPLICIT_DEF before selecting rr/rrk/rrkz.
1179 multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1180 X86VectorVTInfo _, SDPatternOperator OpNode,
1181 RegisterClass SrcRC, SubRegIndex Subreg> {
1182 let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1183 defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
1184 (outs _.RC:$dst), (ins GR32:$src),
1185 !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1186 !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1187 "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
1188 "$src0 = $dst">, T8, PD, EVEX, Sched<[SchedRR]>;
// Unmasked broadcast of the narrow GPR.
1190 def : Pat <(_.VT (OpNode SrcRC:$src)),
1191 (!cast<Instruction>(Name#rr)
1192 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1194 // Fold with a mask even if it has multiple uses since it is cheap.
1195 def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1196 (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
1197 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
// Zero-masked variant.
1199 def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1200 (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
1201 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
// Vector-length expansion of avx512_int_broadcastbw_reg: Z under prd,
// Z256/Z128 additionally require VLX.
1204 multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1205 AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1206 RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1207 let Predicates = [prd] in
1208 defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1209 OpNode, SrcRC, Subreg>, EVEX_V512;
1210 let Predicates = [prd, HasVLX] in {
1211 defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1212 _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1213 defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1214 _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
// Vector-length expansion of avx512_int_broadcast_reg (GR32/GR64 sources).
1218 multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1219 SDPatternOperator OpNode,
1220 RegisterClass SrcRC, Predicate prd> {
1221 let Predicates = [prd] in
1222 defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1224 let Predicates = [prd, HasVLX] in {
1225 defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1227 defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
// GPR-source broadcasts. B/W need BWI and go through the GR32-widening
// multiclass. D and Q share opcode 0x7C and are distinguished by REX.W.
1232 defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1233 avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1234 defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1235 avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1237 defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1238 X86VBroadcast, GR32, HasAVX512>;
1239 defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1240 X86VBroadcast, GR64, HasAVX512>, REX_W;
// Vector-length expansion of the register/memory integer broadcasts
// (vpbroadcast{b,w,d,q} from XMM or from a scalar memory operand).
1242 multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1243 AVX512VLVectorVTInfo _, Predicate prd,
1244 bit IsConvertibleToThreeAddress> {
1245 let Predicates = [prd] in {
1246 defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1247 WriteShuffle256Ld, _.info512, _.info128,
1248 IsConvertibleToThreeAddress>,
1251 let Predicates = [prd, HasVLX] in {
1252 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1253 WriteShuffle256Ld, _.info256, _.info128,
1254 IsConvertibleToThreeAddress>,
1256 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
1257 WriteShuffleXLd, _.info128, _.info128,
1258 IsConvertibleToThreeAddress>,
// XMM/memory-source integer broadcasts; B/W require BWI.
1263 defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1264 avx512vl_i8_info, HasBWI, 0>;
1265 defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1266 avx512vl_i16_info, HasBWI, 0>;
1267 defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1268 avx512vl_i32_info, HasAVX512, 1>;
1269 defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1270 avx512vl_i64_info, HasAVX512, 1>, REX_W;
// Memory-only subvector broadcast (vbroadcast{i,f}NNxM) with full masking
// support, including the unmasked pattern (OpNode addr:$src).
1272 multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1273 SDPatternOperator OpNode,
1274 X86VectorVTInfo _Dst,
1275 X86VectorVTInfo _Src> {
1276 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1277 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1278 (_Dst.VT (OpNode addr:$src))>,
1279 Sched<[SchedWriteShuffle.YMM.Folded]>,
1283 // This should be used for the AVX512DQ broadcast instructions. It disables
1284 // the unmasked patterns so that we only use the DQ instructions when masking
// Split variant: the masked patterns are kept, but the unmasked form is left
// pattern-less (hasSideEffects = 0, mayLoad = 1) so unmasked broadcasts keep
// selecting the non-DQ instructions.
1286 multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1287 SDPatternOperator OpNode,
1288 X86VectorVTInfo _Dst,
1289 X86VectorVTInfo _Src> {
1290 let hasSideEffects = 0, mayLoad = 1 in
1291 defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1292 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1294 (_Dst.VT (OpNode addr:$src))>,
1295 Sched<[SchedWriteShuffle.YMM.Folded]>,
// f16 broadcasts have no dedicated instruction; reuse the integer
// VPBROADCASTW forms (FR16X sources are first copied into VR128X).
1298 let Predicates = [HasBWI] in {
1299 def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
1300 (VPBROADCASTWZrm addr:$src)>;
1302 def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
1303 (VPBROADCASTWZrr VR128X:$src)>;
1304 def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
1305 (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
// 128/256-bit widths additionally need VLX.
1307 let Predicates = [HasVLX, HasBWI] in {
1308 def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
1309 (VPBROADCASTWZ128rm addr:$src)>;
1310 def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
1311 (VPBROADCASTWZ256rm addr:$src)>;
1313 def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
1314 (VPBROADCASTWZ128rr VR128X:$src)>;
1315 def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
1316 (VPBROADCASTWZ256rr VR128X:$src)>;
1318 def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
1319 (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1320 def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
1321 (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1324 //===----------------------------------------------------------------------===//
1325 // AVX-512 BROADCAST SUBVECTORS
// Baseline AVX512 subvector broadcasts: 128-bit (32x4) and 256-bit (64x4)
// source subvectors into ZMM. The DQ-only 64x2/32x8 forms are defined later.
1328 defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1329 X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
1330 EVEX_V512, EVEX_CD8<32, CD8VT4>;
1331 defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1332 X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
1333 EVEX_V512, EVEX_CD8<32, CD8VT4>;
1334 defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1335 X86SubVBroadcastld256, v8i64_info, v4i64x_info>, REX_W,
1336 EVEX_V512, EVEX_CD8<64, CD8VT4>;
1337 defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1338 X86SubVBroadcastld256, v8f64_info, v4f64x_info>, REX_W,
1339 EVEX_V512, EVEX_CD8<64, CD8VT4>;
// Unmasked subvector broadcast loads of every same-size element type map
// onto the 32x4/64x4 instructions (the bits loaded are identical); only the
// masked forms need the element-size-specific DQ instructions.
1341 let Predicates = [HasAVX512] in {
1342 def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
1343 (VBROADCASTF64X4rm addr:$src)>;
1344 def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
1345 (VBROADCASTF64X4rm addr:$src)>;
1346 def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
1347 (VBROADCASTF64X4rm addr:$src)>;
1348 def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
1349 (VBROADCASTI64X4rm addr:$src)>;
1350 def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
1351 (VBROADCASTI64X4rm addr:$src)>;
1352 def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
1353 (VBROADCASTI64X4rm addr:$src)>;
1354 def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
1355 (VBROADCASTI64X4rm addr:$src)>;
1357 def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
1358 (VBROADCASTF32X4rm addr:$src)>;
1359 def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
1360 (VBROADCASTF32X4rm addr:$src)>;
1361 def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
1362 (VBROADCASTF32X4rm addr:$src)>;
1363 def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
1364 (VBROADCASTI32X4rm addr:$src)>;
1365 def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
1366 (VBROADCASTI32X4rm addr:$src)>;
1367 def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
1368 (VBROADCASTI32X4rm addr:$src)>;
1369 def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
1370 (VBROADCASTI32X4rm addr:$src)>;
1372 // Patterns for selects of bitcasted operations.
// Masked selects over a bitcast of the broadcast still use the 32x4/64x4
// instructions when the mask granularity matches their element size.
1373 def : Pat<(vselect_mask VK16WM:$mask,
1374 (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1375 (v16f32 immAllZerosV)),
1376 (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1377 def : Pat<(vselect_mask VK16WM:$mask,
1378 (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1380 (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1381 def : Pat<(vselect_mask VK16WM:$mask,
1382 (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1383 (v16i32 immAllZerosV)),
1384 (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1385 def : Pat<(vselect_mask VK16WM:$mask,
1386 (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1388 (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1390 def : Pat<(vselect_mask VK8WM:$mask,
1391 (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1392 (v8f64 immAllZerosV)),
1393 (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1394 def : Pat<(vselect_mask VK8WM:$mask,
1395 (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1397 (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1398 def : Pat<(vselect_mask VK8WM:$mask,
1399 (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1400 (v8i64 immAllZerosV)),
1401 (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1402 def : Pat<(vselect_mask VK8WM:$mask,
1403 (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1405 (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
// 256-bit destination subvector broadcasts (VLX), plus the same
// unmasked-any-type and masked-bitcast pattern strategy as the 512-bit
// block above.
1408 let Predicates = [HasVLX] in {
1409 defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1410 X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
1411 EVEX_V256, EVEX_CD8<32, CD8VT4>;
1412 defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1413 X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
1414 EVEX_V256, EVEX_CD8<32, CD8VT4>;
1416 def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
1417 (VBROADCASTF32X4Z256rm addr:$src)>;
1418 def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
1419 (VBROADCASTF32X4Z256rm addr:$src)>;
1420 def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
1421 (VBROADCASTF32X4Z256rm addr:$src)>;
1422 def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
1423 (VBROADCASTI32X4Z256rm addr:$src)>;
1424 def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
1425 (VBROADCASTI32X4Z256rm addr:$src)>;
1426 def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
1427 (VBROADCASTI32X4Z256rm addr:$src)>;
1428 def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
1429 (VBROADCASTI32X4Z256rm addr:$src)>;
1431 // Patterns for selects of bitcasted operations.
1432 def : Pat<(vselect_mask VK8WM:$mask,
1433 (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1434 (v8f32 immAllZerosV)),
1435 (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1436 def : Pat<(vselect_mask VK8WM:$mask,
1437 (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1439 (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1440 def : Pat<(vselect_mask VK8WM:$mask,
1441 (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1442 (v8i32 immAllZerosV)),
1443 (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1444 def : Pat<(vselect_mask VK8WM:$mask,
1445 (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1447 (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
// bf16 subvector broadcast loads also reuse the FP 32x4/64x4 instructions
// (same bits loaded, bf16 has no dedicated subvector broadcast).
1450 let Predicates = [HasBF16] in {
1451 def : Pat<(v32bf16 (X86SubVBroadcastld256 addr:$src)),
1452 (VBROADCASTF64X4rm addr:$src)>;
1453 def : Pat<(v32bf16 (X86SubVBroadcastld128 addr:$src)),
1454 (VBROADCASTF32X4rm addr:$src)>;
1457 let Predicates = [HasBF16, HasVLX] in
1458 def : Pat<(v16bf16 (X86SubVBroadcastld128 addr:$src)),
1459 (VBROADCASTF32X4Z256rm addr:$src)>;
// DQ-only 64x2 subvector broadcasts into YMM; masked patterns only (the
// _dq multiclass suppresses the unmasked patterns).
// NOTE(review): the Z128 name suffix paired with EVEX_V256 / 256-bit dest
// types looks inconsistent, but the rmk/rmkz patterns below reference these
// exact names — renaming would need coordinated changes; confirm upstream.
1461 let Predicates = [HasVLX, HasDQI] in {
1462 defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1463 X86SubVBroadcastld128, v4i64x_info, v2i64x_info>,
1464 EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
1465 defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1466 X86SubVBroadcastld128, v4f64x_info, v2f64x_info>,
1467 EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
1469 // Patterns for selects of bitcasted operations.
1470 def : Pat<(vselect_mask VK4WM:$mask,
1471 (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1472 (v4f64 immAllZerosV)),
1473 (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1474 def : Pat<(vselect_mask VK4WM:$mask,
1475 (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1477 (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1478 def : Pat<(vselect_mask VK4WM:$mask,
1479 (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1480 (v4i64 immAllZerosV)),
1481 (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1482 def : Pat<(vselect_mask VK4WM:$mask,
1483 (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1485 (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
// DQ-only 512-bit destination subvector broadcasts (64x2 and 32x8);
// masked patterns only, unmasked broadcasts use the non-DQ forms above.
1488 let Predicates = [HasDQI] in {
1489 defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1490 X86SubVBroadcastld128, v8i64_info, v2i64x_info>, REX_W,
1491 EVEX_V512, EVEX_CD8<64, CD8VT2>;
1492 defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1493 X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
1494 EVEX_V512, EVEX_CD8<32, CD8VT8>;
1495 defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1496 X86SubVBroadcastld128, v8f64_info, v2f64x_info>, REX_W,
1497 EVEX_V512, EVEX_CD8<64, CD8VT2>;
1498 defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1499 X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
1500 EVEX_V512, EVEX_CD8<32, CD8VT8>;
1502 // Patterns for selects of bitcasted operations.
1503 def : Pat<(vselect_mask VK16WM:$mask,
1504 (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1505 (v16f32 immAllZerosV)),
1506 (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
1507 def : Pat<(vselect_mask VK16WM:$mask,
1508 (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1510 (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1511 def : Pat<(vselect_mask VK16WM:$mask,
1512 (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1513 (v16i32 immAllZerosV)),
1514 (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
1515 def : Pat<(vselect_mask VK16WM:$mask,
1516 (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1518 (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1520 def : Pat<(vselect_mask VK8WM:$mask,
1521 (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1522 (v8f64 immAllZerosV)),
1523 (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
1524 def : Pat<(vselect_mask VK8WM:$mask,
1525 (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1527 (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1528 def : Pat<(vselect_mask VK8WM:$mask,
1529 (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1530 (v8i64 immAllZerosV)),
1531 (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
1532 def : Pat<(vselect_mask VK8WM:$mask,
1533 (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1535 (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
// 32x2 broadcasts (DQ): broadcast a 64-bit element pair as two 32-bit
// lanes. Uses the _split multiclass because the mask type (32-bit lanes,
// _Dst) differs from the broadcast node type (_Src); unmasked ops are
// disabled via null_frag. 512- and 256-bit widths only.
1538 multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1539 AVX512VLVectorVTInfo _Dst,
1540 AVX512VLVectorVTInfo _Src> {
1541 let Predicates = [HasDQI] in
1542 defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1543 WriteShuffle256Ld, _Dst.info512,
1544 _Src.info512, _Src.info128, 0, null_frag, null_frag>,
1546 let Predicates = [HasDQI, HasVLX] in
1547 defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1548 WriteShuffle256Ld, _Dst.info256,
1549 _Src.info256, _Src.info128, 0, null_frag, null_frag>,
// Extends avx512_common_broadcast_32x2 with the 128-bit (Z128) variant,
// which uses the narrower WriteShuffle/WriteShuffleXLd scheduling classes.
1553 multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1554 AVX512VLVectorVTInfo _Dst,
1555 AVX512VLVectorVTInfo _Src> :
1556 avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1558 let Predicates = [HasDQI, HasVLX] in
1559 defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
1560 WriteShuffleXLd, _Dst.info128,
1561 _Src.info128, _Src.info128, 0, null_frag, null_frag>,
// Instantiations: the integer form gets all three vector widths (i32x2),
// the FP form only 512/256-bit (32x2).
1565 defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1566 avx512vl_i32_info, avx512vl_i64_info>;
1567 defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1568 avx512vl_f32_info, avx512vl_f64_info>;
1570 //===----------------------------------------------------------------------===//
1571 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
// Register-only form: broadcast the contents of a mask register KRC into
// every element of the destination vector register.
1573 multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1574 X86VectorVTInfo _, RegisterClass KRC> {
1575 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1576 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1577 [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1578 EVEX, Sched<[WriteShuffle]>;
// Size wrapper for the mask-to-vector broadcast: 512-bit under CDI,
// 256/128-bit additionally require VLX.
1581 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1582 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1583 let Predicates = [HasCDI] in
1584 defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1585 let Predicates = [HasCDI, HasVLX] in {
1586 defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1587 defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
// vpbroadcastmw2d: 16-bit mask to i32 elements; vpbroadcastmb2q: 8-bit mask
// to i64 elements (REX_W selects the 64-bit element encoding).
1591 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1592 avx512vl_i32_info, VK16>;
1593 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1594 avx512vl_i64_info, VK8>, REX_W;
1596 //===----------------------------------------------------------------------===//
1597 // -- VPERMI2 - 3 source operands form --
// VPERMI2*: the index operand ($src1, of type IdxVT) is tied to $dst and is
// overwritten with the permute result.  rr = register/register, rm = memory
// source for $src3.
1598 multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1599 X86FoldableSchedWrite sched,
1600 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1601 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1602 hasSideEffects = 0 in {
1603 defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1604 (ins _.RC:$src2, _.RC:$src3),
1605 OpcodeStr, "$src3, $src2", "$src2, $src3",
1606 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1607 EVEX, VVVV, AVX5128IBase, Sched<[sched]>;
1610 defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1611 (ins _.RC:$src2, _.MemOp:$src3),
1612 OpcodeStr, "$src3, $src2", "$src2, $src3",
1613 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1614 (_.VT (_.LdFrag addr:$src3)))), 1>,
1615 EVEX, VVVV, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast-memory (rmb) form of VPERMI2*: $src3 is a scalar memory operand
// broadcast to the full vector (EVEX_B).  Index is still tied to $dst.
1619 multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1620 X86FoldableSchedWrite sched,
1621 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1622 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1623 hasSideEffects = 0, mayLoad = 1 in
1624 defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1625 (ins _.RC:$src2, _.ScalarMemOp:$src3),
1626 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1627 !strconcat("$src2, ${src3}", _.BroadcastStr ),
1628 (_.VT (X86VPermt2 _.RC:$src2,
1629 IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1630 AVX5128IBase, EVEX, VVVV, EVEX_B,
1631 Sched<[sched.Folded, sched.ReadAfterFold]>;
// All vector widths of VPERMI2 including the broadcast form: 512-bit
// unconditionally, 128/256-bit under HasVLX.
1634 multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1635 X86FoldableSchedWrite sched,
1636 AVX512VLVectorVTInfo VTInfo,
1637 AVX512VLVectorVTInfo ShuffleMask> {
1638 defm NAME#Z: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1639 ShuffleMask.info512>,
1640 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1641 ShuffleMask.info512>, EVEX_V512;
1642 let Predicates = [HasVLX] in {
1643 defm NAME#Z128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1644 ShuffleMask.info128>,
1645 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1646 ShuffleMask.info128>, EVEX_V128;
1647 defm NAME#Z256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1648 ShuffleMask.info256>,
1649 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1650 ShuffleMask.info256>, EVEX_V256;
// Byte/word VPERMI2 variants: gated on a caller-supplied predicate
// (HasBWI / HasVBMI) and with no broadcast-memory form, since byte/word
// elements have no embedded-broadcast encoding.
1654 multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1655 X86FoldableSchedWrite sched,
1656 AVX512VLVectorVTInfo VTInfo,
1657 AVX512VLVectorVTInfo Idx,
1659 let Predicates = [Prd] in
1660 defm NAME#Z: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1661 Idx.info512>, EVEX_V512;
1662 let Predicates = [Prd, HasVLX] in {
1663 defm NAME#Z128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1664 Idx.info128>, EVEX_V128;
1665 defm NAME#Z256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1666 Idx.info256>, EVEX_V256;
// VPERMI2 instantiations.  Note the FP variants (PS/PD) still use integer
// index vectors (avx512vl_i32_info / avx512vl_i64_info).
1670 defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1671 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1672 defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1673 avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1674 defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1675 avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1676 REX_W, EVEX_CD8<16, CD8VF>;
1677 defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1678 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1680 defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1681 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1682 defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1683 avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1685 // Extra patterns to deal with extra bitcasts due to passthru and index being
1686 // different types on the fp versions.
// Each pattern matches a masked VPermt2 whose index and select-passthru are
// the same register viewed through a bitcast (CastVT -> IdxVT / _.VT), and
// selects the corresponding merge-masked instruction (rrk / rmk / rmbk).
1687 multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1688 X86VectorVTInfo IdxVT,
1689 X86VectorVTInfo CastVT> {
1690 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1691 (X86VPermt2 (_.VT _.RC:$src2),
1692 (IdxVT.VT (bitconvert
1693 (CastVT.VT _.RC:$src1))),
1695 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1696 (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1697 _.RC:$src2, _.RC:$src3)>;
1698 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1699 (X86VPermt2 _.RC:$src2,
1700 (IdxVT.VT (bitconvert
1701 (CastVT.VT _.RC:$src1))),
1702 (_.LdFrag addr:$src3)),
1703 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1704 (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1705 _.RC:$src2, addr:$src3)>;
1706 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1707 (X86VPermt2 _.RC:$src2,
1708 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
1709 (_.BroadcastLdFrag addr:$src3)),
1710 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1711 (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1712 _.RC:$src2, addr:$src3)>;
1715 // TODO: Should we add more casts? The vXi64 case is common due to ABI.
// Only the PS variants need these: f32 result, i32 index, viewed as vXi64.
1716 defm : avx512_perm_i_lowering<"VPERMI2PSZ", v16f32_info, v16i32_info, v8i64_info>;
1717 defm : avx512_perm_i_lowering<"VPERMI2PSZ256", v8f32x_info, v8i32x_info, v4i64x_info>;
1718 defm : avx512_perm_i_lowering<"VPERMI2PSZ128", v4f32x_info, v4i32x_info, v2i64x_info>;
// VPERMT2*: unlike VPERMI2, the first DATA operand ($src1) is tied to $dst
// and overwritten; the index register is $src2.
1721 multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1722 X86FoldableSchedWrite sched,
1723 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1724 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1725 defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1726 (ins IdxVT.RC:$src2, _.RC:$src3),
1727 OpcodeStr, "$src3, $src2", "$src2, $src3",
1728 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1729 EVEX, VVVV, AVX5128IBase, Sched<[sched]>;
1731 defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1732 (ins IdxVT.RC:$src2, _.MemOp:$src3),
1733 OpcodeStr, "$src3, $src2", "$src2, $src3",
1734 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1735 (_.LdFrag addr:$src3))), 1>,
1736 EVEX, VVVV, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast-memory (rmb) form of VPERMT2*; mirrors avx512_perm_i_mb but with
// the data operand ($src1) tied to $dst.
1739 multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1740 X86FoldableSchedWrite sched,
1741 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1742 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1743 defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1744 (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1745 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1746 !strconcat("$src2, ${src3}", _.BroadcastStr ),
1747 (_.VT (X86VPermt2 _.RC:$src1,
1748 IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1749 AVX5128IBase, EVEX, VVVV, EVEX_B,
1750 Sched<[sched.Folded, sched.ReadAfterFold]>;
// All vector widths of VPERMT2 including the broadcast form: 512-bit
// unconditionally, 128/256-bit under HasVLX.
1753 multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1754 X86FoldableSchedWrite sched,
1755 AVX512VLVectorVTInfo VTInfo,
1756 AVX512VLVectorVTInfo ShuffleMask> {
1757 defm NAME#Z: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1758 ShuffleMask.info512>,
1759 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1760 ShuffleMask.info512>, EVEX_V512;
1761 let Predicates = [HasVLX] in {
1762 defm NAME#Z128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1763 ShuffleMask.info128>,
1764 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1765 ShuffleMask.info128>, EVEX_V128;
1766 defm NAME#Z256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1767 ShuffleMask.info256>,
1768 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1769 ShuffleMask.info256>, EVEX_V256;
// Byte/word VPERMT2 variants: predicate-gated (HasBWI / HasVBMI), no
// broadcast form for sub-dword elements.
1773 multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1774 X86FoldableSchedWrite sched,
1775 AVX512VLVectorVTInfo VTInfo,
1776 AVX512VLVectorVTInfo Idx, Predicate Prd> {
1777 let Predicates = [Prd] in
1778 defm NAME#Z: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1779 Idx.info512>, EVEX_V512;
1780 let Predicates = [Prd, HasVLX] in {
1781 defm NAME#Z128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1782 Idx.info128>, EVEX_V128;
1783 defm NAME#Z256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1784 Idx.info256>, EVEX_V256;
// VPERMT2 instantiations; FP variants reuse integer index-vector infos.
1788 defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1789 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1790 defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1791 avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1792 defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1793 avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1794 REX_W, EVEX_CD8<16, CD8VF>;
1795 defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1796 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1798 defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1799 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1800 defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1801 avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1803 //===----------------------------------------------------------------------===//
1804 // AVX-512 - BLEND using mask
// Assembly-only blend instructions (empty patterns; selection is handled
// elsewhere).  Register and plain-memory forms, each in unmasked (rr/rm),
// merge-masked (rrk/rmk) and zero-masked (rrkz/rmkz) variants.
1807 multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1808 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1809 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1810 def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1811 (ins _.RC:$src1, _.RC:$src2),
1812 !strconcat(OpcodeStr,
1813 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1814 EVEX, VVVV, Sched<[sched]>;
1815 def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1816 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1817 !strconcat(OpcodeStr,
1818 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1819 []>, EVEX, VVVV, EVEX_K, Sched<[sched]>;
1820 def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1821 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1822 !strconcat(OpcodeStr,
1823 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1824 []>, EVEX, VVVV, EVEX_KZ, Sched<[sched]>;
1825 let mayLoad = 1 in {
1826 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1827 (ins _.RC:$src1, _.MemOp:$src2),
1828 !strconcat(OpcodeStr,
1829 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1830 []>, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
1831 Sched<[sched.Folded, sched.ReadAfterFold]>;
1832 def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1833 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1834 !strconcat(OpcodeStr,
1835 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1836 []>, EVEX, VVVV, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1837 Sched<[sched.Folded, sched.ReadAfterFold]>;
1838 def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1839 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1840 !strconcat(OpcodeStr,
1841 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1842 []>, EVEX, VVVV, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1843 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast-memory (EVEX_B) blend forms: unmasked (rmb), merge-masked (rmbk)
// and zero-masked (rmbkz).  Assembly-only like the base multiclass.
1847 multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
1848 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1849 let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
1850 def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1851 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1852 !strconcat(OpcodeStr,
1853 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1854 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1855 EVEX, VVVV, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1856 Sched<[sched.Folded, sched.ReadAfterFold]>;
1858 def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1859 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1860 !strconcat(OpcodeStr,
1861 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
1862 "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1863 EVEX, VVVV, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1864 Sched<[sched.Folded, sched.ReadAfterFold]>;
1866 def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1867 (ins _.RC:$src1, _.ScalarMemOp:$src2),
1868 !strconcat(OpcodeStr,
1869 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
1870 "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1871 EVEX, VVVV, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1872 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Dword/qword blends: all widths get both the regular and broadcast-memory
// forms; 256/128-bit under HasVLX.
1876 multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
1877 AVX512VLVectorVTInfo VTInfo> {
1878 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1879 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1882 let Predicates = [HasVLX] in {
1883 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1884 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1886 defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
1887 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
// Byte/word blends: require BWI, and have no broadcast-memory form since
// sub-dword elements cannot use embedded broadcast.
1892 multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
1893 AVX512VLVectorVTInfo VTInfo> {
1894 let Predicates = [HasBWI] in
1895 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1898 let Predicates = [HasBWI, HasVLX] in {
1899 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1901 defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
// Blend instantiations: FP and integer dword/qword via blendmask_dq,
// byte/word via blendmask_bw.
1906 defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
1908 defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
1909 avx512vl_f64_info>, REX_W;
1910 defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
1912 defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
1913 avx512vl_i64_info>, REX_W;
1914 defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
1916 defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
1917 avx512vl_i16_info>, REX_W;
1919 //===----------------------------------------------------------------------===//
1920 // Compare Instructions
1921 //===----------------------------------------------------------------------===//
1923 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
// Intrinsic forms (rr_Int/rm_Int/rrb_Int) compare the vector type and write a
// mask register; rrb_Int is the {sae} (suppress-all-exceptions) variant and
// reads MXCSR.  The isCodeGenOnly rr/rm forms operate on scalar FRC registers
// for selection from plain IR compares.
1925 multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
1926 PatFrag OpNode_su, PatFrag OpNodeSAE_su,
1927 X86FoldableSchedWrite sched> {
1928 defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1930 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1932 "$cc, $src2, $src1", "$src1, $src2, $cc",
1933 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
1934 (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1935 timm:$cc)>, EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC;
1937 defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
1939 (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
1941 "$cc, $src2, $src1", "$src1, $src2, $cc",
1942 (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
1944 (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
1945 timm:$cc)>, EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
1946 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
1948 let Uses = [MXCSR] in
1949 defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1951 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1953 "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
1954 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1956 (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1958 EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>;
1960 let isCodeGenOnly = 1 in {
1961 let isCommutable = 1 in
1962 def rr : AVX512Ii8<0xC2, MRMSrcReg,
1963 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
1964 !strconcat("vcmp", _.Suffix,
1965 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
1966 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
1969 EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC;
1970 def rm : AVX512Ii8<0xC2, MRMSrcMem,
1972 (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
1973 !strconcat("vcmp", _.Suffix,
1974 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
1975 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
1976 (_.ScalarLdFrag addr:$src2),
1978 EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
1979 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
// Scalar compare instantiations: SS/SD under AVX512, SH under FP16 (TA map).
1983 let Predicates = [HasAVX512] in {
1984 let ExeDomain = SSEPackedSingle in
1985 defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
1986 X86cmpms_su, X86cmpmsSAE_su,
1987 SchedWriteFCmp.Scl>, AVX512XSIi8Base;
1988 let ExeDomain = SSEPackedDouble in
1989 defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
1990 X86cmpms_su, X86cmpmsSAE_su,
1991 SchedWriteFCmp.Scl>, AVX512XDIi8Base, REX_W;
1993 let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in
1994 defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE,
1995 X86cmpms_su, X86cmpmsSAE_su,
1996 SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA;
// Packed integer equality/greater-than compares writing a mask register.
// All forms are assembly-only here (empty patterns): rr/rm plus the
// merge-masked rrk/rmk variants (mask ANDed into the result by the EVEX_K
// encoding).
1998 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
1999 X86FoldableSchedWrite sched,
2000 X86VectorVTInfo _, bit IsCommutable> {
2001 let isCommutable = IsCommutable, hasSideEffects = 0 in
2002 def rr : AVX512BI<opc, MRMSrcReg,
2003 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2004 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2005 []>, EVEX, VVVV, Sched<[sched]>;
2006 let mayLoad = 1, hasSideEffects = 0 in
2007 def rm : AVX512BI<opc, MRMSrcMem,
2008 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2009 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2010 []>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
2011 let isCommutable = IsCommutable, hasSideEffects = 0 in
2012 def rrk : AVX512BI<opc, MRMSrcReg,
2013 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2014 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2015 "$dst {${mask}}, $src1, $src2}"),
2016 []>, EVEX, VVVV, EVEX_K, Sched<[sched]>;
2017 let mayLoad = 1, hasSideEffects = 0 in
2018 def rmk : AVX512BI<opc, MRMSrcMem,
2019 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2020 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2021 "$dst {${mask}}, $src1, $src2}"),
2022 []>, EVEX, VVVV, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Adds embedded-broadcast memory forms (rmb/rmbk, EVEX_B) on top of
// avx512_icmp_packed, for dword/qword element sizes.
2025 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
2026 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2028 avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
2029 let mayLoad = 1, hasSideEffects = 0 in {
2030 def rmb : AVX512BI<opc, MRMSrcMem,
2031 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2032 !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2033 "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2034 []>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2035 def rmbk : AVX512BI<opc, MRMSrcMem,
2036 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2037 _.ScalarMemOp:$src2),
2038 !strconcat(OpcodeStr,
2039 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2040 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2041 []>, EVEX, VVVV, EVEX_K, EVEX_B,
2042 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Width wrapper for the non-broadcast packed integer compares (byte/word):
// 512-bit under prd, 256/128-bit under prd + VLX.
2046 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
2047 X86SchedWriteWidths sched,
2048 AVX512VLVectorVTInfo VTInfo, Predicate prd,
2049 bit IsCommutable = 0> {
2050 let Predicates = [prd] in
2051 defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
2052 VTInfo.info512, IsCommutable>, EVEX_V512;
2054 let Predicates = [prd, HasVLX] in {
2055 defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
2056 VTInfo.info256, IsCommutable>, EVEX_V256;
2057 defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
2058 VTInfo.info128, IsCommutable>, EVEX_V128;
// Width wrapper for the broadcast-capable packed integer compares
// (dword/qword): same predicate structure as avx512_icmp_packed_vl.
2062 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2063 X86SchedWriteWidths sched,
2064 AVX512VLVectorVTInfo VTInfo,
2065 Predicate prd, bit IsCommutable = 0> {
2066 let Predicates = [prd] in
2067 defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
2068 VTInfo.info512, IsCommutable>, EVEX_V512;
2070 let Predicates = [prd, HasVLX] in {
2071 defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
2072 VTInfo.info256, IsCommutable>, EVEX_V256;
2073 defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
2074 VTInfo.info128, IsCommutable>, EVEX_V128;
2078 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2079 // increase the pattern complexity the way an immediate would.
2080 let AddedComplexity = 2 in {
2081 // FIXME: Is there a better scheduler class for VPCMP?
// VPCMPEQ* are commutable (trailing 1); VPCMPGT* are not.  Byte/word forms
// require BWI; dword/qword forms are broadcast-capable and only need AVX512.
2082 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
2083 SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2084 EVEX_CD8<8, CD8VF>, WIG;
2086 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
2087 SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2088 EVEX_CD8<16, CD8VF>, WIG;
2090 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
2091 SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2092 EVEX_CD8<32, CD8VF>;
2094 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
2095 SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2096 T8, REX_W, EVEX_CD8<64, CD8VF>;
2098 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
2099 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2100 EVEX_CD8<8, CD8VF>, WIG;
2102 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
2103 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2104 EVEX_CD8<16, CD8VF>, WIG;
2106 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
2107 SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2108 EVEX_CD8<32, CD8VF>;
2110 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
2111 SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2112 T8, REX_W, EVEX_CD8<64, CD8VF>;
// VPCMP with an explicit condition-code immediate.  rri/rmi are the unmasked
// register/memory forms; rrik/rmik AND the result with a write-mask.  The
// trailing Pats commute a compare whose LOAD is the first operand into the
// rmi/rmik instructions, rewriting the immediate via X86pcmpm_imm_commute.
2115 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2117 X86FoldableSchedWrite sched,
2118 X86VectorVTInfo _, string Name> {
2119 let isCommutable = 1 in
2120 def rri : AVX512AIi8<opc, MRMSrcReg,
2121 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2122 !strconcat("vpcmp", Suffix,
2123 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2124 [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2127 EVEX, VVVV, Sched<[sched]>;
2128 def rmi : AVX512AIi8<opc, MRMSrcMem,
2129 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2130 !strconcat("vpcmp", Suffix,
2131 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2132 [(set _.KRC:$dst, (_.KVT
2135 (_.VT (_.LdFrag addr:$src2)),
2137 EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
2138 let isCommutable = 1 in
2139 def rrik : AVX512AIi8<opc, MRMSrcReg,
2140 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2142 !strconcat("vpcmp", Suffix,
2143 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2144 "$dst {${mask}}, $src1, $src2, $cc}"),
2145 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2146 (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
2149 EVEX, VVVV, EVEX_K, Sched<[sched]>;
2150 def rmik : AVX512AIi8<opc, MRMSrcMem,
2151 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2153 !strconcat("vpcmp", Suffix,
2154 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2155 "$dst {${mask}}, $src1, $src2, $cc}"),
2156 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2160 (_.VT (_.LdFrag addr:$src2)),
2162 EVEX, VVVV, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2164 def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2),
2165 (_.VT _.RC:$src1), cond)),
2166 (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2167 _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2169 def : Pat<(and _.KRCWM:$mask,
2170 (_.KVT (Frag_su:$cc (_.LdFrag addr:$src2),
2171 (_.VT _.RC:$src1), cond))),
2172 (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2173 _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2174 (X86pcmpm_imm_commute $cc))>;
// Adds embedded-broadcast forms (rmib/rmibk, EVEX_B) of VPCMP-with-CC, plus
// the matching commuted-broadcast-load patterns.
2177 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2178 PatFrag Frag_su, X86FoldableSchedWrite sched,
2179 X86VectorVTInfo _, string Name> :
2180 avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> {
2181 def rmib : AVX512AIi8<opc, MRMSrcMem,
2182 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2184 !strconcat("vpcmp", Suffix,
2185 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2186 "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2187 [(set _.KRC:$dst, (_.KVT (Frag:$cc
2189 (_.BroadcastLdFrag addr:$src2),
2191 EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2192 def rmibk : AVX512AIi8<opc, MRMSrcMem,
2193 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2194 _.ScalarMemOp:$src2, u8imm:$cc),
2195 !strconcat("vpcmp", Suffix,
2196 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2197 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2198 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2201 (_.BroadcastLdFrag addr:$src2),
2203 EVEX, VVVV, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2205 def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2),
2206 (_.VT _.RC:$src1), cond)),
2207 (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2208 _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2210 def : Pat<(and _.KRCWM:$mask,
2211 (_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2),
2212 (_.VT _.RC:$src1), cond))),
2213 (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2214 _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2215 (X86pcmpm_imm_commute $cc))>;
// Width wrapper for VPCMP-with-CC without broadcast (byte/word elements).
2218 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2219 PatFrag Frag_su, X86SchedWriteWidths sched,
2220 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2221 let Predicates = [prd] in
2222 defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2223 sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2225 let Predicates = [prd, HasVLX] in {
2226 defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2227 sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2228 defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2229 sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
// Width wrapper for broadcast-capable VPCMP-with-CC (dword/qword elements).
2233 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2234 PatFrag Frag_su, X86SchedWriteWidths sched,
2235 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2236 let Predicates = [prd] in
2237 defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2238 sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2240 let Predicates = [prd, HasVLX] in {
2241 defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2242 sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2243 defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2244 sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2248 // FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
// Signed (X86pcmpm, opcode 0x3F/0x1F) vs unsigned (X86pcmpum, 0x3E/0x1E)
// variants; b/w need BWI, d/q need only AVX512 and support broadcast.
2249 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
2250 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2252 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
2253 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2256 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
2257 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2258 REX_W, EVEX_CD8<16, CD8VF>;
2259 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
2260 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2261 REX_W, EVEX_CD8<16, CD8VF>;
2263 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
2264 SchedWriteVecALU, avx512vl_i32_info,
2265 HasAVX512>, EVEX_CD8<32, CD8VF>;
2266 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
2267 SchedWriteVecALU, avx512vl_i32_info,
2268 HasAVX512>, EVEX_CD8<32, CD8VF>;
2270 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
2271 SchedWriteVecALU, avx512vl_i64_info,
2272 HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;
2273 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
2274 SchedWriteVecALU, avx512vl_i64_info,
2275 HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;
// Packed FP compare (VCMPPS/PD-style) writing a mask register.  The
// instructions (rri/rmi/rmbi) may raise FP exceptions and read MXCSR.  The
// trailing patterns handle three extra cases: (1) compares whose load is the
// FIRST operand, commuted via X86cmpm_imm_commute; (2) the X86cmpmm mask
// intrinsic with an all-ones or explicit write mask; (3) the same intrinsic
// with the load in the other operand, again commuted.
2277 multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2279 let Uses = [MXCSR], mayRaiseFPException = 1 in {
2280 defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2281 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
2283 "$cc, $src2, $src1", "$src1, $src2, $cc",
2284 (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2285 (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2288 defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2289 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2291 "$cc, $src2, $src1", "$src1, $src2, $cc",
2292 (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2294 (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2296 Sched<[sched.Folded, sched.ReadAfterFold]>;
2298 defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2300 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2302 "$cc, ${src2}"#_.BroadcastStr#", $src1",
2303 "$src1, ${src2}"#_.BroadcastStr#", $cc",
2304 (X86any_cmpm (_.VT _.RC:$src1),
2305 (_.VT (_.BroadcastLdFrag addr:$src2)),
2307 (X86cmpm_su (_.VT _.RC:$src1),
2308 (_.VT (_.BroadcastLdFrag addr:$src2)),
2310 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2313 // Patterns for selecting with loads in other operand.
2314 def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2316 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2317 (X86cmpm_imm_commute timm:$cc))>;
2319 def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
2322 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2323 _.RC:$src1, addr:$src2,
2324 (X86cmpm_imm_commute timm:$cc))>;
2326 def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
2327 (_.VT _.RC:$src1), timm:$cc),
2328 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2329 (X86cmpm_imm_commute timm:$cc))>;
2331 def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
2334 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2335 _.RC:$src1, addr:$src2,
2336 (X86cmpm_imm_commute timm:$cc))>;
2338 // Patterns for mask intrinsics.
2339 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
2340 (_.KVT immAllOnesV)),
2341 (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>;
2343 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask),
2344 (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1,
2345 _.RC:$src2, timm:$cc)>;
2347 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2348 (_.KVT immAllOnesV)),
2349 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>;
2351 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2353 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1,
2354 addr:$src2, timm:$cc)>;
2356 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2357 (_.KVT immAllOnesV)),
2358 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>;
2360 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2362 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1,
2363 addr:$src2, timm:$cc)>;
2365 // Patterns for mask intrinsics with loads in other operand.
2366 def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2367 (_.KVT immAllOnesV)),
2368 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2369 (X86cmpm_imm_commute timm:$cc))>;
2371 def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2373 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2374 _.RC:$src1, addr:$src2,
2375 (X86cmpm_imm_commute timm:$cc))>;
2377 def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2378 (_.KVT immAllOnesV)),
2379 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2380 (X86cmpm_imm_commute timm:$cc))>;
2382 def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2384 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2385 _.RC:$src1, addr:$src2,
2386 (X86cmpm_imm_commute timm:$cc))>;
// Register-register VCMP with {sae} (suppress-all-exceptions): produces a
// k-mask from X86cmpmmSAE, with an all-ones-mask (unmasked) pattern and a
// merge-masked pattern. EVEX_B encodes the SAE modifier in the reg-reg form.
2389 multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2390 // comparison code form (VCMP[EQ/LT/LE/...]
2391 let Uses = [MXCSR] in
2392 defm rrib : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst),
2393 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2394 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
2396 "$cc, {sae}, $src2, $src1",
2397 "$src1, $src2, {sae}, $cc",
2398 [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2399 (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))],
2400 [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2401 (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>,
2402 EVEX_B, Sched<[sched]>;
// Instantiates packed FP compare at all vector widths: the 512-bit form (with
// the additional {sae} variant) under Pred, and the 128/256-bit forms under
// Pred + HasVLX.
2405 multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
2406 Predicate Pred = HasAVX512> {
2407 let Predicates = [Pred] in {
2408 defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2409 avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2412 let Predicates = [Pred,HasVLX] in {
2413 defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2414 defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
// Packed FP compare-to-mask instructions. VCMPPH (FP16) is gated on HasFP16
// and uses the TA opcode map; PD/PS use the legacy 0x66/none prefixes.
2418 defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2419 AVX512PDIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
2420 defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2421 AVX512PSIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
2422 defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
2423 AVX512PSIi8Base, EVEX, VVVV, EVEX_CD8<16, CD8VF>, TA;
2425 // Patterns to select fp compares with load as first operand.
// The immediate is commuted (X86cmpm_imm_commute) so the memory operand can
// take the src2 slot of the rm instruction forms.
2426 let Predicates = [HasAVX512] in {
2427 def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
2428 (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2430 def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
2431 (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2434 let Predicates = [HasFP16] in {
2435 def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
2436 (VCMPSHZrm FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2439 // ----------------------------------------------------------------
2442 //handle fpclass instruction mask = op(reg_scalar,imm)
2443 // op(mem_scalar,imm)
// Scalar VFPCLASS: tests a scalar FP value against the classes selected by the
// imm8 and writes a 1-bit k-mask result. Masked (rrk/rmk) forms AND the test
// result with the input mask.
2444 multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
2445 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2447 let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2448 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2449 (ins _.RC:$src1, i32u8imm:$src2),
2450 OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2451 [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
2452 (i32 timm:$src2)))]>,
2454 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2455 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2457 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2458 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2459 (X86Vfpclasss_su (_.VT _.RC:$src1),
2460 (i32 timm:$src2))))]>,
2461 EVEX_K, Sched<[sched]>;
2462 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2463 (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2465 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2467 (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
2468 (i32 timm:$src2)))]>,
2469 Sched<[sched.Folded, sched.ReadAfterFold]>;
2470 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2471 (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2473 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2474 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2475 (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
2476 (i32 timm:$src2))))]>,
2477 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2481 //handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm)
2482 // fpclass(reg_vec, mem_vec, imm)
2483 // fpclass(reg_vec, broadcast(eltVt), imm)
// Packed VFPCLASS: rr/rrk register forms, rm/rmk full-vector memory forms and
// rmb/rmbk element-broadcast forms. Masked variants AND the class-test result
// with the input k-mask.
2484 multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
2485 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2487 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2488 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2489 (ins _.RC:$src1, i32u8imm:$src2),
2490 OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2491 [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
2492 (i32 timm:$src2)))]>,
2494 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2495 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2497 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2498 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2499 (X86Vfpclass_su (_.VT _.RC:$src1),
2500 (i32 timm:$src2))))]>,
2501 EVEX_K, Sched<[sched]>;
2502 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2503 (ins _.MemOp:$src1, i32u8imm:$src2),
2504 OpcodeStr#_.Suffix#"{"#mem#"}"#
2505 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2506 [(set _.KRC:$dst,(X86Vfpclass
2507 (_.VT (_.LdFrag addr:$src1)),
2508 (i32 timm:$src2)))]>,
2509 Sched<[sched.Folded, sched.ReadAfterFold]>;
2510 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2511 (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2512 OpcodeStr#_.Suffix#"{"#mem#"}"#
2513 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2514 [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
2515 (_.VT (_.LdFrag addr:$src1)),
2516 (i32 timm:$src2))))]>,
2517 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2518 def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2519 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2520 OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2521 _.BroadcastStr#", $dst|$dst, ${src1}"
2522 #_.BroadcastStr#", $src2}",
2523 [(set _.KRC:$dst,(X86Vfpclass
2524 (_.VT (_.BroadcastLdFrag addr:$src1)),
2525 (i32 timm:$src2)))]>,
2526 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2527 def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2528 (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2529 OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2530 _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
2531 _.BroadcastStr#", $src2}",
2532 [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
2533 (_.VT (_.BroadcastLdFrag addr:$src1)),
2534 (i32 timm:$src2))))]>,
2535 EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2538 // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
// AT&T-syntax-only aliases so the assembler accepts the width-suffixed
// mnemonic on register and broadcast forms too.
2540 def : InstAlias<OpcodeStr#_.Suffix#mem#
2541 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2542 (!cast<Instruction>(NAME#"rr")
2543 _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2544 def : InstAlias<OpcodeStr#_.Suffix#mem#
2545 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2546 (!cast<Instruction>(NAME#"rrk")
2547 _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2548 def : InstAlias<OpcodeStr#_.Suffix#mem#
2549 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
2550 _.BroadcastStr#", $src2}",
2551 (!cast<Instruction>(NAME#"rmb")
2552 _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2553 def : InstAlias<OpcodeStr#_.Suffix#mem#
2554 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
2555 "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
2556 (!cast<Instruction>(NAME#"rmbk")
2557 _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
// Instantiates packed VFPCLASS at all widths; the x/y/z strings become the
// memory-size disambiguation suffix used by the AT&T aliases above.
2560 multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2561 bits<8> opc, X86SchedWriteWidths sched,
2563 let Predicates = [prd] in {
2564 defm Z : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
2565 _.info512, "z">, EVEX_V512;
2567 let Predicates = [prd, HasVLX] in {
2568 defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
2569 _.info128, "x">, EVEX_V128;
2570 defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
2571 _.info256, "y">, EVEX_V256;
// Ties together packed (opcVec) and scalar (opcScalar) VFPCLASS for all FP
// element types. FP16 variants use the TA map; SS/SD scalar forms require DQI.
2575 multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2576 bits<8> opcScalar, X86SchedWriteWidths sched> {
2577 defm PH : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f16_info, opcVec,
2579 EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA;
2580 defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2581 sched.Scl, f16x_info, HasFP16>,
2582 EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA;
2583 defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
2585 EVEX_CD8<32, CD8VF>, AVX512AIi8Base;
2586 defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
2588 EVEX_CD8<64, CD8VF>, AVX512AIi8Base, REX_W;
2589 defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2590 sched.Scl, f32x_info, HasDQI>, VEX_LIG,
2591 EVEX_CD8<32, CD8VT1>, AVX512AIi8Base;
2592 defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2593 sched.Scl, f64x_info, HasDQI>, VEX_LIG,
2594 EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, REX_W;
// Packed opcode 0x66, scalar opcode 0x67.
2597 defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX;
2599 //-----------------------------------------------------------------
2600 // Mask register copy, including
2601 // - copy between mask registers
2602 // - load/store mask registers
2603 // - copy from GPR to mask register and vice versa
// KMOV k<->k (kk), load (km) and store (mk) forms. A non-empty Suffix
// (e.g. "_EVEX") marks the APX/EGPR-encoded variant via explicitOpPrefix.
2605 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2606 string OpcodeStr, RegisterClass KRC, ValueType vvt,
2607 X86MemOperand x86memop, string Suffix = ""> {
2608 let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove],
2609 explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in
2610 def kk#Suffix : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2611 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2613 def km#Suffix : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2614 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2615 [(set KRC:$dst, (vvt (load addr:$src)))]>,
2617 def mk#Suffix : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2618 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2619 [(store KRC:$src, addr:$dst)]>,
2620 Sched<[WriteStore]>;
// KMOV between a GPR and a mask register (kr: GPR->k, rk: k->GPR).
// No patterns here; selection is done by the COPY_TO_REGCLASS patterns below.
2623 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2624 string OpcodeStr, RegisterClass KRC,
2625 RegisterClass GRC, string Suffix = ""> {
2626 let hasSideEffects = 0 in {
2627 def kr#Suffix : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2628 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2630 def rk#Suffix : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2631 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
// KMOVB/W/D/Q instantiations: each width gets a legacy (NoEGPR) variant and a
// "_EVEX"-suffixed variant for APX extended GPRs (HasEGPR, 64-bit mode only).
2636 let Predicates = [HasDQI, NoEGPR] in
2637 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2638 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2640 let Predicates = [HasDQI, HasEGPR, In64BitMode] in
2641 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem, "_EVEX">,
2642 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32, "_EVEX">,
2645 let Predicates = [HasAVX512, NoEGPR] in
2646 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2647 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2649 let Predicates = [HasAVX512, HasEGPR, In64BitMode] in
2650 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem, "_EVEX">,
2651 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32, "_EVEX">,
2654 let Predicates = [HasBWI, NoEGPR] in {
2655 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2657 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2659 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2661 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2664 let Predicates = [HasBWI, HasEGPR, In64BitMode] in {
2665 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem, "_EVEX">,
2666 EVEX, TB, PD, REX_W;
2667 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32, "_EVEX">,
2669 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem, "_EVEX">,
2671 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64, "_EVEX">,
2672 EVEX, TB, XD, REX_W;
2675 // GR from/to mask register
// Bitconverts between iN and vNi1 are register-class copies (plus sub-register
// surgery for 8/16-bit widths, since k<->GPR moves always use 32/64-bit GPRs).
// zext forms use KMOVWrk/KMOVBrk, which zero the upper GPR bits.
2676 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2677 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2678 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2679 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2680 def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))),
2681 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>;
2683 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2684 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2685 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2686 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
2688 def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2689 (KMOVWrk VK16:$src)>;
2690 def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2691 (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
2692 def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2693 (COPY_TO_REGCLASS VK16:$src, GR32)>;
2694 def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2695 (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
2697 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2698 (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2699 def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2700 (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
2701 def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2702 (COPY_TO_REGCLASS VK8:$src, GR32)>;
2703 def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2704 (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
2706 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2707 (COPY_TO_REGCLASS GR32:$src, VK32)>;
2708 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2709 (COPY_TO_REGCLASS VK32:$src, GR32)>;
2710 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2711 (COPY_TO_REGCLASS GR64:$src, VK64)>;
2712 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2713 (COPY_TO_REGCLASS VK64:$src, GR64)>;
// Loads of sub-byte mask types use the byte-wide KMOVB when DQI is available;
// without DQI, v8i1/v16i1 loads go through GPR zero-extend / KMOVW instead.
2716 let Predicates = [HasDQI] in {
2717 def : Pat<(v1i1 (load addr:$src)),
2718 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2719 def : Pat<(v2i1 (load addr:$src)),
2720 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2721 def : Pat<(v4i1 (load addr:$src)),
2722 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2725 let Predicates = [HasAVX512] in {
2726 def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2727 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2728 def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2729 (KMOVWkm addr:$src)>;
// Node for extracting an i8-typed element from an i1 vector
// (ISD::EXTRACT_VECTOR_ELT with a fixed i8 result type).
2732 def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2733 SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2734 SDTCVecEltisVT<1, i1>,
2737 let Predicates = [HasAVX512] in {
// GPR <-> k-register scalar element moves for every mask width: insertion via
// scalar_to_vector and extraction via X86kextract of element 0 become plain
// register-class copies (with sub_8bit surgery for GR8 sources/results).
2738 multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2739 def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2740 (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2742 def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2743 (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2745 def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2746 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2748 def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2749 (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
2752 defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
2753 defm : operation_gpr_mask_copy_lowering<VK2, v2i1>;
2754 defm : operation_gpr_mask_copy_lowering<VK4, v4i1>;
2755 defm : operation_gpr_mask_copy_lowering<VK8, v8i1>;
2756 defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
2757 defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
2758 defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;
2760 def : Pat<(insert_subvector (v16i1 immAllZerosV),
2761 (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2763 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2767 // Mask unary operation
// One reg-reg mask unary instruction (e.g. KNOT) for a given mask width,
// gated on the feature predicate prd.
2769 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2770 RegisterClass KRC, SDPatternOperator OpNode,
2771 X86FoldableSchedWrite sched, Predicate prd> {
2772 let Predicates = [prd] in
2773 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2774 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2775 [(set KRC:$dst, (OpNode KRC:$src))]>,
// Instantiates a mask unary op at all four widths with the usual feature
// gating: B requires DQI, W base AVX-512, D/Q require BWI.
2779 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2780 SDPatternOperator OpNode,
2781 X86FoldableSchedWrite sched> {
2782 defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2783 sched, HasDQI>, VEX, TB, PD;
2784 defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2785 sched, HasAVX512>, VEX, TB;
2786 defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2787 sched, HasBWI>, VEX, TB, PD, REX_W;
2788 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2789 sched, HasBWI>, VEX, TB, REX_W;
2792 // TODO - do we need a X86SchedWriteWidths::KMASK type?
2793 defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
2795 // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
2796 let Predicates = [HasAVX512, NoDQI] in
2797 def : Pat<(vnot VK8:$src),
2798 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
// Sub-byte mask types are not legal; widen to VK16, KNOT, then copy the
// result back to the narrow class matching the pattern's type.
2800 def : Pat<(vnot VK4:$src),
2801 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2802 def : Pat<(vnot VK2:$src),
2803 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
2804 def : Pat<(vnot VK1:$src),
// Fixed: the result of the v1i1 pattern was copied to VK2; copy to VK1 so the
// destination class matches the pattern's type, consistent with the
// VK8/VK4/VK2 patterns above (behaviorally equivalent since all VK classes
// share the same k-registers, but the narrower class is the correct one).
2805 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;
2807 // Mask binary operation
2808 // - KAND, KANDN, KOR, KXNOR, KXOR
// One reg-reg-reg mask binary instruction for a given width; commutability is
// a template parameter since KANDN is not commutable.
2809 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2810 RegisterClass KRC, SDPatternOperator OpNode,
2811 X86FoldableSchedWrite sched, Predicate prd,
2813 let Predicates = [prd], isCommutable = IsCommutable in
2814 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2815 !strconcat(OpcodeStr,
2816 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2817 [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
// All-width instantiation of a mask binop. prdW lets KADD override the W
// predicate (it requires DQI, unlike base AVX-512 for the other ops).
2821 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2822 SDPatternOperator OpNode,
2823 X86FoldableSchedWrite sched, bit IsCommutable,
2824 Predicate prdW = HasAVX512> {
2825 defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2826 sched, HasDQI, IsCommutable>, VEX, VVVV, VEX_L, TB, PD;
2827 defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2828 sched, prdW, IsCommutable>, VEX, VVVV, VEX_L, TB;
2829 defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2830 sched, HasBWI, IsCommutable>, VEX, VVVV, VEX_L, REX_W, TB, PD;
2831 defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2832 sched, HasBWI, IsCommutable>, VEX, VVVV, VEX_L, REX_W, TB;
2835 // TODO - do we need a X86SchedWriteWidths::KMASK type?
// KANDN is the only non-commutable op; KADD requires DQI for its W form.
2836 defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>;
2837 defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>;
2838 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>;
2839 defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>;
2840 defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>;
2841 defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
// Lowers mask binops on narrow (VK1/VK2/VK4, and VK8 without DQI) classes by
// widening both operands to VK16, using the word-width instruction Inst, and
// copying the result back to the narrow class.
2843 multiclass avx512_binop_pat<SDPatternOperator VOpNode,
2845 // With AVX512F, 8-bit mask is promoted to 16-bit mask,
2846 // for the DQI set, this type is legal and KxxxB instruction is used
2847 let Predicates = [NoDQI] in
2848 def : Pat<(VOpNode VK8:$src1, VK8:$src2),
2850 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
2851 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
2853 // All types smaller than 8 bits require conversion anyway
2854 def : Pat<(VOpNode VK1:$src1, VK1:$src2),
2855 (COPY_TO_REGCLASS (Inst
2856 (COPY_TO_REGCLASS VK1:$src1, VK16),
2857 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
2858 def : Pat<(VOpNode VK2:$src1, VK2:$src2),
2859 (COPY_TO_REGCLASS (Inst
2860 (COPY_TO_REGCLASS VK2:$src1, VK16),
2861 (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
2862 def : Pat<(VOpNode VK4:$src1, VK4:$src2),
2863 (COPY_TO_REGCLASS (Inst
2864 (COPY_TO_REGCLASS VK4:$src1, VK16),
2865 (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
2868 defm : avx512_binop_pat<and, KANDWrr>;
2869 defm : avx512_binop_pat<vandn, KANDNWrr>;
2870 defm : avx512_binop_pat<or, KORWrr>;
2871 defm : avx512_binop_pat<vxnor, KXNORWrr>;
2872 defm : avx512_binop_pat<xor, KXORWrr>;
// KUNPCK: interleaves two narrow masks into one double-width mask. The
// concat_vectors pattern swaps operands because KUNPCK places $src2 in the
// low half and $src1 in the high half.
2875 multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
2876 X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
2878 let Predicates = [prd] in {
2879 let hasSideEffects = 0 in
2880 def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
2881 (ins Src.KRC:$src1, Src.KRC:$src2),
2882 "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
2883 VEX, VVVV, VEX_L, Sched<[sched]>;
2885 def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
2886 (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
2890 defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info, WriteShuffle, HasAVX512>, TB, PD;
2891 defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, TB;
2892 defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, TB, REX_W;
// KORTEST/KTEST: mask-compare instructions with no register result; they only
// set EFLAGS (hence Defs = [EFLAGS] and empty outs).
2895 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2896 SDNode OpNode, X86FoldableSchedWrite sched,
2898 let Predicates = [prd], Defs = [EFLAGS] in
2899 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
2900 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
2901 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
// All-width instantiation; prdW lets KTEST require DQI for its W form.
2905 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
2906 X86FoldableSchedWrite sched,
2907 Predicate prdW = HasAVX512> {
2908 defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
2910 defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
2912 defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
2914 defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
2918 // TODO - do we need a X86SchedWriteWidths::KMASK type?
2919 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
2920 defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
// KSHIFTL/KSHIFTR: shift a mask register by an imm8.
2923 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2924 SDNode OpNode, X86FoldableSchedWrite sched> {
2925 let Predicates = [HasAVX512] in
2926 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
2927 !strconcat(OpcodeStr,
2928 "\t{$imm, $src, $dst|$dst, $src, $imm}"),
2929 [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
// W/B share opcode opc1, D/Q share opc2 (B gated on DQI, D/Q on BWI).
2933 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
2934 SDNode OpNode, X86FoldableSchedWrite sched> {
2935 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2936 sched>, VEX, TA, PD, REX_W;
2937 let Predicates = [HasDQI] in
2938 defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2939 sched>, VEX, TA, PD;
2940 let Predicates = [HasBWI] in {
2941 defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2942 sched>, VEX, TA, PD, REX_W;
2943 defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2944 sched>, VEX, TA, PD;
2948 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
2949 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
2951 // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// NOTE(review): the multiclass name says "axv512" (not "avx512"); renaming
// would require touching every defm use, so it is left as-is here.
// Widens both operands via INSERT_SUBREG into an undef 512-bit register, runs
// the 512-bit compare-to-mask, and copies the k-result to the narrow class.
2952 multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
2954 X86VectorVTInfo Narrow,
2955 X86VectorVTInfo Wide> {
2956 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
2957 (Narrow.VT Narrow.RC:$src2), cond)),
2959 (!cast<Instruction>(InstStr#"Zrri")
2960 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2961 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
2962 (X86pcmpm_imm $cc)), Narrow.KRC)>;
// Masked form: the narrow mask is widened to the wide k-class for the Zrrik
// instruction, then the result is copied back.
2964 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
2965 (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
2966 (Narrow.VT Narrow.RC:$src2),
2968 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
2969 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
2970 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2971 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
2972 (X86pcmpm_imm $cc)), Narrow.KRC)>;
// Broadcast-memory variant of the narrow->wide integer compare lowering:
// uses the Zrmib/Zrmibk instruction forms, with commuted-immediate patterns
// for the case where the broadcast load appears as the first operand.
2975 multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
2977 X86VectorVTInfo Narrow,
2978 X86VectorVTInfo Wide> {
2980 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
2981 (Narrow.BroadcastLdFrag addr:$src2), cond)),
2983 (!cast<Instruction>(InstStr#"Zrmib")
2984 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2985 addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
2987 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
2989 (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
2990 (Narrow.BroadcastLdFrag addr:$src2),
2992 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
2993 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
2994 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2995 addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
2997 // Commuted with broadcast load.
2998 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2),
2999 (Narrow.VT Narrow.RC:$src1),
3002 (!cast<Instruction>(InstStr#"Zrmib")
3003 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3004 addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3006 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3008 (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
3009 (Narrow.VT Narrow.RC:$src1),
3011 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3012 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3013 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3014 addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3017 // Same as above, but for fp types which don't use PatFrags.
// FP narrow->wide compare lowering: reg-reg (Zrri/Zrrik) and broadcast
// (Zrmbi/Zrmbik) forms, including commuted-immediate patterns when the
// broadcast load is the first operand.
3018 multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
3019 X86VectorVTInfo Narrow,
3020 X86VectorVTInfo Wide> {
3021 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3022 (Narrow.VT Narrow.RC:$src2), timm:$cc)),
3024 (!cast<Instruction>(InstStr#"Zrri")
3025 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3026 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3027 timm:$cc), Narrow.KRC)>;
3029 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3030 (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3031 (Narrow.VT Narrow.RC:$src2), timm:$cc))),
3032 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3033 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3034 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3035 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3036 timm:$cc), Narrow.KRC)>;
3039 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3040 (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
3042 (!cast<Instruction>(InstStr#"Zrmbi")
3043 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3044 addr:$src2, timm:$cc), Narrow.KRC)>;
3046 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3047 (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3048 (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
3049 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3050 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3051 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3052 addr:$src2, timm:$cc), Narrow.KRC)>;
3054 // Commuted with broadcast load.
3055 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3056 (Narrow.VT Narrow.RC:$src1), timm:$cc)),
3058 (!cast<Instruction>(InstStr#"Zrmbi")
3059 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3060 addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3062 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3063 (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3064 (Narrow.VT Narrow.RC:$src1), timm:$cc))),
3065 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3066 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3067 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3068 addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
// Without VLX, 128/256-bit compares-to-mask are lowered to their 512-bit
// counterparts: D/Q and PS/PD under AVX512F, B/W under BWI.
3071 let Predicates = [HasAVX512, NoVLX] in {
3072 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3073 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3075 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3076 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3078 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3079 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3081 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3082 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3084 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3085 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3087 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3088 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3090 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3091 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3093 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3094 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3096 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
3097 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
3098 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
3099 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
3102 let Predicates = [HasBWI, NoVLX] in {
3103 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3104 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3106 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3107 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3109 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3110 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3112 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3113 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
3116 // Mask setting all 0s or 1s
// Defines a rematerializable, as-cheap-as-a-move pseudo that sets a whole
// mask register to the constant Val (all-zeros or all-ones). Being a pseudo,
// it is expanded to a real instruction later; WriteZero models it as a
// zero-idiom for scheduling.
3117 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> {
3118 let Predicates = [HasAVX512] in
3119 let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3120 SchedRW = [WriteZero] in
3121 def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3122 [(set KRC:$dst, (VT Val))]>;
// Instantiate the set-all-constant pseudo for the 16/32/64-bit mask widths
// (suffixes W/D/Q), then create the concrete KSET0*/KSET1* families.
3125 multiclass avx512_mask_setop_w<SDPatternOperator Val> {
3126 defm W : avx512_mask_setop<VK16, v16i1, Val>;
3127 defm D : avx512_mask_setop<VK32, v32i1, Val>;
3128 defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3131 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3132 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3134 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// v1i1..v8i1 constants reuse the 16-bit KSET0W/KSET1W and narrow the result
// with a plain register-class copy (the extra mask bits are don't-cares).
3135 let Predicates = [HasAVX512] in {
3136 def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3137 def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3138 def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3139 def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3140 def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
3141 def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
3142 def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
3143 def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
3146 // Patterns for kmask insert_subvector/extract_subvector to/from index=0
// At index 0 both directions are pure register-class copies: extracting the
// low subvector of a mask, or inserting into an undef wider mask, changes no
// bits that the narrower type observes.
3147 multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3148 RegisterClass RC, ValueType VT> {
3149 def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3150 (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3152 def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3153 (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
// Instantiate the index-0 subvector copies for every (narrow, wide) mask
// register-class pair: VK1 into everything, VK2 into 4..64, and so on.
3155 defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>;
3156 defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>;
3157 defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>;
3158 defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
3159 defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
3160 defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;
3162 defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>;
3163 defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>;
3164 defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>;
3165 defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>;
3166 defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>;
3168 defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>;
3169 defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>;
3170 defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>;
3171 defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>;
3173 defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>;
3174 defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>;
3175 defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>;
3177 defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3178 defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3180 defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3182 //===----------------------------------------------------------------------===//
3183 // AVX-512 - Aligned and unaligned load and store
// One vector width of a load family: rr/rrk/rrkz register moves plus
// rm/rmk/rmkz memory loads, with masked (merge) and zero-masked variants,
// and masked_load patterns wired onto the k/kz memory forms.
//   ld_frag     - the (aligned or unaligned) scalar load fragment to match.
//   mload       - the masked-load PatFrag matched onto rmk/rmkz.
//   NoRMPattern - suppress the plain rm pattern (caller provides its own).
//   SelectOprr  - select node used for the register-form masking patterns;
//                 callers pass null_frag to disable those patterns.
3186 multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3187 X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3188 X86SchedWriteMoveLS Sched, bit NoRMPattern = 0,
3189 SDPatternOperator SelectOprr = vselect> {
3190 let hasSideEffects = 0 in {
3191 let isMoveReg = 1 in
3192 def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3193 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3194 _.ExeDomain>, EVEX, Sched<[Sched.RR]>;
3195 def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3196 (ins _.KRCWM:$mask, _.RC:$src),
3197 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3198 "${dst} {${mask}} {z}, $src}"),
3199 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3201 _.ImmAllZerosV)))], _.ExeDomain>,
3202 EVEX, EVEX_KZ, Sched<[Sched.RR]>;
// Plain load: foldable and rematerializable since it has no side effects.
3204 let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3205 def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3206 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3207 !if(NoRMPattern, [],
3209 (_.VT (ld_frag addr:$src)))]),
3210 _.ExeDomain>, EVEX, Sched<[Sched.RM]>;
// Merge-masked forms tie the passthru ($src0) to the destination.
3212 let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3213 def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3214 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3215 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3216 "${dst} {${mask}}, $src1}"),
3217 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3219 (_.VT _.RC:$src0))))], _.ExeDomain>,
3220 EVEX, EVEX_K, Sched<[Sched.RR]>;
3221 def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3222 (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3223 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3224 "${dst} {${mask}}, $src1}"),
3225 [(set _.RC:$dst, (_.VT
3226 (vselect_mask _.KRCWM:$mask,
3227 (_.VT (ld_frag addr:$src1)),
3228 (_.VT _.RC:$src0))))], _.ExeDomain>,
3229 EVEX, EVEX_K, Sched<[Sched.RM]>;
3231 def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3232 (ins _.KRCWM:$mask, _.MemOp:$src),
3233 OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3234 "${dst} {${mask}} {z}, $src}",
3235 [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
3236 (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3237 _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
// Masked-load DAG nodes: undef and all-zeros passthru both map to the
// zero-masked load; a register passthru maps to the merge-masked load.
3239 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3240 (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3242 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3243 (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3245 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3246 (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
3247 _.KRCWM:$mask, addr:$ptr)>;
// Aligned-load family across vector lengths: Z (512-bit) under prd alone,
// Z256/Z128 additionally gated on HasVLX. Uses the aligned load fragment
// and masked_load_aligned.
3250 multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3251 AVX512VLVectorVTInfo _, Predicate prd,
3252 X86SchedWriteMoveLSWidths Sched,
3253 bit NoRMPattern = 0> {
3254 let Predicates = [prd] in
3255 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3256 _.info512.AlignedLdFrag, masked_load_aligned,
3257 Sched.ZMM, NoRMPattern>, EVEX_V512;
3259 let Predicates = [prd, HasVLX] in {
3260 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3261 _.info256.AlignedLdFrag, masked_load_aligned,
3262 Sched.YMM, NoRMPattern>, EVEX_V256;
3263 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3264 _.info128.AlignedLdFrag, masked_load_aligned,
3265 Sched.XMM, NoRMPattern>, EVEX_V128;
// Unaligned-load family across vector lengths; mirrors
// avx512_alignedload_vl but with the plain LdFrag / masked_load, and
// forwards SelectOprr so callers can disable register-form masking patterns.
3269 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3270 AVX512VLVectorVTInfo _, Predicate prd,
3271 X86SchedWriteMoveLSWidths Sched,
3272 bit NoRMPattern = 0,
3273 SDPatternOperator SelectOprr = vselect> {
3274 let Predicates = [prd] in
3275 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3276 masked_load, Sched.ZMM, NoRMPattern, SelectOprr>, EVEX_V512;
3278 let Predicates = [prd, HasVLX] in {
3279 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3280 masked_load, Sched.YMM, NoRMPattern, SelectOprr>, EVEX_V256;
3281 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3282 masked_load, Sched.XMM, NoRMPattern, SelectOprr>, EVEX_V128;
// One vector width of a store family: codegen-only reversed register moves
// (rr_REV/rrk_REV/rrkz_REV, MRMDestReg encodings used for disassembly and
// the ".s" assembler aliases), the mr/mrk memory stores, and the
// masked-store pattern onto mrk.
//   st_frag     - store fragment matched by the plain mr form.
//   mstore      - masked-store PatFrag matched onto mrk.
//   NoMRPattern - suppress the plain mr pattern (caller provides its own).
3286 multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3287 X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3288 X86SchedWriteMoveLS Sched, bit NoMRPattern = 0> {
3289 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3290 let isMoveReg = 1 in
3291 def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3292 OpcodeStr # "\t{$src, $dst|$dst, $src}",
3293 [], _.ExeDomain>, EVEX,
3295 def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3296 (ins _.KRCWM:$mask, _.RC:$src),
3297 OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3298 "${dst} {${mask}}, $src}",
3299 [], _.ExeDomain>, EVEX, EVEX_K,
3301 def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3302 (ins _.KRCWM:$mask, _.RC:$src),
3303 OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3304 "${dst} {${mask}} {z}, $src}",
3305 [], _.ExeDomain>, EVEX, EVEX_KZ,
3309 let hasSideEffects = 0, mayStore = 1 in
3310 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3311 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3312 !if(NoMRPattern, [],
3313 [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3314 _.ExeDomain>, EVEX, Sched<[Sched.MR]>;
3315 def mrk : AVX512PI<opc, MRMDestMem, (outs),
3316 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3317 OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3318 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>;
3320 def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3321 (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3322 _.KRCWM:$mask, _.RC:$src)>;
// ".s" aliases let assembly writers force the store (MRMDestReg) encoding
// of the register-to-register move.
3324 def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3325 (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3326 _.RC:$dst, _.RC:$src), 0>;
3327 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3328 (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3329 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3330 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3331 (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3332 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
// Unaligned-store family across vector lengths: Z under prd, Z256/Z128
// additionally gated on HasVLX; uses the plain store / masked_store frags.
3335 multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3336 AVX512VLVectorVTInfo _, Predicate prd,
3337 X86SchedWriteMoveLSWidths Sched,
3338 bit NoMRPattern = 0> {
3339 let Predicates = [prd] in
3340 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3341 masked_store, Sched.ZMM, NoMRPattern>, EVEX_V512;
3342 let Predicates = [prd, HasVLX] in {
3343 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3344 masked_store, Sched.YMM, NoMRPattern>, EVEX_V256;
3345 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3346 masked_store, Sched.XMM, NoMRPattern>, EVEX_V128;
// Aligned-store family across vector lengths; mirrors avx512_store_vl but
// with alignedstore / masked_store_aligned fragments.
3350 multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3351 AVX512VLVectorVTInfo _, Predicate prd,
3352 X86SchedWriteMoveLSWidths Sched,
3353 bit NoMRPattern = 0> {
3354 let Predicates = [prd] in
3355 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3356 masked_store_aligned, Sched.ZMM, NoMRPattern>, EVEX_V512;
3358 let Predicates = [prd, HasVLX] in {
3359 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3360 masked_store_aligned, Sched.YMM, NoMRPattern>, EVEX_V256;
3361 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3362 masked_store_aligned, Sched.XMM, NoMRPattern>, EVEX_V128;
// Concrete move families. Each defm pairs the load and store multiclasses
// under one mnemonic; the integer vmovdqu8/16 forms require BWI, the rest
// only AVX512F. NoRMPattern/NoMRPattern (the trailing '1') is used where a
// generic pattern elsewhere supplies the plain load/store.
3366 defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3367 HasAVX512, SchedWriteFMoveLS>,
3368 avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3369 HasAVX512, SchedWriteFMoveLS>,
3370 TB, EVEX_CD8<32, CD8VF>;
3372 defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3373 HasAVX512, SchedWriteFMoveLS>,
3374 avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3375 HasAVX512, SchedWriteFMoveLS>,
3376 TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
// null_frag disables the register-form vselect patterns for the unaligned
// FP moves (the aligned forms are preferred for those).
3378 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3379 SchedWriteFMoveLS, 0, null_frag>,
3380 avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3382 TB, EVEX_CD8<32, CD8VF>;
3384 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3385 SchedWriteFMoveLS, 0, null_frag>,
3386 avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3388 TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
3390 defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3391 HasAVX512, SchedWriteVecMoveLS, 1>,
3392 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3393 HasAVX512, SchedWriteVecMoveLS, 1>,
3394 TB, PD, EVEX_CD8<32, CD8VF>;
3396 defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3397 HasAVX512, SchedWriteVecMoveLS>,
3398 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3399 HasAVX512, SchedWriteVecMoveLS>,
3400 TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
3402 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3403 SchedWriteVecMoveLS, 1>,
3404 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3405 SchedWriteVecMoveLS, 1>,
3406 TB, XD, EVEX_CD8<8, CD8VF>;
3408 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3409 SchedWriteVecMoveLS, 1>,
3410 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3411 SchedWriteVecMoveLS, 1>,
3412 TB, XD, REX_W, EVEX_CD8<16, CD8VF>;
3414 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3415 SchedWriteVecMoveLS, 1, null_frag>,
3416 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3417 SchedWriteVecMoveLS, 1>,
3418 TB, XS, EVEX_CD8<32, CD8VF>;
3420 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3421 SchedWriteVecMoveLS, 0, null_frag>,
3422 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3423 SchedWriteVecMoveLS>,
3424 TB, XS, REX_W, EVEX_CD8<64, CD8VF>;
3426 // Special instructions to help with spilling when we don't have VLX. We need
3427 // to load or store from a ZMM register instead. These are converted in
3428 // expandPostRAPseudos.
// Load pseudos: XMM/YMM destination, aligned and unaligned flavors.
3429 let isReMaterializable = 1, canFoldAsLoad = 1,
3430 isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3431 def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3432 "", []>, Sched<[WriteFLoadX]>;
3433 def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3434 "", []>, Sched<[WriteFLoadY]>;
3435 def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3436 "", []>, Sched<[WriteFLoadX]>;
3437 def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3438 "", []>, Sched<[WriteFLoadY]>;
// Store pseudos: XMM/YMM source, aligned and unaligned flavors.
3441 let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3442 def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3443 "", []>, Sched<[WriteFStoreX]>;
3444 def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3445 "", []>, Sched<[WriteFStoreY]>;
3446 def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3447 "", []>, Sched<[WriteFStoreX]>;
3448 def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3449 "", []>, Sched<[WriteFStoreY]>;
// vselect with all-zeros in the TRUE position is a zero-masked move under
// the INVERTED mask, hence the KNOTWrr. The v8i1 case first widens the mask
// to VK16 because KNOTW operates on 16-bit masks.
3452 def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3453 (v8i64 VR512:$src))),
3454 (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3457 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3458 (v16i32 VR512:$src))),
3459 (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3461 // These patterns exist to prevent the above patterns from introducing a second
3462 // mask inversion when one already exists.
3463 def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)),
3464 (v8i64 immAllZerosV),
3465 (v8i64 VR512:$src))),
3466 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3467 def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)),
3468 (v16i32 immAllZerosV),
3469 (v16i32 VR512:$src))),
3470 (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
// Lowers a narrow (128/256-bit) masked vselect without VLX: widen both
// operands into a 512-bit register via INSERT_SUBREG, perform the masked
// move at 512 bits, then (in the elided result wrapper) extract the narrow
// subregister. Covers both merge (rrk) and zeroing (rrkz) selects.
3472 multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3473 X86VectorVTInfo Wide> {
3474 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3475 Narrow.RC:$src1, Narrow.RC:$src0)),
3478 (!cast<Instruction>(InstrStr#"rrk")
3479 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3480 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3481 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3484 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3485 Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3488 (!cast<Instruction>(InstrStr#"rrkz")
3489 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3490 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3494 // Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
3495 // available. Use a 512-bit operation and extract.
3496 let Predicates = [HasAVX512, NoVLX] in {
3497 defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3498 defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3499 defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3500 defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3502 defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3503 defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3504 defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3505 defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
// Byte/word (and f16/bf16, which reuse the word moves) need BWI for the
// underlying 512-bit VMOVDQU8/16 instructions.
3508 let Predicates = [HasBWI, NoVLX] in {
3509 defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3510 defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3512 defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3513 defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
3515 defm : mask_move_lowering<"VMOVDQU16Z", v8f16x_info, v32f16_info>;
3516 defm : mask_move_lowering<"VMOVDQU16Z", v16f16x_info, v32f16_info>;
3518 defm : mask_move_lowering<"VMOVDQU16Z", v8bf16x_info, v32bf16_info>;
3519 defm : mask_move_lowering<"VMOVDQU16Z", v16bf16x_info, v32bf16_info>;
// Unmasked 512-bit loads/stores of types whose own mnemonic needs BWI
// (i8/i16/f16/bf16) are expressed via the AVX512F-only instructions:
// integer types through VMOVDQA64/VMOVDQU64, f16/bf16 through
// VMOVAPS/VMOVUPS. Element size is irrelevant without masking.
3522 let Predicates = [HasAVX512] in {
3524 def : Pat<(alignedloadv16i32 addr:$src),
3525 (VMOVDQA64Zrm addr:$src)>;
3526 def : Pat<(alignedloadv32i16 addr:$src),
3527 (VMOVDQA64Zrm addr:$src)>;
3528 def : Pat<(alignedloadv32f16 addr:$src),
3529 (VMOVAPSZrm addr:$src)>;
3530 def : Pat<(alignedloadv32bf16 addr:$src),
3531 (VMOVAPSZrm addr:$src)>;
3532 def : Pat<(alignedloadv64i8 addr:$src),
3533 (VMOVDQA64Zrm addr:$src)>;
3534 def : Pat<(loadv16i32 addr:$src),
3535 (VMOVDQU64Zrm addr:$src)>;
3536 def : Pat<(loadv32i16 addr:$src),
3537 (VMOVDQU64Zrm addr:$src)>;
3538 def : Pat<(loadv32f16 addr:$src),
3539 (VMOVUPSZrm addr:$src)>;
3540 def : Pat<(loadv32bf16 addr:$src),
3541 (VMOVUPSZrm addr:$src)>;
3542 def : Pat<(loadv64i8 addr:$src),
3543 (VMOVDQU64Zrm addr:$src)>;
// Matching store patterns for the same types.
3546 def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3547 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3548 def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3549 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3550 def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
3551 (VMOVAPSZmr addr:$dst, VR512:$src)>;
3552 def : Pat<(alignedstore (v32bf16 VR512:$src), addr:$dst),
3553 (VMOVAPSZmr addr:$dst, VR512:$src)>;
3554 def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3555 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3556 def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3557 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3558 def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3559 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3560 def : Pat<(store (v32f16 VR512:$src), addr:$dst),
3561 (VMOVUPSZmr addr:$dst, VR512:$src)>;
3562 def : Pat<(store (v32bf16 VR512:$src), addr:$dst),
3563 (VMOVUPSZmr addr:$dst, VR512:$src)>;
3564 def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3565 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
// Same idea at 128/256 bits under VLX: unmasked loads/stores of
// i8/i16/f16/bf16 vectors map onto the Z128/Z256 forms of
// VMOVDQA64/VMOVDQU64 (integer) and VMOVAPS/VMOVUPS (f16/bf16).
3568 let Predicates = [HasVLX] in {
// 128-bit loads.
3570 def : Pat<(alignedloadv4i32 addr:$src),
3571 (VMOVDQA64Z128rm addr:$src)>;
3572 def : Pat<(alignedloadv8i16 addr:$src),
3573 (VMOVDQA64Z128rm addr:$src)>;
3574 def : Pat<(alignedloadv8f16 addr:$src),
3575 (VMOVAPSZ128rm addr:$src)>;
3576 def : Pat<(alignedloadv8bf16 addr:$src),
3577 (VMOVAPSZ128rm addr:$src)>;
3578 def : Pat<(alignedloadv16i8 addr:$src),
3579 (VMOVDQA64Z128rm addr:$src)>;
3580 def : Pat<(loadv4i32 addr:$src),
3581 (VMOVDQU64Z128rm addr:$src)>;
3582 def : Pat<(loadv8i16 addr:$src),
3583 (VMOVDQU64Z128rm addr:$src)>;
3584 def : Pat<(loadv8f16 addr:$src),
3585 (VMOVUPSZ128rm addr:$src)>;
3586 def : Pat<(loadv8bf16 addr:$src),
3587 (VMOVUPSZ128rm addr:$src)>;
3588 def : Pat<(loadv16i8 addr:$src),
3589 (VMOVDQU64Z128rm addr:$src)>;
// 128-bit stores.
3592 def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3593 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3594 def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3595 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3596 def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
3597 (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3598 def : Pat<(alignedstore (v8bf16 VR128X:$src), addr:$dst),
3599 (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3600 def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3601 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3602 def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3603 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3604 def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3605 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3606 def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
3607 (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
3608 def : Pat<(store (v8bf16 VR128X:$src), addr:$dst),
3609 (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
3610 def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3611 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
// 256-bit loads.
3614 def : Pat<(alignedloadv8i32 addr:$src),
3615 (VMOVDQA64Z256rm addr:$src)>;
3616 def : Pat<(alignedloadv16i16 addr:$src),
3617 (VMOVDQA64Z256rm addr:$src)>;
3618 def : Pat<(alignedloadv16f16 addr:$src),
3619 (VMOVAPSZ256rm addr:$src)>;
3620 def : Pat<(alignedloadv16bf16 addr:$src),
3621 (VMOVAPSZ256rm addr:$src)>;
3622 def : Pat<(alignedloadv32i8 addr:$src),
3623 (VMOVDQA64Z256rm addr:$src)>;
3624 def : Pat<(loadv8i32 addr:$src),
3625 (VMOVDQU64Z256rm addr:$src)>;
3626 def : Pat<(loadv16i16 addr:$src),
3627 (VMOVDQU64Z256rm addr:$src)>;
3628 def : Pat<(loadv16f16 addr:$src),
3629 (VMOVUPSZ256rm addr:$src)>;
3630 def : Pat<(loadv16bf16 addr:$src),
3631 (VMOVUPSZ256rm addr:$src)>;
3632 def : Pat<(loadv32i8 addr:$src),
3633 (VMOVDQU64Z256rm addr:$src)>;
// 256-bit stores.
3636 def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3637 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3638 def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3639 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3640 def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
3641 (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3642 def : Pat<(alignedstore (v16bf16 VR256X:$src), addr:$dst),
3643 (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3644 def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3645 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3646 def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3647 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3648 def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3649 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3650 def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
3651 (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
3652 def : Pat<(store (v16bf16 VR256X:$src), addr:$dst),
3653 (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
3654 def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3655 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
// f16/bf16 masked selects, masked loads and masked stores have no dedicated
// instruction; they reuse the 16-bit integer masked moves (VMOVDQU16*).
// The 512-bit forms need only BWI; the 256/128-bit forms also need VLX.
3658 multiclass mask_move_lowering_f16_bf16<AVX512VLVectorVTInfo _> {
3659 let Predicates = [HasBWI] in {
3660 def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), (_.info512.VT VR512:$src0))),
3661 (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
3662 def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), _.info512.ImmAllZerosV)),
3663 (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
// Masked selects of loaded values fold the load into the masked move;
// aligned and unaligned load fragments map to the same unaligned move.
3664 def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3665 (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), (_.info512.VT VR512:$src0))),
3666 (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3667 def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3668 (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), _.info512.ImmAllZerosV)),
3669 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3670 def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3671 (_.info512.VT (_.info512.LdFrag addr:$src)), (_.info512.VT VR512:$src0))),
3672 (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3673 def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3674 (_.info512.VT (_.info512.LdFrag addr:$src)), _.info512.ImmAllZerosV)),
3675 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3676 def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, (_.info512.VT VR512:$src0))),
3677 (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3678 def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, undef)),
3679 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3680 def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, _.info512.ImmAllZerosV)),
3681 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3683 def : Pat<(masked_store (_.info512.VT VR512:$src), addr:$dst, VK32WM:$mask),
3684 (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
// 256-bit (VK16 mask) and 128-bit (VK8 mask) equivalents under VLX.
3686 let Predicates = [HasBWI, HasVLX] in {
3687 def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src0))),
3688 (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
3689 def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), _.info256.ImmAllZerosV)),
3690 (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
3691 def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3692 (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
3693 (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3694 def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3695 (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), _.info256.ImmAllZerosV)),
3696 (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3697 def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3698 (_.info256.VT (_.info256.LdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
3699 (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3700 def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3701 (_.info256.VT (_.info256.LdFrag addr:$src)), _.info256.ImmAllZerosV)),
3702 (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3703 def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, (_.info256.VT VR256X:$src0))),
3704 (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3705 def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, undef)),
3706 (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3707 def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, _.info256.ImmAllZerosV)),
3708 (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3710 def : Pat<(masked_store (_.info256.VT VR256X:$src), addr:$dst, VK16WM:$mask),
3711 (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;
3713 def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), (_.info128.VT VR128X:$src0))),
3714 (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
3715 def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), _.info128.ImmAllZerosV)),
3716 (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
3717 def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3718 (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
3719 (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3720 def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3721 (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), _.info128.ImmAllZerosV)),
3722 (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3723 def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3724 (_.info128.VT (_.info128.LdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
3725 (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3726 def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3727 (_.info128.VT (_.info128.LdFrag addr:$src)), _.info128.ImmAllZerosV)),
3728 (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3729 def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, (_.info128.VT VR128X:$src0))),
3730 (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3731 def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, undef)),
3732 (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3733 def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, _.info128.ImmAllZerosV)),
3734 (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3736 def : Pat<(masked_store (_.info128.VT VR128X:$src), addr:$dst, VK8WM:$mask),
3737 (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
// Instantiate for both half-precision element types.
3741 defm : mask_move_lowering_f16_bf16<avx512vl_f16_info>;
3742 defm : mask_move_lowering_f16_bf16<avx512vl_bf16_info>;
3744 // Move Int Doubleword to Packed Double Int
// EVEX-encoded GPR<->XMM moves: vmovd (32-bit) and vmovq (64-bit), both as
// scalar_to_vector into an XMM and as raw bitconverts to/from the scalar
// FP register classes (codegen-only).
3746 let ExeDomain = SSEPackedInt in {
3747 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3748 "vmovd\t{$src, $dst|$dst, $src}",
3750 (v4i32 (scalar_to_vector GR32:$src)))]>,
3751 EVEX, Sched<[WriteVecMoveFromGpr]>;
3752 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3753 "vmovd\t{$src, $dst|$dst, $src}",
3755 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3756 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
3757 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3758 "vmovq\t{$src, $dst|$dst, $src}",
3760 (v2i64 (scalar_to_vector GR64:$src)))]>,
3761 EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
// Load form kept for disassembly only; no pattern.
3762 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3763 def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3765 "vmovq\t{$src, $dst|$dst, $src}", []>,
3766 EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
3767 let isCodeGenOnly = 1 in {
3768 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3769 "vmovq\t{$src, $dst|$dst, $src}",
3770 [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3771 EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
3772 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3773 "vmovq\t{$src, $dst|$dst, $src}",
3774 [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3775 EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
3777 } // ExeDomain = SSEPackedInt
3779 // Move Int Doubleword to Single Scalar
// Codegen-only bitconvert of a 32-bit GPR into an FP32 scalar register.
3781 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3782 def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3783 "vmovd\t{$src, $dst|$dst, $src}",
3784 [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3785 EVEX, Sched<[WriteVecMoveFromGpr]>;
3786 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3788 // Move doubleword from xmm register to r/m32
3790 let ExeDomain = SSEPackedInt in {
3791 def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3792 "vmovd\t{$src, $dst|$dst, $src}",
3793 [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3795 EVEX, Sched<[WriteVecMoveToGpr]>;
3796 def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
3797 (ins i32mem:$dst, VR128X:$src),
3798 "vmovd\t{$src, $dst|$dst, $src}",
3799 [(store (i32 (extractelt (v4i32 VR128X:$src),
3800 (iPTR 0))), addr:$dst)]>,
3801 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
3802 } // ExeDomain = SSEPackedInt
3804 // Move quadword from xmm1 register to r/m64
3806 let ExeDomain = SSEPackedInt in {
// Extract element 0 of a v2i64 into a GR64.
3807 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3808 "vmovq\t{$src, $dst|$dst, $src}",
3809 [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3811 TB, PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>,
3812 Requires<[HasAVX512]>;
// Disassembler-only store form (no pattern); 64-bit mode only.
3814 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
3815 def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3816 "vmovq\t{$src, $dst|$dst, $src}", []>, TB, PD,
3817 EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecStore]>,
3818 Requires<[HasAVX512, In64BitMode]>;
// Store low 64 bits of an XMM register to memory (0xD6 encoding).
3820 def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3821 (ins i64mem:$dst, VR128X:$src),
3822 "vmovq\t{$src, $dst|$dst, $src}",
3823 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3825 EVEX, TB, PD, REX_W, EVEX_CD8<64, CD8VT1>,
3826 Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
// Register form of the 0xD6 encoding, disassembler-only.
3828 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
3829 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3831 "vmovq\t{$src, $dst|$dst, $src}", []>,
3832 EVEX, REX_W, Sched<[SchedWriteVecLogic.XMM]>;
3833 } // ExeDomain = SSEPackedInt
// ".s" mnemonic suffix lets the assembler request the MRMDestReg encoding.
3835 def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
3836 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
3838 let Predicates = [HasAVX512] in {
3839 def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
3840 (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
3843 // Move Scalar Single to Double Int
// FR32X -> GR32 bitcast move, selection-only.
3845 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3846 def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3848 "vmovd\t{$src, $dst|$dst, $src}",
3849 [(set GR32:$dst, (bitconvert FR32X:$src))]>,
3850 EVEX, Sched<[WriteVecMoveToGpr]>;
3851 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3853 // Move Quadword Int to Packed Quadword Int
// 64-bit load zero-extended into an XMM register (VMOVQ xmm, m64).
3855 let ExeDomain = SSEPackedInt in {
3856 def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3858 "vmovq\t{$src, $dst|$dst, $src}",
3860 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
// NOTE(review): the <8, CD8VT8> compressed-disp tuple looks odd for a 64-bit
// load — confirm against upstream before changing.
3861 EVEX, REX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
3862 } // ExeDomain = SSEPackedInt
3864 // Allow "vmovd" but print "vmovq".
3865 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3866 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
3867 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3868 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3870 // Conversions between masks and scalar fp.
// Mask <-> scalar-FP bitcasts route through a GPR: KMOV to/from the mask
// register, paired with the GPR<->FP moves defined above.
3871 def : Pat<(v32i1 (bitconvert FR32X:$src)),
3872 (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
3873 def : Pat<(f32 (bitconvert VK32:$src)),
3874 (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;
3876 def : Pat<(v64i1 (bitconvert FR64X:$src)),
3877 (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
3878 def : Pat<(f64 (bitconvert VK64:$src)),
3879 (VMOV64toSDZrr (KMOVQrk VK64:$src))>;
3881 //===----------------------------------------------------------------------===//
3882 // AVX-512 MOVSH, MOVSS, MOVSD
3883 //===----------------------------------------------------------------------===//
// Emits the full family for one scalar FP element type: rr/rrk/rrkz register
// merges, rm/rm_alt/rmk/rmkz loads, and mr/mrk stores.
3885 multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
3886 X86VectorVTInfo _, Predicate prd = HasAVX512> {
// The unmasked reg-reg form is gated on OptForSize (blends are preferred for
// speed, see the OptForSpeed patterns later in this file), except for FP16
// which is gated only on HasFP16.
3887 let Predicates = !if (!eq (prd, HasFP16), [HasFP16], [prd, OptForSize]) in
3888 def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3889 (ins _.RC:$src1, _.RC:$src2),
3890 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3891 [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3892 _.ExeDomain>, EVEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>;
3893 let Predicates = [prd] in {
// Zero-masking merge: selected for X86selects with a zero fallback.
3894 def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3895 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3896 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3897 "$dst {${mask}} {z}, $src1, $src2}"),
3898 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3899 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3901 _.ExeDomain>, EVEX, VVVV, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
// Merge-masking: $src0 provides the pass-through value and is tied to $dst.
3902 let Constraints = "$src0 = $dst" in
3903 def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3904 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3905 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3906 "$dst {${mask}}, $src1, $src2}"),
3907 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3908 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3909 (_.VT _.RC:$src0))))],
3910 _.ExeDomain>, EVEX, VVVV, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
// Loads zero the upper elements, so they can be folded and rematerialized.
3911 let canFoldAsLoad = 1, isReMaterializable = 1 in {
3912 def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
3913 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3914 [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
3915 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3916 // _alt version uses FR32/FR64 register class.
3917 let isCodeGenOnly = 1 in
3918 def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3919 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3920 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3921 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
// Masked loads carry no patterns; they are selected via the explicit
// avx512_load_scalar_lowering* patterns below.
3923 let mayLoad = 1, hasSideEffects = 0 in {
3924 let Constraints = "$src0 = $dst" in
3925 def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3926 (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3927 !strconcat(asm, "\t{$src, $dst {${mask}}|",
3928 "$dst {${mask}}, $src}"),
3929 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
3930 def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3931 (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3932 !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3933 "$dst {${mask}} {z}, $src}"),
3934 [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
// Scalar store from the FR* class, plus a pattern-less masked store form.
3936 def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
3937 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3938 [(store _.FRC:$src, addr:$dst)], _.ExeDomain>,
3939 EVEX, Sched<[WriteFStore]>;
3940 let mayStore = 1, hasSideEffects = 0 in
3941 def mrk: AVX512PI<0x11, MRMDestMem, (outs),
3942 (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
3943 !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
3944 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>;
// Instantiations for f32 (XS prefix), f64 (XD + REX_W) and f16 (map 5).
3948 defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
3949 VEX_LIG, TB, XS, EVEX_CD8<32, CD8VT1>;
3951 defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
3952 VEX_LIG, TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
3954 defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
3956 VEX_LIG, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
// Lower (Movss/Movsd/Movsh dst, (scalar_to_vector (select ...))) to the
// masked rrk/rrkz forms of the instruction named by InstrStr.
3958 multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
3959 PatLeaf ZeroFP, X86VectorVTInfo _> {
3961 def : Pat<(_.VT (OpNode _.RC:$src0,
3962 (_.VT (scalar_to_vector
3963 (_.EltVT (X86selects VK1WM:$mask,
3964 (_.EltVT _.FRC:$src1),
3965 (_.EltVT _.FRC:$src2))))))),
3966 (!cast<Instruction>(InstrStr#rrk)
3967 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
3970 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
3972 def : Pat<(_.VT (OpNode _.RC:$src0,
3973 (_.VT (scalar_to_vector
3974 (_.EltVT (X86selects VK1WM:$mask,
3975 (_.EltVT _.FRC:$src1),
3976 (_.EltVT ZeroFP))))))),
3977 (!cast<Instruction>(InstrStr#rrkz)
3980 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
// Lower a 128-bit masked store that was widened to 512 bits into the masked
// scalar store (mrk); the mask is already in a register of class MaskRC.
3983 multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
3984 dag Mask, RegisterClass MaskRC> {
3986 def : Pat<(masked_store
3987 (_.info512.VT (insert_subvector undef,
3988 (_.info128.VT _.info128.RC:$src),
3989 (iPTR 0))), addr:$dst, Mask),
3990 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
3991 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
3992 _.info128.RC:$src)>;
// Same as above, but the mask register must first be widened to i32 via
// INSERT_SUBREG before it can be copied into VK1WM.
3996 multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
3997 AVX512VLVectorVTInfo _,
3998 dag Mask, RegisterClass MaskRC,
3999 SubRegIndex subreg> {
4001 def : Pat<(masked_store
4002 (_.info512.VT (insert_subvector undef,
4003 (_.info128.VT _.info128.RC:$src),
4004 (iPTR 0))), addr:$dst, Mask),
4005 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4006 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4007 _.info128.RC:$src)>;
4011 // This matches the more recent codegen from clang that avoids emitting a 512
4012 // bit masked store directly. Codegen will widen 128-bit masked store to 512
4013 // bits on AVX512F only targets.
4014 multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4015 AVX512VLVectorVTInfo _,
4016 dag Mask512, dag Mask128,
4017 RegisterClass MaskRC,
4018 SubRegIndex subreg> {
4021 def : Pat<(masked_store
4022 (_.info512.VT (insert_subvector undef,
4023 (_.info128.VT _.info128.RC:$src),
4024 (iPTR 0))), addr:$dst, Mask512),
4025 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4026 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4027 _.info128.RC:$src)>;
4029 // AVX512VL pattern.
4030 def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4031 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4032 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4033 _.info128.RC:$src)>;
// Masked-load counterparts: zero-fill pass-through selects rmkz; a
// vzmovl'd register pass-through selects the merging rmk form.
4036 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4037 dag Mask, RegisterClass MaskRC> {
4039 def : Pat<(_.info128.VT (extract_subvector
4040 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4041 _.info512.ImmAllZerosV)),
4043 (!cast<Instruction>(InstrStr#rmkz)
4044 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4047 def : Pat<(_.info128.VT (extract_subvector
4048 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4049 (_.info512.VT (insert_subvector undef,
4050 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4053 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4054 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
// Same, with the mask widened to i32 through INSERT_SUBREG first.
4059 multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4060 AVX512VLVectorVTInfo _,
4061 dag Mask, RegisterClass MaskRC,
4062 SubRegIndex subreg> {
4064 def : Pat<(_.info128.VT (extract_subvector
4065 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4066 _.info512.ImmAllZerosV)),
4068 (!cast<Instruction>(InstrStr#rmkz)
4069 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4072 def : Pat<(_.info128.VT (extract_subvector
4073 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4074 (_.info512.VT (insert_subvector undef,
4075 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4078 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4079 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4084 // This matches the more recent codegen from clang that avoids emitting a 512
4085 // bit masked load directly. Codegen will widen 128-bit masked load to 512
4086 // bits on AVX512F only targets.
4087 multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4088 AVX512VLVectorVTInfo _,
4089 dag Mask512, dag Mask128,
4090 RegisterClass MaskRC,
4091 SubRegIndex subreg> {
4092 // AVX512F patterns.
4093 def : Pat<(_.info128.VT (extract_subvector
4094 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4095 _.info512.ImmAllZerosV)),
4097 (!cast<Instruction>(InstrStr#rmkz)
4098 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4101 def : Pat<(_.info128.VT (extract_subvector
4102 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4103 (_.info512.VT (insert_subvector undef,
4104 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4107 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4108 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4111 // AVX512VL patterns.
4112 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4113 _.info128.ImmAllZerosV)),
4114 (!cast<Instruction>(InstrStr#rmkz)
4115 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4118 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4119 (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4120 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4121 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
// Instantiate the scalar select/store/load lowerings for f32 and f64; the
// mask operand shapes mirror the DAGs the legalizer actually produces
// (and GR32/GR16/GR8 masks with the low bit significant).
4125 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4126 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4128 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4129 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4130 defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4131 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4132 defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4133 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
// FP16 variants of the same lowerings, plus the f16 X86selects patterns.
4135 let Predicates = [HasFP16] in {
4136 defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
4137 defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4138 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4139 defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4140 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4141 defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4142 (v32i1 (insert_subvector
4143 (v32i1 immAllZerosV),
4144 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4146 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4149 defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4150 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4151 defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4152 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4153 defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4154 (v32i1 (insert_subvector
4155 (v32i1 immAllZerosV),
4156 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4158 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
// Scalar f16 selects become masked VMOVSH through VR128X register copies.
4161 def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))),
4162 (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk
4163 (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)),
4164 VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4165 (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4167 def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)),
4168 (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4169 (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4172 defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4173 (v16i1 (insert_subvector
4174 (v16i1 immAllZerosV),
4175 (v4i1 (extract_subvector
4176 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4179 (v4i1 (extract_subvector
4180 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4181 (iPTR 0))), GR8, sub_8bit>;
4182 defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4187 (v16i1 immAllZerosV),
4188 (v2i1 (extract_subvector
4189 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4193 (v2i1 (extract_subvector
4194 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4195 (iPTR 0))), GR8, sub_8bit>;
4197 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4198 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4199 defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4200 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4201 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4202 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4204 defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4205 (v16i1 (insert_subvector
4206 (v16i1 immAllZerosV),
4207 (v4i1 (extract_subvector
4208 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4211 (v4i1 (extract_subvector
4212 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4213 (iPTR 0))), GR8, sub_8bit>;
4214 defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4219 (v16i1 immAllZerosV),
4220 (v2i1 (extract_subvector
4221 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4225 (v2i1 (extract_subvector
4226 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4227 (iPTR 0))), GR8, sub_8bit>;
// Lower scalar f32/f64 selects on a 1-bit mask to masked VMOVSS/VMOVSD,
// moving operands through VR128X since the instructions take vector regs.
4229 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4230 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4231 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4232 VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4233 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4235 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4236 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4237 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
// Select with a load on the taken side folds into the masked load forms.
4239 def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
4241 (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
4242 VK1WM:$mask, addr:$src)),
4244 def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
4245 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
4247 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4248 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4249 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4250 VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4251 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4253 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
4254 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4255 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4257 def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
4259 (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
4260 VK1WM:$mask, addr:$src)),
4262 def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
4263 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
// Whole-vector selects where only element 0 differs also map to VMOVSS/SD.
4266 def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
4267 (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4268 def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
4269 (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4271 def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
4272 (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4273 def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
4274 (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
// "_REV" variants: the store-form encoding (opcode 0x11, MRMDestReg) of the
// same register-to-register moves. They carry no patterns and exist for the
// assembler/disassembler only (isCodeGenOnly + ForceDisassemble).
4276 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
4277 let Predicates = [HasFP16] in {
4278 def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4279 (ins VR128X:$src1, VR128X:$src2),
4280 "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4281 []>, T_MAP5, XS, EVEX, VVVV, VEX_LIG,
4282 Sched<[SchedWriteFShuffle.XMM]>;
4284 let Constraints = "$src0 = $dst" in
4285 def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4286 (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask,
4287 VR128X:$src1, VR128X:$src2),
4288 "vmovsh\t{$src2, $src1, $dst {${mask}}|"#
4289 "$dst {${mask}}, $src1, $src2}",
4290 []>, T_MAP5, XS, EVEX_K, EVEX, VVVV, VEX_LIG,
4291 Sched<[SchedWriteFShuffle.XMM]>;
4293 def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4294 (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4295 "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
4296 "$dst {${mask}} {z}, $src1, $src2}",
4297 []>, EVEX_KZ, T_MAP5, XS, EVEX, VVVV, VEX_LIG,
4298 Sched<[SchedWriteFShuffle.XMM]>;
4300 def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4301 (ins VR128X:$src1, VR128X:$src2),
4302 "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4303 []>, TB, XS, EVEX, VVVV, VEX_LIG,
4304 Sched<[SchedWriteFShuffle.XMM]>;
4306 let Constraints = "$src0 = $dst" in
4307 def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4308 (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4309 VR128X:$src1, VR128X:$src2),
4310 "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4311 "$dst {${mask}}, $src1, $src2}",
4312 []>, EVEX_K, TB, XS, EVEX, VVVV, VEX_LIG,
4313 Sched<[SchedWriteFShuffle.XMM]>;
4315 def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4316 (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4317 "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4318 "$dst {${mask}} {z}, $src1, $src2}",
4319 []>, EVEX_KZ, TB, XS, EVEX, VVVV, VEX_LIG,
4320 Sched<[SchedWriteFShuffle.XMM]>;
4322 def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4323 (ins VR128X:$src1, VR128X:$src2),
4324 "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4325 []>, TB, XD, EVEX, VVVV, VEX_LIG, REX_W,
4326 Sched<[SchedWriteFShuffle.XMM]>;
4328 let Constraints = "$src0 = $dst" in
4329 def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4330 (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4331 VR128X:$src1, VR128X:$src2),
4332 "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4333 "$dst {${mask}}, $src1, $src2}",
4334 []>, EVEX_K, TB, XD, EVEX, VVVV, VEX_LIG,
4335 REX_W, Sched<[SchedWriteFShuffle.XMM]>;
4337 def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4338 (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4340 "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4341 "$dst {${mask}} {z}, $src1, $src2}",
4342 []>, EVEX_KZ, TB, XD, EVEX, VVVV, VEX_LIG,
4343 REX_W, Sched<[SchedWriteFShuffle.XMM]>;
// ".s" assembler aliases explicitly select the _REV (store-form) encodings
// of vmovsh/vmovss/vmovsd, including the masked and zero-masked variants.
4346 def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4347 (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4348 def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"#
4349 "$dst {${mask}}, $src1, $src2}",
4350 (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask,
4351 VR128X:$src1, VR128X:$src2), 0>;
4352 def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4353 "$dst {${mask}} {z}, $src1, $src2}",
4354 (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask,
4355 VR128X:$src1, VR128X:$src2), 0>;
4356 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4357 (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4358 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4359 "$dst {${mask}}, $src1, $src2}",
4360 (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4361 VR128X:$src1, VR128X:$src2), 0>;
4362 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4363 "$dst {${mask}} {z}, $src1, $src2}",
4364 (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4365 VR128X:$src1, VR128X:$src2), 0>;
4366 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4367 (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4368 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4369 "$dst {${mask}}, $src1, $src2}",
4370 (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4371 VR128X:$src1, VR128X:$src2), 0>;
4372 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4373 "$dst {${mask}} {z}, $src1, $src2}",
4374 (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4375 VR128X:$src1, VR128X:$src2), 0>;
// X86vzmovl (keep element 0, zero the rest): use VMOVSS against a zeroed
// register when optimizing for size; wider types go through the xmm subreg.
4377 let Predicates = [HasAVX512, OptForSize] in {
4378 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4379 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4380 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4381 (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4383 // Move low f32 and clear high bits.
4384 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4385 (SUBREG_TO_REG (i32 0),
4386 (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4387 (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4388 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4389 (SUBREG_TO_REG (i32 0),
4390 (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4391 (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4393 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4394 (SUBREG_TO_REG (i32 0),
4395 (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4396 (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4397 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4398 (SUBREG_TO_REG (i32 0),
4399 (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4400 (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4403 // Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4404 // VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
4405 let Predicates = [HasAVX512, OptForSpeed] in {
4406 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4407 (SUBREG_TO_REG (i32 0),
4408 (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4409 (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4410 (i8 1))), sub_xmm)>;
4411 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4412 (SUBREG_TO_REG (i32 0),
4413 (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4414 (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4415 (i8 3))), sub_xmm)>;
// Scalar loads into element 0 and zero-extending loads for wider types.
4418 let Predicates = [HasAVX512] in {
4419 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4420 (VMOVSSZrm addr:$src)>;
4421 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4422 (VMOVSDZrm addr:$src)>;
4424 // Represent the same patterns above but in the form they appear for
4426 def : Pat<(v8f32 (X86vzload32 addr:$src)),
4427 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4428 def : Pat<(v4f64 (X86vzload64 addr:$src)),
4429 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4431 // Represent the same patterns above but in the form they appear for
4433 def : Pat<(v16f32 (X86vzload32 addr:$src)),
4434 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4435 def : Pat<(v8f64 (X86vzload64 addr:$src)),
4436 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
// FP16 equivalents of the vzmovl/vzload patterns, using VMOVSH.
4438 let Predicates = [HasFP16] in {
4439 def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
4440 (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
4441 def : Pat<(v8i16 (X86vzmovl (v8i16 VR128X:$src))),
4442 (VMOVSHZrr (v8i16 (AVX512_128_SET0)), VR128X:$src)>;
4444 // FIXME we need better canonicalization in dag combine
4445 def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
4446 (SUBREG_TO_REG (i32 0),
4447 (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4448 (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4449 def : Pat<(v16i16 (X86vzmovl (v16i16 VR256X:$src))),
4450 (SUBREG_TO_REG (i32 0),
4451 (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
4452 (v8i16 (EXTRACT_SUBREG (v16i16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4454 // FIXME we need better canonicalization in dag combine
4455 def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
4456 (SUBREG_TO_REG (i32 0),
4457 (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4458 (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
4459 def : Pat<(v32i16 (X86vzmovl (v32i16 VR512:$src))),
4460 (SUBREG_TO_REG (i32 0),
4461 (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
4462 (v8i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_xmm)))), sub_xmm)>;
4464 def : Pat<(v8f16 (X86vzload16 addr:$src)),
4465 (VMOVSHZrm addr:$src)>;
4467 def : Pat<(v16f16 (X86vzload16 addr:$src)),
4468 (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4470 def : Pat<(v32f16 (X86vzload16 addr:$src)),
4471 (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
// VMOVQ xmm, xmm: copy the low 64 bits and zero the upper bits.
4474 let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4475 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4477 "vmovq\t{$src, $dst|$dst, $src}",
4478 [(set VR128X:$dst, (v2i64 (X86vzmovl
4479 (v2i64 VR128X:$src))))]>,
4483 let Predicates = [HasAVX512] in {
// anyext'd GR8 must be widened to a GR32 before VMOVD can consume it.
4484 def : Pat<(v4i32 (scalar_to_vector (i32 (anyext GR8:$src)))),
4485 (VMOVDI2PDIZrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
4486 GR8:$src, sub_8bit)))>;
4487 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4488 (VMOVDI2PDIZrr GR32:$src)>;
4490 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4491 (VMOV64toPQIZrr GR64:$src)>;
4493 // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
4494 def : Pat<(v4i32 (X86vzload32 addr:$src)),
4495 (VMOVDI2PDIZrm addr:$src)>;
4496 def : Pat<(v8i32 (X86vzload32 addr:$src)),
4497 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4498 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4499 (VMOVZPQILo2PQIZrr VR128X:$src)>;
4500 def : Pat<(v2i64 (X86vzload64 addr:$src)),
4501 (VMOVQI2PQIZrm addr:$src)>;
4502 def : Pat<(v4i64 (X86vzload64 addr:$src)),
4503 (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4505 // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4506 def : Pat<(v16i32 (X86vzload32 addr:$src)),
4507 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4508 def : Pat<(v8i64 (X86vzload64 addr:$src)),
4509 (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
// 256/512-bit vzmovl of i64/f64 elements: operate on the low xmm subreg.
4511 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4512 (SUBREG_TO_REG (i32 0),
4513 (v2f64 (VMOVZPQILo2PQIZrr
4514 (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4516 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4517 (SUBREG_TO_REG (i32 0),
4518 (v2i64 (VMOVZPQILo2PQIZrr
4519 (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4522 def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4523 (SUBREG_TO_REG (i32 0),
4524 (v2f64 (VMOVZPQILo2PQIZrr
4525 (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4527 def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4528 (SUBREG_TO_REG (i32 0),
4529 (v2i64 (VMOVZPQILo2PQIZrr
4530 (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4534 //===----------------------------------------------------------------------===//
4535 // AVX-512 - Non-temporals
4536 //===----------------------------------------------------------------------===//
4538 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4539 (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4540 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4541 EVEX, T8, PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4543 let Predicates = [HasVLX] in {
4544 def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4546 "vmovntdqa\t{$src, $dst|$dst, $src}",
4547 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4548 EVEX, T8, PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4550 def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4552 "vmovntdqa\t{$src, $dst|$dst, $src}",
4553 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4554 EVEX, T8, PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
4557 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4558 X86SchedWriteMoveLS Sched,
4559 PatFrag st_frag = alignednontemporalstore> {
4560 let SchedRW = [Sched.MR], AddedComplexity = 400 in
4561 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4562 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4563 [(st_frag (_.VT _.RC:$src), addr:$dst)],
4564 _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
4567 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4568 AVX512VLVectorVTInfo VTInfo,
4569 X86SchedWriteMoveLSWidths Sched> {
4570 let Predicates = [HasAVX512] in
4571 defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4573 let Predicates = [HasAVX512, HasVLX] in {
4574 defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4575 defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
// Non-temporal stores: integer (VMOVNTDQ), double (VMOVNTPD, REX.W) and
// single (VMOVNTPS) variants, all built from avx512_movnt_vl.
4579 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4580 SchedWriteVecMoveLSNT>, TB, PD;
4581 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4582 SchedWriteFMoveLSNT>, TB, PD, REX_W;
4583 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4584 SchedWriteFMoveLSNT>, TB;
// 512-bit selection patterns. VMOVNTDQ is only instantiated for i64 elements,
// so the remaining integer element types (v16i32/v32i16/v64i8) are mapped
// onto VMOVNTDQZmr here; all 512-bit non-temporal loads, including FP types,
// are mapped onto the single VMOVNTDQAZrm instruction.
4586 let Predicates = [HasAVX512], AddedComplexity = 400 in {
4587 def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4588 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4589 def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4590 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4591 def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4592 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4594 def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4595 (VMOVNTDQAZrm addr:$src)>;
4596 def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4597 (VMOVNTDQAZrm addr:$src)>;
4598 def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4599 (VMOVNTDQAZrm addr:$src)>;
4600 def : Pat<(v16i32 (alignednontemporalload addr:$src)),
4601 (VMOVNTDQAZrm addr:$src)>;
4602 def : Pat<(v32i16 (alignednontemporalload addr:$src)),
4603 (VMOVNTDQAZrm addr:$src)>;
4604 def : Pat<(v64i8 (alignednontemporalload addr:$src)),
4605 (VMOVNTDQAZrm addr:$src)>;
// 256/128-bit equivalents of the patterns above, guarded by HasVLX: extra
// integer element types go through VMOVNTDQZ256mr/VMOVNTDQZ128mr, and every
// non-temporal load type goes through VMOVNTDQAZ256rm/VMOVNTDQAZ128rm.
4608 let Predicates = [HasVLX], AddedComplexity = 400 in {
4609 def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4610 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4611 def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4612 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4613 def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4614 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4616 def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4617 (VMOVNTDQAZ256rm addr:$src)>;
4618 def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4619 (VMOVNTDQAZ256rm addr:$src)>;
4620 def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4621 (VMOVNTDQAZ256rm addr:$src)>;
4622 def : Pat<(v8i32 (alignednontemporalload addr:$src)),
4623 (VMOVNTDQAZ256rm addr:$src)>;
4624 def : Pat<(v16i16 (alignednontemporalload addr:$src)),
4625 (VMOVNTDQAZ256rm addr:$src)>;
4626 def : Pat<(v32i8 (alignednontemporalload addr:$src)),
4627 (VMOVNTDQAZ256rm addr:$src)>;
4629 def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4630 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4631 def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4632 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4633 def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4634 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4636 def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4637 (VMOVNTDQAZ128rm addr:$src)>;
4638 def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4639 (VMOVNTDQAZ128rm addr:$src)>;
4640 def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4641 (VMOVNTDQAZ128rm addr:$src)>;
4642 def : Pat<(v4i32 (alignednontemporalload addr:$src)),
4643 (VMOVNTDQAZ128rm addr:$src)>;
4644 def : Pat<(v8i16 (alignednontemporalload addr:$src)),
4645 (VMOVNTDQAZ128rm addr:$src)>;
4646 def : Pat<(v16i8 (alignednontemporalload addr:$src)),
4647 (VMOVNTDQAZ128rm addr:$src)>;
4650 //===----------------------------------------------------------------------===//
4651 // AVX-512 - Integer arithmetic
// Masked integer binop: register-register (rr) and register-memory (rm)
// forms for one vector width. Only the rr form is marked commutable; the rm
// form folds the load into the second source operand.
4653 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4654 X86VectorVTInfo _, X86FoldableSchedWrite sched,
4655 bit IsCommutable = 0> {
4656 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4657 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4658 "$src2, $src1", "$src1, $src2",
4659 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4660 IsCommutable, IsCommutable>, AVX512BIBase, EVEX, VVVV,
4663 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4664 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4665 "$src2, $src1", "$src1, $src2",
4666 (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
4667 AVX512BIBase, EVEX, VVVV,
4668 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Extends avx512_binop_rm with the broadcast-from-memory (rmb, EVEX.b) form,
// which splats a single scalar element as the second source operand.
4674 multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4675 X86VectorVTInfo _, X86FoldableSchedWrite sched,
4676 bit IsCommutable = 0> :
4677 avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4678 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4679 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4680 "${src2}"#_.BroadcastStr#", $src1",
4681 "$src1, ${src2}"#_.BroadcastStr,
4682 (_.VT (OpNode _.RC:$src1,
4683 (_.BroadcastLdFrag addr:$src2)))>,
4684 AVX512BIBase, EVEX, VVVV, EVEX_B,
4685 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates avx512_binop_rm at 512/256/128 bits; the Z form is guarded by
// the given predicate, the Z256/Z128 forms additionally by HasVLX.
4685 multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4686 AVX512VLVectorVTInfo VTInfo,
4687 X86SchedWriteWidths sched, Predicate prd,
4688 bit IsCommutable = 0> {
4689 let Predicates = [prd] in
4690 defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4691 IsCommutable>, EVEX_V512;
4693 let Predicates = [prd, HasVLX] in {
4694 defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4695 sched.YMM, IsCommutable>, EVEX_V256;
4696 defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4697 sched.XMM, IsCommutable>, EVEX_V128;
// Same as avx512_binop_rm_vl, but using avx512_binop_rmb so the broadcast
// (rmb) form is generated as well.
4701 multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4702 AVX512VLVectorVTInfo VTInfo,
4703 X86SchedWriteWidths sched, Predicate prd,
4704 bit IsCommutable = 0> {
4705 let Predicates = [prd] in
4706 defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4707 IsCommutable>, EVEX_V512;
4709 let Predicates = [prd, HasVLX] in {
4710 defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4711 sched.YMM, IsCommutable>, EVEX_V256;
4712 defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4713 sched.XMM, IsCommutable>, EVEX_V128;
// Per-element-size wrappers. Q/D element sizes support embedded broadcast
// (rmb_vl); W/B element sizes do not (AVX-512 has no 16/8-bit broadcast for
// these ops), so they use the plain rm_vl multiclass. Each fixes the
// EVEX_CD8 compressed-displacement scale to its element size.
4717 multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4718 X86SchedWriteWidths sched, Predicate prd,
4719 bit IsCommutable = 0> {
4720 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4721 sched, prd, IsCommutable>,
4722 REX_W, EVEX_CD8<64, CD8VF>;
4725 multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4726 X86SchedWriteWidths sched, Predicate prd,
4727 bit IsCommutable = 0> {
4728 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4729 sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
4732 multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4733 X86SchedWriteWidths sched, Predicate prd,
4734 bit IsCommutable = 0> {
4735 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4736 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4740 multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4741 X86SchedWriteWidths sched, Predicate prd,
4742 bit IsCommutable = 0> {
4743 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4744 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
// Convenience wrappers emitting D+Q, B+W, or all four element-size variants
// of a binop (suffixing the mnemonic with "b"/"w"/"d"/"q"). The _all form
// uses HasAVX512 for D/Q but HasBWI for B/W, matching the AVX512BW split.
4748 multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4749 SDNode OpNode, X86SchedWriteWidths sched,
4750 Predicate prd, bit IsCommutable = 0> {
4751 defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
4754 defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
4758 multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4759 SDNode OpNode, X86SchedWriteWidths sched,
4760 Predicate prd, bit IsCommutable = 0> {
4761 defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
4764 defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
4768 multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4769 bits<8> opc_d, bits<8> opc_q,
4770 string OpcodeStr, SDNode OpNode,
4771 X86SchedWriteWidths sched,
4772 bit IsCommutable = 0> {
4773 defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4774 sched, HasAVX512, IsCommutable>,
4775 avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4776 sched, HasBWI, IsCommutable>;
// Binop whose source and destination vector types differ (_Src vs _Dst),
// e.g. widening multiplies. The broadcast form uses a third type info
// (_Brdct) with a bitconvert, since the memory element size may differ from
// the source vector's element size.
4779 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
4780 X86FoldableSchedWrite sched,
4781 SDNode OpNode,X86VectorVTInfo _Src,
4782 X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4783 bit IsCommutable = 0> {
4784 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4785 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4786 "$src2, $src1","$src1, $src2",
4788 (_Src.VT _Src.RC:$src1),
4789 (_Src.VT _Src.RC:$src2))),
4791 AVX512BIBase, EVEX, VVVV, Sched<[sched]>;
4792 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4793 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4794 "$src2, $src1", "$src1, $src2",
4795 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4796 (_Src.LdFrag addr:$src2)))>,
4797 AVX512BIBase, EVEX, VVVV,
4798 Sched<[sched.Folded, sched.ReadAfterFold]>;
4800 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4801 (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
4803 "${src2}"#_Brdct.BroadcastStr#", $src1",
4804 "$src1, ${src2}"#_Brdct.BroadcastStr,
4805 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4806 (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
4807 AVX512BIBase, EVEX, VVVV, EVEX_B,
4808 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Integer arithmetic instantiations: add/sub (wrapping, signed-saturating,
// unsigned-saturating), multiplies (low/high/rounded-high/widening) and
// unsigned average. The trailing bit selects commutability (1 for
// commutative ops such as add/mul, 0 for sub).
4811 defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
4812 SchedWriteVecALU, 1>;
4813 defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
4814 SchedWriteVecALU, 0>;
4815 defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
4816 SchedWriteVecALU, HasBWI, 1>;
4817 defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
4818 SchedWriteVecALU, HasBWI, 0>;
4819 defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
4820 SchedWriteVecALU, HasBWI, 1>;
4821 defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
4822 SchedWriteVecALU, HasBWI, 0>;
4823 defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
4824 SchedWritePMULLD, HasAVX512, 1>, T8;
4825 defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
4826 SchedWriteVecIMul, HasBWI, 1>;
4827 defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
4828 SchedWriteVecIMul, HasDQI, 1>, T8;
4829 defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
4831 defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
4833 defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
4834 SchedWriteVecIMul, HasBWI, 1>, T8;
4835 defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", avgceilu,
4836 SchedWriteVecALU, HasBWI, 1>;
4837 defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
4838 SchedWriteVecIMul, HasAVX512, 1>, T8;
4839 defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
4840 SchedWriteVecIMul, HasAVX512, 1>;
// Instantiates avx512_binop_rm2 at all three widths with i64-element
// broadcast type infos (v8i64/v4i64/v2i64), used here for VPMULTISHIFTQB
// (AVX512VBMI), which reads i8 vectors but broadcasts 64-bit chunks.
4842 multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
4843 X86SchedWriteWidths sched,
4844 AVX512VLVectorVTInfo _SrcVTInfo,
4845 AVX512VLVectorVTInfo _DstVTInfo,
4846 SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
4847 let Predicates = [prd] in
4848 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
4849 _SrcVTInfo.info512, _DstVTInfo.info512,
4850 v8i64_info, IsCommutable>,
4851 EVEX_V512, EVEX_CD8<64, CD8VF>, REX_W;
4852 let Predicates = [HasVLX, prd] in {
4853 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
4854 _SrcVTInfo.info256, _DstVTInfo.info256,
4855 v4i64x_info, IsCommutable>,
4856 EVEX_V256, EVEX_CD8<64, CD8VF>, REX_W;
4857 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
4858 _SrcVTInfo.info128, _DstVTInfo.info128,
4859 v2i64x_info, IsCommutable>,
4860 EVEX_V128, EVEX_CD8<64, CD8VF>, REX_W;
4864 defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
4865 avx512vl_i8_info, avx512vl_i8_info,
4866 X86multishift, HasVBMI, 0>, T8;
// Broadcast-from-memory (rmb, EVEX.b) form for pack-style ops with distinct
// source/destination types; broadcasts a _Src-element scalar for src2.
4868 multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4869 X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
4870 X86FoldableSchedWrite sched> {
4871 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4872 (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
4874 "${src2}"#_Src.BroadcastStr#", $src1",
4875 "$src1, ${src2}"#_Src.BroadcastStr,
4876 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4877 (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
4878 EVEX, VVVV, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
4879 Sched<[sched.Folded, sched.ReadAfterFold]>;
// rr/rm forms for pack-style ops (distinct source/destination vector types);
// commutability applies to the rr form only.
4882 multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
4883 SDNode OpNode,X86VectorVTInfo _Src,
4884 X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
4885 bit IsCommutable = 0> {
4886 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4887 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4888 "$src2, $src1","$src1, $src2",
4890 (_Src.VT _Src.RC:$src1),
4891 (_Src.VT _Src.RC:$src2))),
4892 IsCommutable, IsCommutable>,
4893 EVEX_CD8<_Src.EltSize, CD8VF>, EVEX, VVVV, Sched<[sched]>;
4894 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4895 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4896 "$src2, $src1", "$src1, $src2",
4897 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4898 (_Src.LdFrag addr:$src2)))>,
4899 EVEX, VVVV, EVEX_CD8<_Src.EltSize, CD8VF>,
4900 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Width instantiations for the pack instructions. The i32->i16 packs get a
// broadcast (rmb) form in addition to rr/rm; the i16->i8 packs only get
// rr/rm (no 16-bit broadcast). All require HasBWI; VL forms need HasVLX.
4903 multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
4905 let Predicates = [HasBWI] in
4906 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
4907 v32i16_info, SchedWriteShuffle.ZMM>,
4908 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
4909 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
4910 let Predicates = [HasBWI, HasVLX] in {
4911 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
4912 v16i16x_info, SchedWriteShuffle.YMM>,
4913 avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
4914 v16i16x_info, SchedWriteShuffle.YMM>,
4916 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
4917 v8i16x_info, SchedWriteShuffle.XMM>,
4918 avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
4919 v8i16x_info, SchedWriteShuffle.XMM>,
4923 multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
4925 let Predicates = [HasBWI] in
4926 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
4927 SchedWriteShuffle.ZMM>, EVEX_V512, WIG;
4928 let Predicates = [HasBWI, HasVLX] in {
4929 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
4930 v32i8x_info, SchedWriteShuffle.YMM>,
4932 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
4933 v16i8x_info, SchedWriteShuffle.XMM>,
// VPMADD* multiply-add helpers and the pack/madd instantiations. VPMADDWD is
// commutable (IsCommutable = 1); VPMADDUBSW is not, since its two sources
// are interpreted with different signedness.
4938 multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
4939 SDNode OpNode, AVX512VLVectorVTInfo _Src,
4940 AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
4941 let Predicates = [HasBWI] in
4942 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
4943 _Dst.info512, SchedWriteVecIMul.ZMM,
4944 IsCommutable>, EVEX_V512;
4945 let Predicates = [HasBWI, HasVLX] in {
4946 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
4947 _Dst.info256, SchedWriteVecIMul.YMM,
4948 IsCommutable>, EVEX_V256;
4949 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
4950 _Dst.info128, SchedWriteVecIMul.XMM,
4951 IsCommutable>, EVEX_V128;
4955 defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
4956 defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
4957 defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
4958 defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
4960 defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
4961 avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8, WIG;
4962 defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
4963 avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, WIG;
// Signed/unsigned min/max for all element sizes. B/W variants require
// HasBWI; D/Q variants only HasAVX512. All are commutable.
4965 defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
4966 SchedWriteVecALU, HasBWI, 1>, T8;
4967 defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
4968 SchedWriteVecALU, HasBWI, 1>;
4969 defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
4970 SchedWriteVecALU, HasAVX512, 1>, T8;
4971 defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
4972 SchedWriteVecALU, HasAVX512, 1>, T8;
4974 defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
4975 SchedWriteVecALU, HasBWI, 1>;
4976 defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
4977 SchedWriteVecALU, HasBWI, 1>, T8;
4978 defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
4979 SchedWriteVecALU, HasAVX512, 1>, T8;
4980 defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
4981 SchedWriteVecALU, HasAVX512, 1>, T8;
4983 defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
4984 SchedWriteVecALU, HasBWI, 1>, T8;
4985 defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
4986 SchedWriteVecALU, HasBWI, 1>;
4987 defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
4988 SchedWriteVecALU, HasAVX512, 1>, T8;
4989 defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
4990 SchedWriteVecALU, HasAVX512, 1>, T8;
4992 defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
4993 SchedWriteVecALU, HasBWI, 1>;
4994 defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
4995 SchedWriteVecALU, HasBWI, 1>, T8;
4996 defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
4997 SchedWriteVecALU, HasAVX512, 1>, T8;
4998 defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
4999 SchedWriteVecALU, HasAVX512, 1>, T8;
// Without VLX, 128/256-bit VPMULLQ is emulated by widening the operands into
// a ZMM register (INSERT_SUBREG into IMPLICIT_DEF), running the 512-bit
// instruction, and extracting the low subregister.
// NOTE(review): the VPMULLQZ*/EXTRACT_SUBREG result lines appear to have
// been dropped in this extraction -- confirm against upstream.
5001 // PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
5002 let Predicates = [HasDQI, NoVLX] in {
5003 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5006 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5007 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5009 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5012 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5016 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5019 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5020 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5022 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5025 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
// Same widening trick as the PMULLQ patterns above, generalized for the
// 64-bit element min/max instructions: widen to 512 bits, run the Z-suffixed
// rr/rmb instruction (looked up by name with !cast), then narrow. Applied
// below for unsigned/signed max/min when VLX is unavailable.
5030 multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
5031 def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
5033 (!cast<Instruction>(Instr#"rr")
5034 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5035 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5037 def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5039 (!cast<Instruction>(Instr#"rmb")
5040 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5044 def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
5046 (!cast<Instruction>(Instr#"rr")
5047 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5048 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5050 def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5052 (!cast<Instruction>(Instr#"rmb")
5053 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5058 let Predicates = [HasAVX512, NoVLX] in {
5059 defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
5060 defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
5061 defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
5062 defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
5065 //===----------------------------------------------------------------------===//
5066 // AVX-512 Logical Instructions
5067 //===----------------------------------------------------------------------===//
// Bitwise logical ops, instantiated only for D and Q element sizes (the ISA
// provides no B/W-element logicals); byte/word-typed uses are mapped onto
// the Q forms by the patterns that follow. VPANDN is non-commutative.
5069 defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
5070 SchedWriteVecLogic, HasAVX512, 1>;
5071 defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
5072 SchedWriteVecLogic, HasAVX512, 1>;
5073 defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
5074 SchedWriteVecLogic, HasAVX512, 1>;
5075 defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
5076 SchedWriteVecLogic, HasAVX512>;
// Map 128/256-bit byte- and word-element logical ops (register-register and
// load-folding forms) onto the quadword-element EVEX instructions, which are
// bitwise-identical for these operations.
5078 let Predicates = [HasVLX] in {
5079 def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
5080 (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5081 def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
5082 (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5084 def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
5085 (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5086 def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
5087 (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5089 def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
5090 (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5091 def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
5092 (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5094 def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
5095 (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5096 def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
5097 (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5099 def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
5100 (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5101 def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
5102 (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5104 def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
5105 (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5106 def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
5107 (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5109 def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
5110 (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5111 def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
5112 (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5114 def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
5115 (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5116 def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
5117 (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5119 def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
5120 (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5121 def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
5122 (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5124 def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
5125 (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5126 def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
5127 (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5129 def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
5130 (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5131 def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
5132 (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5134 def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
5135 (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5136 def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
5137 (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5139 def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
5140 (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5141 def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
5142 (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5144 def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
5145 (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5146 def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
5147 (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5149 def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
5150 (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5151 def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
5152 (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5154 def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
5155 (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5156 def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
5157 (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
// 512-bit counterparts: byte/word logical ops map onto the quadword Z-form
// instructions, rr and load-folding variants.
5160 let Predicates = [HasAVX512] in {
5161 def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5162 (VPANDQZrr VR512:$src1, VR512:$src2)>;
5163 def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5164 (VPANDQZrr VR512:$src1, VR512:$src2)>;
5166 def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5167 (VPORQZrr VR512:$src1, VR512:$src2)>;
5168 def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5169 (VPORQZrr VR512:$src1, VR512:$src2)>;
5171 def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5172 (VPXORQZrr VR512:$src1, VR512:$src2)>;
5173 def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5174 (VPXORQZrr VR512:$src1, VR512:$src2)>;
5176 def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5177 (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5178 def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5179 (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5181 def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
5182 (VPANDQZrm VR512:$src1, addr:$src2)>;
5183 def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
5184 (VPANDQZrm VR512:$src1, addr:$src2)>;
5186 def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
5187 (VPORQZrm VR512:$src1, addr:$src2)>;
5188 def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
5189 (VPORQZrm VR512:$src1, addr:$src2)>;
5191 def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
5192 (VPXORQZrm VR512:$src1, addr:$src2)>;
5193 def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
5194 (VPXORQZrm VR512:$src1, addr:$src2)>;
5196 def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
5197 (VPANDNQZrm VR512:$src1, addr:$src2)>;
5198 def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
5199 (VPANDNQZrm VR512:$src1, addr:$src2)>;
// Masked-select lowering when the vselect's element type (_, used for the
// mask) differs from the logic op's element type (IntInfo): the logic result
// is bitconverted to the select type and matched to the masked (k) and
// zero-masked (kz) instruction forms, for rr, rm and (below) rmb variants.
5202 // Patterns to catch vselect with different type than logic op.
5203 multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
5205 X86VectorVTInfo IntInfo> {
5206 // Masked register-register logical operations.
5207 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5208 (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5210 (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5211 _.RC:$src1, _.RC:$src2)>;
5213 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5214 (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5216 (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5219 // Masked register-memory logical operations.
5220 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5221 (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5222 (load addr:$src2)))),
5224 (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5225 _.RC:$src1, addr:$src2)>;
5226 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5227 (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5228 (load addr:$src2)))),
5230 (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
5234 multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
5236 X86VectorVTInfo IntInfo> {
5237 // Register-broadcast logical operations.
5238 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5240 (IntInfo.VT (OpNode _.RC:$src1,
5241 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5243 (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5244 _.RC:$src1, addr:$src2)>;
5245 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5247 (IntInfo.VT (OpNode _.RC:$src1,
5248 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5250 (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
5251 _.RC:$src1, addr:$src2)>;
// Instantiate the lowering patterns above at all vector widths: Z128/Z256
// under HasVLX, Z under HasAVX512, for both the plain and broadcast forms.
5254 multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
5255 AVX512VLVectorVTInfo SelectInfo,
5256 AVX512VLVectorVTInfo IntInfo> {
5257 let Predicates = [HasVLX] in {
5258 defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
5260 defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
5263 let Predicates = [HasAVX512] in {
5264 defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
5269 multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
5270 AVX512VLVectorVTInfo SelectInfo,
5271 AVX512VLVectorVTInfo IntInfo> {
5272 let Predicates = [HasVLX] in {
5273 defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
5274 SelectInfo.info128, IntInfo.info128>;
5275 defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
5276 SelectInfo.info256, IntInfo.info256>;
5278 let Predicates = [HasAVX512] in {
5279 defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
5280 SelectInfo.info512, IntInfo.info512>;
// Cross-type instantiations: a vselect of i64/i32/f32/f64 elements combined
// with a logic op of any other integer element type is funneled to the D- or
// Q-element instruction. Broadcast lowerings follow, then the four logic ops
// are wired up.
5284 multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
5285 // i64 vselect with i32/i16/i8 logic op
5286 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5288 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5290 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5293 // i32 vselect with i64/i16/i8 logic op
5294 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5296 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5298 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5301 // f32 vselect with i64/i32/i16/i8 logic op
5302 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5304 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5306 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5308 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5311 // f64 vselect with i64/i32/i16/i8 logic op
5312 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5314 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5316 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5318 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5321 defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
5324 defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
5329 defm : avx512_logical_lowering_types<"VPAND", and>;
5330 defm : avx512_logical_lowering_types<"VPOR", or>;
5331 defm : avx512_logical_lowering_types<"VPXOR", xor>;
5332 defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
5334 //===----------------------------------------------------------------------===//
5335 // AVX-512 FP arithmetic
5336 //===----------------------------------------------------------------------===//
// Scalar FP binop: masked intrinsic forms (rr_Int/rm_Int, operating on the
// full vector register via VecNode) plus unmasked isCodeGenOnly rr/rm forms
// on the scalar register class (via OpNode). All read MXCSR and may raise
// FP exceptions.
5338 multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5339 SDPatternOperator OpNode, SDNode VecNode,
5340 X86FoldableSchedWrite sched, bit IsCommutable> {
5341 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5342 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5343 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5344 "$src2, $src1", "$src1, $src2",
5345 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5348 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5349 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5350 "$src2, $src1", "$src1, $src2",
5351 (_.VT (VecNode _.RC:$src1,
5352 (_.ScalarIntMemFrags addr:$src2)))>,
5353 Sched<[sched.Folded, sched.ReadAfterFold]>;
5354 let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5355 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5356 (ins _.FRC:$src1, _.FRC:$src2),
5357 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5358 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5360 let isCommutable = IsCommutable;
5362 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5363 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5364 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5365 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5366 (_.ScalarLdFrag addr:$src2)))]>,
5367 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Adds the embedded-rounding form (rrb_Int, EVEX.b + static RC operand $rc);
// still reads MXCSR but takes its rounding mode from the encoding.
5372 multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5373 SDNode VecNode, X86FoldableSchedWrite sched> {
5374 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5375 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5376 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5377 "$rc, $src2, $src1", "$src1, $src2, $rc",
5378 (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5380 EVEX_B, EVEX_RC, Sched<[sched]>;
// Scalar FP binop variant for ops with a suppress-all-exceptions form:
// regular rr_Int/rm_Int (SIMD_EXC), unmasked isCodeGenOnly rr/rm, and an
// {sae} rrb_Int form (EVEX.b, SaeNode) that suppresses exception reporting.
5382 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5383 SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5384 X86FoldableSchedWrite sched, bit IsCommutable> {
5385 let ExeDomain = _.ExeDomain in {
5386 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5387 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5388 "$src2, $src1", "$src1, $src2",
5389 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5390 Sched<[sched]>, SIMD_EXC;
5392 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5393 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5394 "$src2, $src1", "$src1, $src2",
5395 (_.VT (VecNode _.RC:$src1,
5396 (_.ScalarIntMemFrags addr:$src2)))>,
5397 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
5399 let isCodeGenOnly = 1, Predicates = [HasAVX512],
5400 Uses = [MXCSR], mayRaiseFPException = 1 in {
5401 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5402 (ins _.FRC:$src1, _.FRC:$src2),
5403 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5404 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5406 let isCommutable = IsCommutable;
5408 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5409 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5410 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5411 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5412 (_.ScalarLdFrag addr:$src2)))]>,
5413 Sched<[sched.Folded, sched.ReadAfterFold]>;
5416 let Uses = [MXCSR] in
5417 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5418 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5419 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5420 (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5421 EVEX_B, Sched<[sched]>;
// Instantiates the scalar SS/SD (and, under HasFP16, SH) variants of a
// round-capable scalar binop: plain + static-rounding forms per element size.
5425 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5426                                 SDNode VecNode, SDNode RndNode,
5427                                 X86SchedWriteSizes sched, bit IsCommutable> {
5428   defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5429                               sched.PS.Scl, IsCommutable>,
5430              avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5432                               TB, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5433   defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5434                               sched.PD.Scl, IsCommutable>,
5435              avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5437                               TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5438   let Predicates = [HasFP16] in
5439     defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
5440                                 VecNode, sched.PH.Scl, IsCommutable>,
5441                avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
5443                                 T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>;
// Same per-size instantiation for SAE-capable scalar ops (min/max family).
5446 multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5447                               SDNode VecNode, SDNode SaeNode,
5448                               X86SchedWriteSizes sched, bit IsCommutable> {
5449   defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5450                               VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
5451                               TB, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5452   defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5453                               VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
5454                               TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5455   let Predicates = [HasFP16] in {
5456     defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
5457                                 VecNode, SaeNode, sched.PH.Scl, IsCommutable>,
5458                                 T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>;
// Scalar VADD/VMUL are commutable (trailing 1); VSUB/VDIV are not (0).
// VMIN/VMAX use the SAE variants and are NOT commutable: IEEE min/max is not
// commutable w.r.t. signed zero / NaN operand order.
5461 defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
5462                                  SchedWriteFAddSizes, 1>;
5463 defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
5464                                  SchedWriteFMulSizes, 1>;
5465 defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
5466                                  SchedWriteFAddSizes, 0>;
5467 defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
5468                                  SchedWriteFDivSizes, 0>;
5469 defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5470                                SchedWriteFCmpSizes, 0>;
5471 defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5472                                SchedWriteFCmpSizes, 0>;
5474 // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
5475 // X86fminc and X86fmaxc instead of X86fmin and X86fmax
// Commutable scalar min/max: isCodeGenOnly FRC-register rr/rm forms only
// (no masked/intrinsic variants). Note: "comutable" in the name is a
// historical typo kept for compatibility with existing instantiations.
5476 multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5477                                     X86VectorVTInfo _, SDNode OpNode,
5478                                     X86FoldableSchedWrite sched> {
5479   let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5480   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5481                          (ins _.FRC:$src1, _.FRC:$src2),
5482                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5483                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5485     let isCommutable = 1;
5487   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5488                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5489                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5490                           [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5491                                          (_.ScalarLdFrag addr:$src2)))]>,
5492                           Sched<[sched.Folded, sched.ReadAfterFold]>;
// Commutable (fast-math) scalar min/max instantiations for f32/f64/f16.
// Same opcodes as VMIN*/VMAX* (0x5D/0x5F); these are the codegen-only
// commutable selections via X86fminc/X86fmaxc.
5495 defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5496                                          SchedWriteFCmp.Scl>, TB, XS,
5497                                          EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5499 defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5500                                          SchedWriteFCmp.Scl>, TB, XD,
5501                                          REX_W, EVEX, VVVV, VEX_LIG,
5502                                          EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5504 defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5505                                          SchedWriteFCmp.Scl>, TB, XS,
5506                                          EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5508 defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5509                                          SchedWriteFCmp.Scl>, TB, XD,
5510                                          REX_W, EVEX, VVVV, VEX_LIG,
5511                                          EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5513 defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
5514                                          SchedWriteFCmp.Scl>, T_MAP5, XS,
5515                                          EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC;
5517 defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
5518                                          SchedWriteFCmp.Scl>, T_MAP5, XS,
5519                                          EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC;
// Packed FP binop with separate unmasked (OpNode) and masked (MaskOpNode)
// pattern operators (AVX512_maskable_split). Provides rr, rm (full-vector
// load) and rmb (element broadcast, EVEX_B) forms. MayRaiseFPException
// defaults to 1 but can be disabled (used for the logic ops below).
5521 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5522                             SDPatternOperator MaskOpNode,
5523                             X86VectorVTInfo _, X86FoldableSchedWrite sched,
5525                             bit IsKCommutable = IsCommutable,
5526                             string suffix = _.Suffix,
5527                             string ClobberConstraint = "",
5528                             bit MayRaiseFPException = 1> {
5529   let ExeDomain = _.ExeDomain, hasSideEffects = 0,
5530       Uses = [MXCSR], mayRaiseFPException = MayRaiseFPException in {
5531   defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
5532                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr#suffix,
5533                   "$src2, $src1", "$src1, $src2",
5534                   (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
5535                   (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), ClobberConstraint,
5536                   IsCommutable, IsKCommutable, IsKCommutable>, EVEX, VVVV, Sched<[sched]>;
5537   let mayLoad = 1 in {
5538     defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5539                     (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix,
5540                     "$src2, $src1", "$src1, $src2",
5541                     (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5542                     (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5543                     ClobberConstraint>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
5544     defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5545                      (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix,
5546                      "${src2}"#_.BroadcastStr#", $src1",
5547                      "$src1, ${src2}"#_.BroadcastStr,
5548                      (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5549                      (MaskOpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5550                      ClobberConstraint>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Packed FP op with a static-rounding operand ($rc): register-only rrb form,
// EVEX_B + EVEX_RC encoding.
5555 multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5556                                   SDPatternOperator OpNodeRnd,
5557                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
5558                                   string suffix = _.Suffix,
5559                                   string ClobberConstraint = ""> {
5560   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5561   defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5562                   (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#suffix,
5563                   "$rc, $src2, $src1", "$src1, $src2, $rc",
5564                   (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))),
5565                   0, 0, 0, vselect_mask, ClobberConstraint>,
5566                   EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
// Packed FP op with a {sae} (suppress-all-exceptions) variant: register-only
// rrb form, EVEX_B without a rounding-control operand.
5569 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5570                                 SDPatternOperator OpNodeSAE,
5571                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5572   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5573   defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5574                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5575                   "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5576                   (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5577                   EVEX, VVVV, EVEX_B, Sched<[sched]>;
// Instantiates packed PS/PD variants at 512-bit (under prd) and 128/256-bit
// (under prd + HasVLX). IsPD128Commutable lets v2f64 commutability be
// controlled separately from the other widths.
5580 multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5581                              SDPatternOperator MaskOpNode,
5582                              Predicate prd, X86SchedWriteSizes sched,
5583                              bit IsCommutable = 0,
5584                              bit IsPD128Commutable = IsCommutable> {
5585   let Predicates = [prd] in {
5586   defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
5587                               sched.PS.ZMM, IsCommutable>, EVEX_V512, TB,
5588                               EVEX_CD8<32, CD8VF>;
5589   defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
5590                               sched.PD.ZMM, IsCommutable>, EVEX_V512, TB, PD, REX_W,
5591                               EVEX_CD8<64, CD8VF>;
5594   // Define only if AVX512VL feature is present.
5595   let Predicates = [prd, HasVLX] in {
5596     defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
5597                                    sched.PS.XMM, IsCommutable>, EVEX_V128, TB,
5598                                    EVEX_CD8<32, CD8VF>;
5599     defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
5600                                    sched.PS.YMM, IsCommutable>, EVEX_V256, TB,
5601                                    EVEX_CD8<32, CD8VF>;
5602     defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
5603                                    sched.PD.XMM, IsPD128Commutable,
5604                                    IsCommutable>, EVEX_V128, TB, PD, REX_W,
5605                                    EVEX_CD8<64, CD8VF>;
5606     defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
5607                                    sched.PD.YMM, IsCommutable>, EVEX_V256, TB, PD, REX_W,
5608                                    EVEX_CD8<64, CD8VF>;
// FP16 (PH) counterpart of avx512_fp_binop_p: ZMM under HasFP16,
// XMM/YMM under HasVLX + HasFP16. Uses the T_MAP5 opcode map.
5612 multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5613                               SDPatternOperator MaskOpNode,
5614                               X86SchedWriteSizes sched, bit IsCommutable = 0> {
5615   let Predicates = [HasFP16] in {
5616     defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
5617                                 sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5,
5618                                 EVEX_CD8<16, CD8VF>;
5620   let Predicates = [HasVLX, HasFP16] in {
5621     defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
5622                                    sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5,
5623                                    EVEX_CD8<16, CD8VF>;
5624     defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
5625                                    sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5,
5626                                    EVEX_CD8<16, CD8VF>;
// ZMM-only static-rounding forms for PH/PS/PD (embedded rounding is only
// available at 512-bit vector length).
5630 let Uses = [MXCSR] in
5631 multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5632                                    X86SchedWriteSizes sched> {
5633   let Predicates = [HasFP16] in {
5634     defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5636                                       EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
5638   defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5640                                     EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
5641   defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5643                                     EVEX_V512, TB, PD, REX_W,EVEX_CD8<64, CD8VF>;
// ZMM-only {sae} forms for PH/PS/PD (same structure as the rounding variant).
5646 let Uses = [MXCSR] in
5647 multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5648                                  X86SchedWriteSizes sched> {
5649   let Predicates = [HasFP16] in {
5650     defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5652                                     EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
5654   defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5656                                   EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
5657   defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5659                                   EVEX_V512, TB, PD, REX_W,EVEX_CD8<64, CD8VF>;
// Packed arithmetic: add/mul commutable, sub/div not; each gets PS/PD, PH
// (FP16) and the ZMM rounding form. min/max use SAE forms and are not
// commutable. VMINC/VMAXC are codegen-only commutable min/max (no rounding
// forms). The VAND/VANDN/VOR/VXOR logic ops reuse this machinery with
// null_frag patterns, no MXCSR use, and no FP exceptions.
5662 defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
5663                               SchedWriteFAddSizes, 1>,
5664             avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>,
5665             avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5666 defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
5667                               SchedWriteFMulSizes, 1>,
5668             avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>,
5669             avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5670 defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
5671                               SchedWriteFAddSizes>,
5672             avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>,
5673             avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5674 defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
5675                               SchedWriteFDivSizes>,
5676             avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>,
5677             avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
5678 defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
5679                               SchedWriteFCmpSizes, 0>,
5680             avx512_fp_binop_ph<0x5D, "vmin", X86fmin, X86fmin, SchedWriteFCmpSizes, 0>,
5681             avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5682 defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
5683                               SchedWriteFCmpSizes, 0>,
5684             avx512_fp_binop_ph<0x5F, "vmax", X86fmax, X86fmax, SchedWriteFCmpSizes, 0>,
5685             avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
5686 let isCodeGenOnly = 1 in {
5687   defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
5688                                  SchedWriteFCmpSizes, 1>,
5689                avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc,
5690                                   SchedWriteFCmpSizes, 1>;
5691   defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
5692                                  SchedWriteFCmpSizes, 1>,
5693                avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc,
5694                                   SchedWriteFCmpSizes, 1>;
5696 let Uses = []<Register>, mayRaiseFPException = 0 in {
5697 defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
5698                                SchedWriteFLogicSizes, 1>;
5699 defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
5700                                SchedWriteFLogicSizes, 0>;
5701 defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
5702                                SchedWriteFLogicSizes, 1>;
5703 defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
5704                                SchedWriteFLogicSizes, 1>;
// Packed VSCALEF: rr, rm (full load) and rmb (broadcast) maskable forms.
5707 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5708                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5709   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5710   defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5711                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5712                   "$src2, $src1", "$src1, $src2",
5713                   (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5714                   EVEX, VVVV, Sched<[sched]>;
5715   defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5716                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5717                   "$src2, $src1", "$src1, $src2",
5718                   (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5719                   EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
5720   defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5721                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5722                    "${src2}"#_.BroadcastStr#", $src1",
5723                    "$src1, ${src2}"#_.BroadcastStr,
5724                    (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5725                    EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Scalar VSCALEFS: maskable rr/rm forms on the vector-in-xmm register class.
5729 multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5730                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5731   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5732   defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5733                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5734                            "$src2, $src1", "$src1, $src2",
5735                            (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5737   defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5738                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
5739                          "$src2, $src1", "$src1, $src2",
5740                          (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
5741                          Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates all VSCALEF forms: packed PH/PS/PD (with ZMM rounding
// variants) and scalar SH/SS/SD (with rounding), plus 128/256-bit packed
// forms under VLX. Note packed and scalar use distinct opcodes
// (opc vs opcScaler) and FP16 uses T_MAP6 instead of T8.
5745 multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5746                                 X86SchedWriteWidths sched> {
5747   let Predicates = [HasFP16] in {
5748     defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
5749                avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
5750                EVEX_V512, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
5751     defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
5752                avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
5753                EVEX, VVVV, T_MAP6, PD, EVEX_CD8<16, CD8VT1>;
5755   defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
5756              avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
5757              EVEX_V512, EVEX_CD8<32, CD8VF>, T8, PD;
5758   defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
5759              avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
5760              EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
5761   defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
5762              avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
5763                                     X86scalefsRnd, sched.Scl>,
5764              EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8, PD;
5765   defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
5766              avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
5767                                     X86scalefsRnd, sched.Scl>,
5768              EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>, REX_W, T8, PD;
5770   // Define only if AVX512VL feature is present.
5771   let Predicates = [HasVLX] in {
5772     defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
5773                                    EVEX_V128, EVEX_CD8<32, CD8VF>, T8, PD;
5774     defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
5775                                    EVEX_V256, EVEX_CD8<32, CD8VF>, T8, PD;
5776     defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
5777                                    EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
5778     defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
5779                                    EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
5782   let Predicates = [HasFP16, HasVLX] in {
5783     defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
5784                                      EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6, PD;
5785     defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
5786                                      EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6, PD;
// VSCALEF: packed opcode 0x2C, scalar opcode 0x2D.
5789 defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", SchedWriteFAdd>;
5791 //===----------------------------------------------------------------------===//
5792 // AVX-512 VPTESTM instructions
5793 //===----------------------------------------------------------------------===//
// VPTESTM/VPTESTNM base: compare-style ops writing a mask register (_.KRC).
// rr and rm forms; patterns are intentionally null_frag (selected manually,
// see the NOTE below).
5795 multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
5796                          X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5797   // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
5798   // There are just too many permutations due to commutability and bitcasts.
5799   let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
5800   defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5801                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5802                       "$src2, $src1", "$src1, $src2",
5803                    (null_frag), (null_frag), 1>,
5804                    EVEX, VVVV, Sched<[sched]>;
5806   defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5807                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5808                        "$src2, $src1", "$src1, $src2",
5809                    (null_frag), (null_frag)>,
5810                    EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
5811                    Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast-memory (rmb, EVEX_B) form of VPTESTM/VPTESTNM; dword/qword only.
5815 multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
5816                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5817   let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
5818   defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5819                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5820                     "${src2}"#_.BroadcastStr#", $src1",
5821                     "$src1, ${src2}"#_.BroadcastStr,
5822                     (null_frag), (null_frag)>,
5823                     EVEX_B, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
5824                     Sched<[sched.Folded, sched.ReadAfterFold]>;
// Per-width instantiation (Z/Z256/Z128) incl. broadcast forms; VLX gated.
5827 multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
5828                                   X86SchedWriteWidths sched,
5829                                   AVX512VLVectorVTInfo _> {
5830   let Predicates  = [HasAVX512] in
5831   defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512>,
5832            avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
5834   let Predicates = [HasAVX512, HasVLX] in {
5835   defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256>,
5836               avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
5837   defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128>,
5838               avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
// Dword ("d") and qword ("q") element sizes; qword gets REX_W.
5842 multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
5843                             X86SchedWriteWidths sched> {
5844   defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
5846   defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
5847                                  avx512vl_i64_info>, REX_W;
// Byte/word element forms require HasBWI (and HasVLX for 128/256-bit);
// no broadcast forms exist for byte/word elements.
5850 multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5851                             X86SchedWriteWidths sched> {
5852   let Predicates = [HasBWI] in {
5853   defm WZ:   avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
5854                            v32i16_info>, EVEX_V512, REX_W;
5855   defm BZ:   avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
5856                            v64i8_info>, EVEX_V512;
5859   let Predicates = [HasVLX, HasBWI] in {
5860   defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
5861                             v16i16x_info>, EVEX_V256, REX_W;
5862   defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
5863                             v8i16x_info>, EVEX_V128, REX_W;
5864   defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
5865                             v32i8x_info>, EVEX_V256;
5866   defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
5867                             v16i8x_info>, EVEX_V128;
// Combine byte/word (opc_wb) and dword/qword (opc_dq) opcode spaces.
5871 multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5872                                    X86SchedWriteWidths sched> :
5873   avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
5874   avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
// VPTESTM and VPTESTNM share opcodes 0x26/0x27 and differ only in the
// mandatory prefix: PD for testm vs XS for testnm.
5876 defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
5877                                          SchedWriteVecLogic>, T8, PD;
5878 defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
5879                                          SchedWriteVecLogic>, T8, XS;
5881 //===----------------------------------------------------------------------===//
5882 // AVX-512 Shift instructions
5883 //===----------------------------------------------------------------------===//
// Shift/rotate by an 8-bit immediate: register (ri) and memory (mi) forms.
5885 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5886                             string OpcodeStr, SDNode OpNode,
5887                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5888   let ExeDomain = _.ExeDomain in {
5889   defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5890                    (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5891                       "$src2, $src1", "$src1, $src2",
5892                    (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
5894   defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5895                    (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5896                        "$src2, $src1", "$src1, $src2",
5897                    (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
5899                    Sched<[sched.Folded]>;
// Immediate shift/rotate of a broadcast memory operand (mbi, EVEX_B).
5903 multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5904                              string OpcodeStr, SDNode OpNode,
5905                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5906   let ExeDomain = _.ExeDomain in
5907   defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5908                    (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5909                    "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
5910                    (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
5911                    EVEX_B, Sched<[sched.Folded]>;
// Shift by a count held in an xmm register (or loaded from i128mem):
// the count operand $src2 is always 128-bit regardless of vector width.
5914 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5915                             X86FoldableSchedWrite sched, ValueType SrcVT,
5916                             X86VectorVTInfo _> {
5917    // src2 is always 128-bit
5918   let ExeDomain = _.ExeDomain in {
5919   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5920                    (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5921                       "$src2, $src1", "$src1, $src2",
5922                    (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
5923                     AVX512BIBase, EVEX, VVVV, Sched<[sched]>;
5924   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5925                    (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5926                        "$src2, $src1", "$src1, $src2",
5927                    (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
5929                    EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Per-width instantiation of the xmm-count shifts. Note the differing CD8
// tuple per width (CD8VQ / CD8VH / CD8VF) for the 128-bit count operand.
5933 multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5934                               X86SchedWriteWidths sched, ValueType SrcVT,
5935                               AVX512VLVectorVTInfo VTInfo,
5937   let Predicates = [prd] in
5938   defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
5939                                VTInfo.info512>, EVEX_V512,
5940                                EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5941   let Predicates = [prd, HasVLX] in {
5942   defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
5943                                VTInfo.info256>, EVEX_V256,
5944                                EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5945   defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
5946                                VTInfo.info128>, EVEX_V128,
5947                                EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
// D/Q/W element-size instantiation of the xmm-count shifts; word forms
// require HasBWI, qword gets REX_W.
5951 multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5952                               string OpcodeStr, SDNode OpNode,
5953                               X86SchedWriteWidths sched> {
5954   defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
5955                               avx512vl_i32_info, HasAVX512>;
5956   defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
5957                               avx512vl_i64_info, HasAVX512>, REX_W;
5958   defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
5959                               avx512vl_i16_info, HasBWI>;
// Per-width instantiation of the immediate shift forms (ri/mi + mbi).
5962 multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5963                                   string OpcodeStr, SDNode OpNode,
5964                                   X86SchedWriteWidths sched,
5965                                   AVX512VLVectorVTInfo VTInfo> {
5966   let Predicates = [HasAVX512] in
5967   defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5968                               sched.ZMM, VTInfo.info512>,
5969              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
5970                                VTInfo.info512>, EVEX_V512;
5971   let Predicates = [HasAVX512, HasVLX] in {
5972   defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5973                               sched.YMM, VTInfo.info256>,
5974              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
5975                                VTInfo.info256>, EVEX_V256;
5976   defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5977                               sched.XMM, VTInfo.info128>,
5978              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
5979                                VTInfo.info128>, EVEX_V128;
// Word-element immediate shifts (no broadcast form); HasBWI gated, WIG.
5983 multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
5984                               string OpcodeStr, SDNode OpNode,
5985                               X86SchedWriteWidths sched> {
5986   let Predicates = [HasBWI] in
5987   defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5988                                sched.ZMM, v32i16_info>, EVEX_V512, WIG;
5989   let Predicates = [HasVLX, HasBWI] in {
5990   defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5991                                sched.YMM, v16i16x_info>, EVEX_V256, WIG;
5992   defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5993                                sched.XMM, v8i16x_info>, EVEX_V128, WIG;
// Dword + qword immediate shifts; the Format (MRMnr/MRMnm) selects the
// /n opcode-extension slot within the shared opcode.
5997 multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
5998                                Format ImmFormR, Format ImmFormM,
5999                                string OpcodeStr, SDNode OpNode,
6000                                X86SchedWriteWidths sched> {
6001   defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
6002                                  sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
6003   defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
6004                                  sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W;
// Immediate shifts/rotates (MRMnr/MRMnm encodes the /n slot: /2 srl,
// /6 sll, /4 sra, /0 ror, /1 rol) plus the xmm-count shift forms.
6007 defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
6008                                  SchedWriteVecShiftImm>,
6009              avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
6010                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6012 defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
6013                                  SchedWriteVecShiftImm>,
6014              avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
6015                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6017 defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
6018                                  SchedWriteVecShiftImm>,
6019              avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
6020                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6022 defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
6023                                  SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6024 defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
6025                                  SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6027 defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
6028                                 SchedWriteVecShift>;
6029 defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
6030                                 SchedWriteVecShift>;
6031 defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
6032                                 SchedWriteVecShift>;
6034 // Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// Each pattern widens the 128/256-bit source to 512 bits via INSERT_SUBREG
// into IMPLICIT_DEF, performs the 512-bit arithmetic shift, and extracts the
// original-width subregister. NOTE(review): the instruction being selected
// (presumably VPSRAQZrr / VPSRAQZri) sits on lines elided from this excerpt —
// confirm against the full file.
6035 let Predicates = [HasAVX512, NoVLX] in {
6036   def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
6037             (EXTRACT_SUBREG (v8i64
6039                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6040                  VR128X:$src2)), sub_ymm)>;
6042   def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6043             (EXTRACT_SUBREG (v8i64
6045                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6046                  VR128X:$src2)), sub_xmm)>;
6048   def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
6049             (EXTRACT_SUBREG (v8i64
6051                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6052                  timm:$src2)), sub_ymm)>;
6054   def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
6055             (EXTRACT_SUBREG (v8i64
6057                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6058                  timm:$src2)), sub_xmm)>;
6061 //===-------------------------------------------------------------------===//
6062 // Variable Bit Shifts
6063 //===-------------------------------------------------------------------===//
// Variable (per-element) shift: rr and rm maskable forms; the shift counts
// come from a full-width vector operand rather than a single xmm count.
6065 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
6066                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6067   let ExeDomain = _.ExeDomain in {
6068   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6069                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
6070                       "$src2, $src1", "$src1, $src2",
6071                    (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
6072                    AVX5128IBase, EVEX, VVVV, Sched<[sched]>;
6073   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6074                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
6075                        "$src2, $src1", "$src1, $src2",
6076                    (_.VT (OpNode _.RC:$src1,
6077                    (_.VT (_.LdFrag addr:$src2))))>,
6078                    AVX5128IBase, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
6079                    Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast-memory (rmb, EVEX_B) form of the variable shifts.
6083 multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
6084                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6085   let ExeDomain = _.ExeDomain in
6086   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6087                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6088                     "${src2}"#_.BroadcastStr#", $src1",
6089                     "$src1, ${src2}"#_.BroadcastStr,
6090                     (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
6091                     AVX5128IBase, EVEX_B, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
6092                     Sched<[sched.Folded, sched.ReadAfterFold]>;
// Per-width instantiation (Z/Z256/Z128), broadcast forms included; VLX gated.
6095 multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6096                                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
6097   let Predicates  = [HasAVX512] in
6098   defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
6099            avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
6101   let Predicates = [HasAVX512, HasVLX] in {
6102   defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
6103               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
6104   defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
6105               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
// Dword ("d") and qword ("q") element sizes; qword gets REX_W.
6109 multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
6110                                   SDNode OpNode, X86SchedWriteWidths sched> {
6111   defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
6113   defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
6114                                  avx512vl_i64_info>, REX_W;
6117 // Use 512bit version to implement 128/256 bit in case NoVLX.
6118 multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
6119 SDNode OpNode, list<Predicate> p> {
6120 let Predicates = p in {
6121 def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
6122 (_.info256.VT _.info256.RC:$src2))),
6124 (!cast<Instruction>(OpcodeStr#"Zrr")
6125 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
6126 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
6129 def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
6130 (_.info128.VT _.info128.RC:$src2))),
6132 (!cast<Instruction>(OpcodeStr#"Zrr")
6133 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6134 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
6138 multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6139 SDNode OpNode, X86SchedWriteWidths sched> {
6140 let Predicates = [HasBWI] in
6141 defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6143 let Predicates = [HasVLX, HasBWI] in {
6145 defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6147 defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
// Variable shift instruction definitions: VPSLLV/VPSRAV/VPSRLV (left
// logical, right arithmetic, right logical) for d/q/w elements, plus the
// variable rotate instructions VPRORV/VPROLV mapped to the generic
// rotr/rotl nodes.
6152 defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
6153               avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
6155 defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
6156               avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
6158 defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
6159               avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
6161 defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6162 defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
// NoVLX fall-backs: select the 512-bit instruction for the narrower vector
// types that lack a native encoding without VLX.
6164 defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
6165 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
6166 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
6167 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
6170 // Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Each pattern widens the sources into a zmm register (INSERT_SUBREG over
// IMPLICIT_DEF), performs the 512-bit rotate, and extracts the original
// low subregister. Variable-count patterns match rotl; immediate-count
// patterns match X86vrotli with an i8 target immediate.
6171 let Predicates = [HasAVX512, NoVLX] in {
6172   def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6173             (EXTRACT_SUBREG (v8i64
6175                        (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6176                        (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6178   def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6179             (EXTRACT_SUBREG (v8i64
6181                        (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6182                        (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6185   def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6186             (EXTRACT_SUBREG (v16i32
6188                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6189                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6191   def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6192             (EXTRACT_SUBREG (v16i32
6194                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6195                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6198   def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
6199             (EXTRACT_SUBREG (v8i64
6201                        (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6202                        timm:$src2)), sub_xmm)>;
6203   def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
6204             (EXTRACT_SUBREG (v8i64
6206                        (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6207                        timm:$src2)), sub_ymm)>;
6209   def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
6210             (EXTRACT_SUBREG (v16i32
6212                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6213                        timm:$src2)), sub_xmm)>;
6214   def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
6215             (EXTRACT_SUBREG (v16i32
6217                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6218                        timm:$src2)), sub_ymm)>;
6221 // Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Same widening scheme as above, but for right rotates (rotr / X86vrotri).
6222 let Predicates = [HasAVX512, NoVLX] in {
6223   def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6224             (EXTRACT_SUBREG (v8i64
6226                        (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6227                        (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6229   def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6230             (EXTRACT_SUBREG (v8i64
6232                        (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6233                        (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6236   def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6237             (EXTRACT_SUBREG (v16i32
6239                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6240                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6242   def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6243             (EXTRACT_SUBREG (v16i32
6245                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6246                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6249   def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
6250             (EXTRACT_SUBREG (v8i64
6252                        (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6253                        timm:$src2)), sub_xmm)>;
6254   def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
6255             (EXTRACT_SUBREG (v8i64
6257                        (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6258                        timm:$src2)), sub_ymm)>;
6260   def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
6261             (EXTRACT_SUBREG (v16i32
6263                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6264                        timm:$src2)), sub_xmm)>;
6265   def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
6266             (EXTRACT_SUBREG (v16i32
6268                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6269                        timm:$src2)), sub_ymm)>;
6272 //===-------------------------------------------------------------------===//
6273 // 1-src variable permutation VPERMW/D/Q
6274 //===-------------------------------------------------------------------===//
// avx512_vperm_dq_sizes: VPERMD/Q-style variable permutes reuse the
// avx512_var_shift/_mb multiclasses (same rr/rm/rmb operand shapes).
// Note: only Z and Z256 are defined — there is no 128-bit VPERMD/Q.
6276 multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6277                                  X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6278   let Predicates = [HasAVX512] in
6279   defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6280            avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6282   let Predicates = [HasAVX512, HasVLX] in
6283   defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6284               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
// avx512_vpermi_dq_sizes: immediate-controlled permutes (VPERMQ/VPERMPD
// imm forms), built from the shift-by-immediate multiclasses. Z and Z256
// only, as above.
6287 multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6288                                  string OpcodeStr, SDNode OpNode,
6289                                  X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6290   let Predicates = [HasAVX512] in
6291   defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6292                               sched, VTInfo.info512>,
6293              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6294                                sched, VTInfo.info512>, EVEX_V512;
6295   let Predicates = [HasAVX512, HasVLX] in
6296   defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6297                               sched, VTInfo.info256>,
6298              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6299                                sched, VTInfo.info256>, EVEX_V256;
// avx512_vperm_bw: byte/word variable permutes (VPERMB/VPERMW); feature
// predicate is a parameter (HasVBMI for bytes, HasBWI for words).
6302 multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6303                            Predicate prd, SDNode OpNode,
6304                            X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6305   let Predicates = [prd] in
6306   defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6308   let Predicates = [HasVLX, prd] in {
6309   defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6311   defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
6316 defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6317                                WriteVarShuffle256, avx512vl_i16_info>, REX_W;
6318 defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6319                                WriteVarShuffle256, avx512vl_i8_info>;
6321 defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6322                                     WriteVarShuffle256, avx512vl_i32_info>;
6323 defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6324                                     WriteVarShuffle256, avx512vl_i64_info>, REX_W;
6325 defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6326                                     WriteFVarShuffle256, avx512vl_f32_info>;
6327 defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6328                                     WriteFVarShuffle256, avx512vl_f64_info>, REX_W;
// Immediate forms of VPERMQ/VPERMPD (control in an 8-bit immediate rather
// than a vector register).
6330 defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6331                                      X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6332                                      EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
6333 defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6334                                       X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6335                                       EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
6337 //===----------------------------------------------------------------------===//
6338 // AVX-512 - VPERMIL
6339 //===----------------------------------------------------------------------===//
// avx512_permil_vec: vector-control VPERMILPS/PD. Unlike the permutes above,
// the data type (_) and the integer control type (Ctrl) differ, so rr/rm/rmb
// are spelled out here instead of reusing avx512_var_shift.
6341 multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6342                              X86FoldableSchedWrite sched, X86VectorVTInfo _,
6343                              X86VectorVTInfo Ctrl> {
6344   defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6345                   (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6346                   "$src2, $src1", "$src1, $src2",
6347                   (_.VT (OpNode _.RC:$src1,
6348                                (Ctrl.VT Ctrl.RC:$src2)))>,
6349                   T8, PD, EVEX, VVVV, Sched<[sched]>;
6350   defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6351                   (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6352                   "$src2, $src1", "$src1, $src2",
6355                                (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
6356                   T8, PD, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
6357                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6358   defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6359                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6360                    "${src2}"#_.BroadcastStr#", $src1",
6361                    "$src1, ${src2}"#_.BroadcastStr,
6364                                 (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
6365                    T8, PD, EVEX, VVVV, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6366                    Sched<[sched.Folded, sched.ReadAfterFold]>;
// avx512_permil_vec_common: instantiates the vector-control form at 512
// (HasAVX512) and 128/256 (HasVLX) widths.
6369 multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6370                                     X86SchedWriteWidths sched,
6371                                     AVX512VLVectorVTInfo _,
6372                                     AVX512VLVectorVTInfo Ctrl> {
6373   let Predicates = [HasAVX512] in {
6374   defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6375                                 _.info512, Ctrl.info512>, EVEX_V512;
6377   let Predicates = [HasAVX512, HasVLX] in {
6378   defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6379                                 _.info128, Ctrl.info128>, EVEX_V128;
6380   defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6381                                 _.info256, Ctrl.info256>, EVEX_V256;
// avx512_permil: ties together the variable (OpcVar) and immediate (OpcImm)
// VPERMIL forms under a single NAME.
6385 multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6386                          AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6387   defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6389   defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6390                                     X86VPermilpi, SchedWriteFShuffle, _>,
6391                  EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
6394 let ExeDomain = SSEPackedSingle in
6395 defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6397 let ExeDomain = SSEPackedDouble in
6398 defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6399                                avx512vl_i64_info>, REX_W;
6401 //===----------------------------------------------------------------------===//
6402 // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6403 //===----------------------------------------------------------------------===//
// Immediate-controlled shuffles; all three share opcode 0x70 and are
// distinguished by prefix (66=VPSHUFD, F3=VPSHUFHW, F2=VPSHUFLW).
6405 defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6406                              X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6407                              EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6408 defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6409                                   X86PShufhw, SchedWriteShuffle>,
6410                                   EVEX, AVX512XSIi8Base;
6411 defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6412                                   X86PShuflw, SchedWriteShuffle>,
6413                                   EVEX, AVX512XDIi8Base;
6415 //===----------------------------------------------------------------------===//
6416 // AVX-512 - VPSHUFB
6417 //===----------------------------------------------------------------------===//
// VPSHUFB reuses avx512_var_shift (same two-vector-operand shape); 512-bit
// form requires BWI, narrower forms require BWI+VLX.
6419 multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6420                                X86SchedWriteWidths sched> {
6421   let Predicates = [HasBWI] in
6422   defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6425   let Predicates = [HasVLX, HasBWI] in {
6426   defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6428   defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
6433 defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6434                                   SchedWriteVarShuffle>, WIG;
6436 //===----------------------------------------------------------------------===//
6437 // Move Low to High and High to Low packed FP Instructions
6438 //===----------------------------------------------------------------------===//
// EVEX-encoded VMOVLHPS/VMOVHLPS (xmm only). VMOVHLPS is marked commutable;
// VMOVLHPS is not (the `let isCommutable = 1` below applies only to the
// VMOVHLPSZrr def that follows it).
6440 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6441           (ins VR128X:$src1, VR128X:$src2),
6442           "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6443           [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6444           Sched<[SchedWriteFShuffle.XMM]>, EVEX, VVVV;
6445 let isCommutable = 1 in
6446 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6447           (ins VR128X:$src1, VR128X:$src2),
6448           "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6449           [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6450           Sched<[SchedWriteFShuffle.XMM]>, EVEX, VVVV;
6452 //===----------------------------------------------------------------------===//
6453 // VMOVHPS/PD VMOVLPS Instructions
6454 // All patterns was taken from SSS implementation.
6455 //===----------------------------------------------------------------------===//
// avx512_mov_hilo_packed: load forms of VMOVH/LPS/PD. mayLoad with
// hasSideEffects = 0; the null_frag users below get no ISel pattern at all.
6457 multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6458                                   SDPatternOperator OpNode,
6459                                   X86VectorVTInfo _> {
6460   let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6461   def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6462                   (ins _.RC:$src1, f64mem:$src2),
6463                   !strconcat(OpcodeStr,
6464                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6468                                            (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6469                   Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX, VVVV;
6472 // No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
6473 // SSE1. And MOVLPS pattern is even more complex.
6474 defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6475                                           v4f32x_info>, EVEX_CD8<32, CD8VT2>, TB;
6476 defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6477                                           v2f64x_info>, EVEX_CD8<64, CD8VT1>, TB, PD, REX_W;
6478 defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6479                                           v4f32x_info>, EVEX_CD8<32, CD8VT2>, TB;
6480 defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6481                                           v2f64x_info>, EVEX_CD8<64, CD8VT1>, TB, PD, REX_W;
// Fold a zero-extending 64-bit load into the PD load forms.
6483 let Predicates = [HasAVX512] in {
6485   def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
6486             (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6489   def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
6490             (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
// Store forms: VMOVHPD stores the high f64 element (extracted via X86Unpckh
// of the source with itself); VMOVLPD stores element 0 directly. The PS
// store forms have no patterns (mayStore/hasSideEffects only).
6493 let SchedRW = [WriteFStore] in {
6494 let mayStore = 1, hasSideEffects = 0 in
6495 def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6496                        (ins f64mem:$dst, VR128X:$src),
6497                        "vmovhps\t{$src, $dst|$dst, $src}",
6498                        []>, EVEX, EVEX_CD8<32, CD8VT2>;
6499 def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6500                        (ins f64mem:$dst, VR128X:$src),
6501                        "vmovhpd\t{$src, $dst|$dst, $src}",
6502                        [(store (f64 (extractelt
6503                                      (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6504                                      (iPTR 0))), addr:$dst)]>,
6505                        EVEX, EVEX_CD8<64, CD8VT1>, REX_W;
6506 let mayStore = 1, hasSideEffects = 0 in
6507 def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6508                        (ins f64mem:$dst, VR128X:$src),
6509                        "vmovlps\t{$src, $dst|$dst, $src}",
6510                        []>, EVEX, EVEX_CD8<32, CD8VT2>;
6511 def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6512                        (ins f64mem:$dst, VR128X:$src),
6513                        "vmovlpd\t{$src, $dst|$dst, $src}",
6514                        [(store (f64 (extractelt (v2f64 VR128X:$src),
6515                                      (iPTR 0))), addr:$dst)]>,
6516                        EVEX, EVEX_CD8<64, CD8VT1>, REX_W;
// A shuffle that moves element 1 to element 0 followed by a scalar store of
// element 0 is equivalent to storing the high element directly.
6519 let Predicates = [HasAVX512] in {
6521   def : Pat<(store (f64 (extractelt
6522                            (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6523                            (iPTR 0))), addr:$dst),
6524             (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6526 //===----------------------------------------------------------------------===//
6527 // FMA - Fused Multiply Operations
// avx512_fma3p_213_rm: packed FMA, 213 operand order. $src1 is tied to $dst
// ("$src1 = $dst"); all forms read MXCSR and may raise FP exceptions.
// r = reg/reg/reg, m = full-vector memory, mb = embedded broadcast (EVEX_B).
// Note the DAG operand order (src2, src1, src3) realizes the 213 ordering.
6530 multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6531                                SDNode MaskOpNode, X86FoldableSchedWrite sched,
6532                                X86VectorVTInfo _> {
6533   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6534       Uses = [MXCSR], mayRaiseFPException = 1 in {
6535   defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6536           (ins _.RC:$src2, _.RC:$src3),
6537           OpcodeStr, "$src3, $src2", "$src2, $src3",
6538           (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
6539           (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6540           EVEX, VVVV, Sched<[sched]>;
6542   defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6543           (ins _.RC:$src2, _.MemOp:$src3),
6544           OpcodeStr, "$src3, $src2", "$src2, $src3",
6545           (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
6546           (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6547           EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
6548                              sched.ReadAfterFold]>;
6550   defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6551             (ins _.RC:$src2, _.ScalarMemOp:$src3),
6552             OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
6553             !strconcat("$src2, ${src3}", _.BroadcastStr ),
6555              _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
6556             (MaskOpNode _.RC:$src2,
6557              _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
6558             EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
6559                                        sched.ReadAfterFold]>;
// avx512_fma3_213_round: static-rounding (rb) form with an explicit AVX512RC
// rounding-control operand; EVEX_B + EVEX_RC select the {er} encoding.
6563 multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6564                                  X86FoldableSchedWrite sched,
6565                                  X86VectorVTInfo _> {
6566   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6568   defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6569           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6570           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6571           (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
6572           (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
6573           EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
// avx512_fma3p_213_common: width expansion — Z (with rounding) under prd,
// Z256/Z128 under HasVLX+prd. prd defaults to HasAVX512 and is overridden
// to HasFP16 for the PH family.
6576 multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6577                                    SDNode MaskOpNode, SDNode OpNodeRnd,
6578                                    X86SchedWriteWidths sched,
6579                                    AVX512VLVectorVTInfo _,
6580                                    Predicate prd = HasAVX512> {
6581   let Predicates = [prd] in {
6582     defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6583                                       sched.ZMM, _.info512>,
6584                   avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6586                   EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6588   let Predicates = [HasVLX, prd] in {
6589     defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6590                                     sched.YMM, _.info256>,
6591                 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6592     defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6593                                     sched.XMM, _.info128>,
6594                 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// avx512_fma3p_213_f: element-type expansion — PH (fp16, T_MAP6),
// PS (T8) and PD (T8 + REX_W).
6598 multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6599                               SDNode MaskOpNode, SDNode OpNodeRnd> {
6600   defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6601                                     OpNodeRnd, SchedWriteFMA,
6602                                     avx512vl_f16_info, HasFP16>, T_MAP6, PD;
6603   defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6604                                     OpNodeRnd, SchedWriteFMA,
6605                                     avx512vl_f32_info>, T8, PD;
6606   defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6607                                     OpNodeRnd, SchedWriteFMA,
6608                                     avx512vl_f64_info>, T8, PD, REX_W;
6611 defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
6613 defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
6614                                        X86Fmsub, X86FmsubRnd>;
6615 defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
6616                                        X86Fmaddsub, X86FmaddsubRnd>;
6617 defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
6618                                        X86Fmsubadd, X86FmsubaddRnd>;
6619 defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
6620                                        X86Fnmadd, X86FnmaddRnd>;
6621 defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
6622                                        X86Fnmsub, X86FnmsubRnd>;
// avx512_fma3p_231_rm: packed FMA, 231 operand order (tied $src1 is the
// addend: OpNode src2, src3, src1). Structure mirrors the 213 multiclass:
// r / m / mb (broadcast) forms, MXCSR use, may raise FP exceptions.
6625 multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6626                                SDNode MaskOpNode, X86FoldableSchedWrite sched,
6627                                X86VectorVTInfo _> {
6628   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6629       Uses = [MXCSR], mayRaiseFPException = 1 in {
6630   defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6631           (ins _.RC:$src2, _.RC:$src3),
6632           OpcodeStr, "$src3, $src2", "$src2, $src3",
6634           (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6635           EVEX, VVVV, Sched<[sched]>;
6637   defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6638           (ins _.RC:$src2, _.MemOp:$src3),
6639           OpcodeStr, "$src3, $src2", "$src2, $src3",
6640           (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6641           (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6642           EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
6643                              sched.ReadAfterFold]>;
6645   defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6646          (ins _.RC:$src2, _.ScalarMemOp:$src3),
6647          OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6648          "$src2, ${src3}"#_.BroadcastStr,
6649          (_.VT (OpNode _.RC:$src2,
6650                       (_.VT (_.BroadcastLdFrag addr:$src3)),
6652          (_.VT (MaskOpNode _.RC:$src2,
6653                            (_.VT (_.BroadcastLdFrag addr:$src3)),
6654                            _.RC:$src1)), 1, 0>, EVEX, VVVV, EVEX_B,
6655          Sched<[sched.Folded, sched.ReadAfterFold,
6656                 sched.ReadAfterFold]>;
// avx512_fma3_231_round: static-rounding (rb) form for 231 order.
6660 multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6661                                  X86FoldableSchedWrite sched,
6662                                  X86VectorVTInfo _> {
6663   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6665   defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6666           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6667           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6669           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
6670           1, 1>, EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
// avx512_fma3p_231_common / _f: width and element-type expansion, parallel
// to the 213 family above.
6673 multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6674                                    SDNode MaskOpNode, SDNode OpNodeRnd,
6675                                    X86SchedWriteWidths sched,
6676                                    AVX512VLVectorVTInfo _,
6677                                    Predicate prd = HasAVX512> {
6678   let Predicates = [prd] in {
6679     defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6680                                       sched.ZMM, _.info512>,
6681                   avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6683                   EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6685   let Predicates = [HasVLX, prd] in {
6686     defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6687                                     sched.YMM, _.info256>,
6688                 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6689     defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6690                                     sched.XMM, _.info128>,
6691                 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6695 multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6696                               SDNode MaskOpNode, SDNode OpNodeRnd > {
6697   defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6698                                     OpNodeRnd, SchedWriteFMA,
6699                                     avx512vl_f16_info, HasFP16>, T_MAP6, PD;
6700   defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6701                                     OpNodeRnd, SchedWriteFMA,
6702                                     avx512vl_f32_info>, T8, PD;
6703   defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6704                                     OpNodeRnd, SchedWriteFMA,
6705                                     avx512vl_f64_info>, T8, PD, REX_W;
6708 defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
6710 defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
6711                                        X86Fmsub, X86FmsubRnd>;
6712 defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
6713                                        X86Fmaddsub, X86FmaddsubRnd>;
6714 defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
6715                                        X86Fmsubadd, X86FmsubaddRnd>;
6716 defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
6717                                        X86Fnmadd, X86FnmaddRnd>;
6718 defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
6719                                        X86Fnmsub, X86FnmsubRnd>;
// avx512_fma3p_132_rm: packed FMA, 132 operand order. The memory patterns
// are deliberately written in 312 order (load operand first) so tablegen's
// duplicate-pattern detection does not collide with the 213/231 patterns
// (see the in-line comments below).
6721 multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6722                                SDNode MaskOpNode, X86FoldableSchedWrite sched,
6723                                X86VectorVTInfo _> {
6724   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6725       Uses = [MXCSR], mayRaiseFPException = 1 in {
6726   defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6727           (ins _.RC:$src2, _.RC:$src3),
6728           OpcodeStr, "$src3, $src2", "$src2, $src3",
6730           (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
6731           EVEX, VVVV, Sched<[sched]>;
6733   // Pattern is 312 order so that the load is in a different place from the
6734   // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6735   defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6736           (ins _.RC:$src2, _.MemOp:$src3),
6737           OpcodeStr, "$src3, $src2", "$src2, $src3",
6738           (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
6739           (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
6740           EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
6741                              sched.ReadAfterFold]>;
6743   // Pattern is 312 order so that the load is in a different place from the
6744   // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6745   defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6746           (ins _.RC:$src2, _.ScalarMemOp:$src3),
6747           OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6748           "$src2, ${src3}"#_.BroadcastStr,
6749           (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6750                         _.RC:$src1, _.RC:$src2)),
6751           (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6752                             _.RC:$src1, _.RC:$src2)), 1, 0>,
6753           EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
6754                                      sched.ReadAfterFold]>;
// avx512_fma3_132_round: static-rounding (rb) form for 132 order.
6758 multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6759                                  X86FoldableSchedWrite sched,
6760                                  X86VectorVTInfo _> {
6761   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6763   defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6764           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6765           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6767           (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
6768           1, 1>, EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
// avx512_fma3p_132_common / _f: width and element-type expansion, parallel
// to the 213/231 families above.
6771 multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6772                                    SDNode MaskOpNode, SDNode OpNodeRnd,
6773                                    X86SchedWriteWidths sched,
6774                                    AVX512VLVectorVTInfo _,
6775                                    Predicate prd = HasAVX512> {
6776   let Predicates = [prd] in {
6777     defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6778                                       sched.ZMM, _.info512>,
6779                   avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6781                   EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6783   let Predicates = [HasVLX, prd] in {
6784     defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6785                                     sched.YMM, _.info256>,
6786                 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6787     defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6788                                     sched.XMM, _.info128>,
6789                 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6793 multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6794                               SDNode MaskOpNode, SDNode OpNodeRnd > {
6795   defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6796                                     OpNodeRnd, SchedWriteFMA,
6797                                     avx512vl_f16_info, HasFP16>, T_MAP6, PD;
6798   defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6799                                     OpNodeRnd, SchedWriteFMA,
6800                                     avx512vl_f32_info>, T8, PD;
6801   defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6802                                     OpNodeRnd, SchedWriteFMA,
6803                                     avx512vl_f64_info>, T8, PD, REX_W;
6806 defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
6808 defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
6809                                        X86Fmsub, X86FmsubRnd>;
6810 defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
6811                                        X86Fmaddsub, X86FmaddsubRnd>;
6812 defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
6813                                        X86Fmsubadd, X86FmsubaddRnd>;
6814 defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
6815                                        X86Fnmadd, X86FnmaddRnd>;
6816 defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
6817                                        X86Fnmsub, X86FnmsubRnd>;
// avx512_fma3s_common: scalar FMA skeleton. The intrinsic (_Int) forms carry
// no ISel patterns here (null_frag) — masking patterns are expected to be
// provided elsewhere; the isCodeGenOnly forms operate on scalar FRC
// registers and take their patterns as the RHS_r/RHS_m/RHS_b dag arguments.
// MaskOnlyReg suppresses the register/rounding patterns when set.
6820 multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6821                                dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
6822 let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
6823   defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6824           (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
6825           "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6826           EVEX, VVVV, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
6829   defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6830           (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
6831           "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6832           EVEX, VVVV, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
6833                              SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
6835   let Uses = [MXCSR] in
6836   defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6837           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6838           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
6839           EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
6841   let isCodeGenOnly = 1, isCommutable = 1 in {
6842     def r     : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
6843                      (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
6844                      !strconcat(OpcodeStr,
6845                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6846                      !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX, VVVV, SIMD_EXC;
6847     def m     : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
6848                      (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
6849                      !strconcat(OpcodeStr,
6850                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6851                      [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
6852                                       SchedWriteFMA.Scl.ReadAfterFold]>, EVEX, VVVV, SIMD_EXC;
6854     let Uses = [MXCSR] in
6855     def rb    : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
6856                       (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
6857                      !strconcat(OpcodeStr,
6858                               "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
6859                      !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
6860                      Sched<[SchedWriteFMA.Scl]>, EVEX, VVVV;
6861   }// isCodeGenOnly = 1
6862 }// Constraints = "$src1 = $dst"
// avx512_fma3s_all: instantiates 213/231/132 scalar variants for one scalar
// type, supplying the concrete RHS patterns. The 132 load pattern is in 312
// order for the same duplicate-pattern-detection reason as the packed forms.
6865 multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6866                             string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
6867                             X86VectorVTInfo _, string SUFF> {
6868   let ExeDomain = _.ExeDomain in {
6869   defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
6870                 // Operands for intrinsic are in 123 order to preserve passthu
6872                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6874                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6875                          (_.ScalarLdFrag addr:$src3)))),
6876                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
6877                          _.FRC:$src3, (i32 timm:$rc)))), 0>;
6879   defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
6880                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6882                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
6883                          (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
6884                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
6885                          _.FRC:$src1, (i32 timm:$rc)))), 1>;
6887   // One pattern is 312 order so that the load is in a different place from the
6888   // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6889   defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
6890                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6892                 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6893                          _.FRC:$src1, _.FRC:$src2))),
6894                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
6895                          _.FRC:$src2, (i32 timm:$rc)))), 1>;
6899 multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6900 string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> {
6901 let Predicates = [HasAVX512] in {
6902 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6903 OpNodeRnd, f32x_info, "SS">,
6904 EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD;
6905 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6906 OpNodeRnd, f64x_info, "SD">,
6907 EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD;
6909 let Predicates = [HasFP16] in {
6910 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6911 OpNodeRnd, f16x_info, "SH">,
6912 EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6, PD;
6916 defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
6917 defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
6918 defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
6919 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
6921 multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
6922 SDNode RndOp, string Prefix,
6923 string Suffix, SDNode Move,
6924 X86VectorVTInfo _, PatLeaf ZeroFP,
6925 Predicate prd = HasAVX512> {
6926 let Predicates = [prd] in {
6927 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6929 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6931 (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
6932 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6933 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6935 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6936 (Op _.FRC:$src2, _.FRC:$src3,
6937 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6938 (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
6939 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6940 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6942 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6944 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6945 (_.ScalarLdFrag addr:$src3)))))),
6946 (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
6947 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6950 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6951 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6952 (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
6953 (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
6954 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6957 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6958 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6959 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6960 (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
6961 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6964 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6965 (X86selects_mask VK1WM:$mask,
6966 (MaskedOp _.FRC:$src2,
6967 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6969 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6970 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
6971 VR128X:$src1, VK1WM:$mask,
6972 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6973 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6975 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6976 (X86selects_mask VK1WM:$mask,
6977 (MaskedOp _.FRC:$src2,
6978 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6979 (_.ScalarLdFrag addr:$src3)),
6980 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6981 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
6982 VR128X:$src1, VK1WM:$mask,
6983 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6985 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6986 (X86selects_mask VK1WM:$mask,
6987 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6988 (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
6989 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6990 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
6991 VR128X:$src1, VK1WM:$mask,
6992 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6994 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6995 (X86selects_mask VK1WM:$mask,
6996 (MaskedOp _.FRC:$src2, _.FRC:$src3,
6997 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6998 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6999 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
7000 VR128X:$src1, VK1WM:$mask,
7001 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7002 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7004 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7005 (X86selects_mask VK1WM:$mask,
7006 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7007 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7008 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7009 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
7010 VR128X:$src1, VK1WM:$mask,
7011 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7013 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7014 (X86selects_mask VK1WM:$mask,
7015 (MaskedOp _.FRC:$src2,
7016 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7018 (_.EltVT ZeroFP)))))),
7019 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
7020 VR128X:$src1, VK1WM:$mask,
7021 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7022 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7024 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7025 (X86selects_mask VK1WM:$mask,
7026 (MaskedOp _.FRC:$src2, _.FRC:$src3,
7027 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7028 (_.EltVT ZeroFP)))))),
7029 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
7030 VR128X:$src1, VK1WM:$mask,
7031 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7032 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7034 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7035 (X86selects_mask VK1WM:$mask,
7036 (MaskedOp _.FRC:$src2,
7037 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7038 (_.ScalarLdFrag addr:$src3)),
7039 (_.EltVT ZeroFP)))))),
7040 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
7041 VR128X:$src1, VK1WM:$mask,
7042 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7044 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7045 (X86selects_mask VK1WM:$mask,
7046 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7047 _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
7048 (_.EltVT ZeroFP)))))),
7049 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
7050 VR128X:$src1, VK1WM:$mask,
7051 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7053 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7054 (X86selects_mask VK1WM:$mask,
7055 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7056 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7057 (_.EltVT ZeroFP)))))),
7058 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
7059 VR128X:$src1, VK1WM:$mask,
7060 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7062 // Patterns with rounding mode.
7063 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7065 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7066 _.FRC:$src3, (i32 timm:$rc)))))),
7067 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
7068 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7069 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7071 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7072 (RndOp _.FRC:$src2, _.FRC:$src3,
7073 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7074 (i32 timm:$rc)))))),
7075 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
7076 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7077 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7079 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7080 (X86selects_mask VK1WM:$mask,
7082 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7083 _.FRC:$src3, (i32 timm:$rc)),
7084 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7085 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
7086 VR128X:$src1, VK1WM:$mask,
7087 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7088 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7090 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7091 (X86selects_mask VK1WM:$mask,
7092 (RndOp _.FRC:$src2, _.FRC:$src3,
7093 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7095 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7096 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
7097 VR128X:$src1, VK1WM:$mask,
7098 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7099 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7101 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7102 (X86selects_mask VK1WM:$mask,
7104 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7105 _.FRC:$src3, (i32 timm:$rc)),
7106 (_.EltVT ZeroFP)))))),
7107 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
7108 VR128X:$src1, VK1WM:$mask,
7109 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7110 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7112 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7113 (X86selects_mask VK1WM:$mask,
7114 (RndOp _.FRC:$src2, _.FRC:$src3,
7115 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7117 (_.EltVT ZeroFP)))))),
7118 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
7119 VR128X:$src1, VK1WM:$mask,
7120 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7121 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7124 defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH",
7125 X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7126 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH",
7127 X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7128 defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
7129 X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7130 defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
7131 X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7133 defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7134 "SS", X86Movss, v4f32x_info, fp32imm0>;
7135 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7136 "SS", X86Movss, v4f32x_info, fp32imm0>;
7137 defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7138 "SS", X86Movss, v4f32x_info, fp32imm0>;
7139 defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7140 "SS", X86Movss, v4f32x_info, fp32imm0>;
7142 defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7143 "SD", X86Movsd, v2f64x_info, fp64imm0>;
7144 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7145 "SD", X86Movsd, v2f64x_info, fp64imm0>;
7146 defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7147 "SD", X86Movsd, v2f64x_info, fp64imm0>;
7148 defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7149 "SD", X86Movsd, v2f64x_info, fp64imm0>;
7151 //===----------------------------------------------------------------------===//
7152 // AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
7153 //===----------------------------------------------------------------------===//
7154 let Constraints = "$src1 = $dst" in {
7155 multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
7156 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
7157 // NOTE: The SDNode have the multiply operands first with the add last.
7158 // This enables commuted load patterns to be autogenerated by tablegen.
7159 let ExeDomain = _.ExeDomain in {
7160 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
7161 (ins _.RC:$src2, _.RC:$src3),
7162 OpcodeStr, "$src3, $src2", "$src2, $src3",
7163 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
7164 T8, PD, EVEX, VVVV, Sched<[sched]>;
7166 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7167 (ins _.RC:$src2, _.MemOp:$src3),
7168 OpcodeStr, "$src3, $src2", "$src2, $src3",
7169 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
7170 T8, PD, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
7171 sched.ReadAfterFold]>;
7173 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7174 (ins _.RC:$src2, _.ScalarMemOp:$src3),
7175 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
7176 !strconcat("$src2, ${src3}", _.BroadcastStr ),
7178 (_.VT (_.BroadcastLdFrag addr:$src3)),
7180 T8, PD, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
7181 sched.ReadAfterFold]>;
7184 } // Constraints = "$src1 = $dst"
7186 multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7187 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
7188 let Predicates = [HasIFMA] in {
7189 defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
7190 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7192 let Predicates = [HasVLX, HasIFMA] in {
7193 defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
7194 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7195 defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
7196 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
7200 defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
7201 SchedWriteVecIMul, avx512vl_i64_info>,
7203 defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
7204 SchedWriteVecIMul, avx512vl_i64_info>,
7207 //===----------------------------------------------------------------------===//
7208 // AVX-512 Scalar convert from sign integer to float/double
7209 //===----------------------------------------------------------------------===//
7211 multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
7212 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7213 X86MemOperand x86memop, PatFrag ld_frag, string asm,
7214 string mem, list<Register> _Uses = [MXCSR],
7215 bit _mayRaiseFPException = 1> {
7216 let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
7217 mayRaiseFPException = _mayRaiseFPException in {
7218 let hasSideEffects = 0, isCodeGenOnly = 1 in {
7219 def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7220 (ins DstVT.FRC:$src1, SrcRC:$src),
7221 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7222 EVEX, VVVV, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7224 def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7225 (ins DstVT.FRC:$src1, x86memop:$src),
7226 asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
7227 EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
7228 } // hasSideEffects = 0
7229 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7230 (ins DstVT.RC:$src1, SrcRC:$src2),
7231 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7232 [(set DstVT.RC:$dst,
7233 (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
7234 EVEX, VVVV, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7236 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7237 (ins DstVT.RC:$src1, x86memop:$src2),
7238 asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7239 [(set DstVT.RC:$dst,
7240 (OpNode (DstVT.VT DstVT.RC:$src1),
7241 (ld_frag addr:$src2)))]>,
7242 EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
7244 def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7245 (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
7246 DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
7249 multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7250 X86FoldableSchedWrite sched, RegisterClass SrcRC,
7251 X86VectorVTInfo DstVT, string asm,
7253 let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
7254 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7255 (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7257 "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7258 [(set DstVT.RC:$dst,
7259 (OpNode (DstVT.VT DstVT.RC:$src1),
7262 EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7263 def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
7264 (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
7265 DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
7268 multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
7269 X86FoldableSchedWrite sched,
7270 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7271 X86MemOperand x86memop, PatFrag ld_frag,
7272 string asm, string mem> {
7273 defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
7274 avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7275 ld_frag, asm, mem>, VEX_LIG;
7278 let Predicates = [HasAVX512] in {
7279 defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7281 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7282 TB, XS, EVEX_CD8<32, CD8VT1>;
7283 defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7285 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7286 TB, XS, REX_W, EVEX_CD8<64, CD8VT1>;
7287 defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7288 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
7289 TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7290 defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7292 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7293 TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7295 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7296 (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7297 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7298 (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7300 def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
7301 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7302 def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
7303 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7304 def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
7305 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7306 def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
7307 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7309 def : Pat<(f32 (any_sint_to_fp GR32:$src)),
7310 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7311 def : Pat<(f32 (any_sint_to_fp GR64:$src)),
7312 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7313 def : Pat<(f64 (any_sint_to_fp GR32:$src)),
7314 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7315 def : Pat<(f64 (any_sint_to_fp GR64:$src)),
7316 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7318 defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7320 v4f32x_info, i32mem, loadi32,
7321 "cvtusi2ss", "l">, TB, XS, EVEX_CD8<32, CD8VT1>;
7322 defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7324 v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7325 TB, XS, REX_W, EVEX_CD8<64, CD8VT1>;
7326 defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7327 i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
7328 TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7329 defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7331 v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7332 TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7334 def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7335 (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7336 def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7337 (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7339 def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
7340 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7341 def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
7342 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7343 def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
7344 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7345 def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
7346 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7348 def : Pat<(f32 (any_uint_to_fp GR32:$src)),
7349 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7350 def : Pat<(f32 (any_uint_to_fp GR64:$src)),
7351 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7352 def : Pat<(f64 (any_uint_to_fp GR32:$src)),
7353 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7354 def : Pat<(f64 (any_uint_to_fp GR64:$src)),
7355 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7358 //===----------------------------------------------------------------------===//
7359 // AVX-512 Scalar convert from float/double to integer
7360 //===----------------------------------------------------------------------===//
7362 multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7363 X86VectorVTInfo DstVT, SDNode OpNode,
7365 X86FoldableSchedWrite sched, string asm,
7366 string aliasStr, Predicate prd = HasAVX512> {
7367 let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
7368 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7369 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7370 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7371 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7372 let Uses = [MXCSR] in
7373 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7374 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7375 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
7376 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7378 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7379 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7380 [(set DstVT.RC:$dst, (OpNode
7381 (SrcVT.ScalarIntMemFrags addr:$src)))]>,
7382 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7383 } // Predicates = [prd]
7385 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7386 (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7387 def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7388 (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7389 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7390 (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7391 SrcVT.IntScalarMemOp:$src), 0, "att">;
7394 // Convert float/double to signed/unsigned int 32/64
7395 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7396 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7397 TB, XS, EVEX_CD8<32, CD8VT1>;
7398 defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7399 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7400 TB, XS, REX_W, EVEX_CD8<32, CD8VT1>;
7401 defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7402 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7403 TB, XS, EVEX_CD8<32, CD8VT1>;
7404 defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7405 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7406 TB, XS, REX_W, EVEX_CD8<32, CD8VT1>;
7407 defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7408 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7409 TB, XD, EVEX_CD8<64, CD8VT1>;
7410 defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7411 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7412 TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7413 defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7414 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7415 TB, XD, EVEX_CD8<64, CD8VT1>;
7416 defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7417 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7418 TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7420 multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
7421 X86VectorVTInfo DstVT, SDNode OpNode,
7422 X86FoldableSchedWrite sched> {
7423 let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
7424 let isCodeGenOnly = 1 in {
7425 def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
7426 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7427 [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
7428 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7429 def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
7430 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7431 [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
7432 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7434 } // Predicates = [HasAVX512]
7437 defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
7438 lrint, WriteCvtSS2I>, TB, XS, EVEX_CD8<32, CD8VT1>;
7439 defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
7440 llrint, WriteCvtSS2I>, REX_W, TB, XS, EVEX_CD8<32, CD8VT1>;
7441 defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
7442 lrint, WriteCvtSD2I>, TB, XD, EVEX_CD8<64, CD8VT1>;
7443 defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
7444 llrint, WriteCvtSD2I>, REX_W, TB, XD, EVEX_CD8<64, CD8VT1>;
7446 let Predicates = [HasAVX512] in {
7447 def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
7448 def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;
7450 def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
7451 def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
7454 // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
7455 // which produce unnecessary vmovs{s,d} instructions
7456 let Predicates = [HasAVX512] in {
7457 def : Pat<(v4f32 (X86Movss
7458 (v4f32 VR128X:$dst),
7459 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
7460 (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7462 def : Pat<(v4f32 (X86Movss
7463 (v4f32 VR128X:$dst),
7464 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
7465 (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7467 def : Pat<(v4f32 (X86Movss
7468 (v4f32 VR128X:$dst),
7469 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
7470 (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7472 def : Pat<(v4f32 (X86Movss
7473 (v4f32 VR128X:$dst),
7474 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
7475 (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7477 def : Pat<(v2f64 (X86Movsd
7478 (v2f64 VR128X:$dst),
7479 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
7480 (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7482 def : Pat<(v2f64 (X86Movsd
7483 (v2f64 VR128X:$dst),
7484 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
7485 (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7487 def : Pat<(v2f64 (X86Movsd
7488 (v2f64 VR128X:$dst),
7489 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
7490 (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7492 def : Pat<(v2f64 (X86Movsd
7493 (v2f64 VR128X:$dst),
7494 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
7495 (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7497 def : Pat<(v4f32 (X86Movss
7498 (v4f32 VR128X:$dst),
7499 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
7500 (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7502 def : Pat<(v4f32 (X86Movss
7503 (v4f32 VR128X:$dst),
7504 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
7505 (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7507 def : Pat<(v4f32 (X86Movss
7508 (v4f32 VR128X:$dst),
7509 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
7510 (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7512 def : Pat<(v4f32 (X86Movss
7513 (v4f32 VR128X:$dst),
7514 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
7515 (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7517 def : Pat<(v2f64 (X86Movsd
7518 (v2f64 VR128X:$dst),
7519 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
7520 (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7522 def : Pat<(v2f64 (X86Movsd
7523 (v2f64 VR128X:$dst),
7524 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
7525 (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7527 def : Pat<(v2f64 (X86Movsd
7528 (v2f64 VR128X:$dst),
7529 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
7530 (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7532 def : Pat<(v2f64 (X86Movsd
7533 (v2f64 VR128X:$dst),
7534 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
7535 (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7536 } // Predicates = [HasAVX512]
7538 // Convert float/double to signed/unsigned int 32/64 with truncation
// Multiclass for scalar FP -> integer conversion with truncation
// (VCVTTSS2SI / VCVTTSD2USI families). Emits:
//   rr/rm         - codegen-only forms on the scalar FP register class (FRC)
//   rr_Int/rm_Int - intrinsic forms on the full vector register class
//   rrb_Int       - SAE (suppress-all-exceptions) register form
// plus AT&T-syntax InstAliases carrying the "{l}"/"{q}" width suffix.
7539 multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7540 X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
7541 SDNode OpNodeInt, SDNode OpNodeSAE,
7542 X86FoldableSchedWrite sched, string aliasStr,
7543 Predicate prd = HasAVX512> {
7544 let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
// Codegen-only forms: matched by generic ISel patterns (any_fp_to_[su]int).
7545 let isCodeGenOnly = 1 in {
7546 def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7547 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7548 [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7549 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7550 def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7551 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7552 [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7553 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
// Intrinsic forms: source is the vector register class (operate on element 0).
7556 def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7557 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7558 [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7559 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
// SAE form: still reads MXCSR but has no SIMD_EXC (cannot raise FP exceptions).
7560 let Uses = [MXCSR] in
7561 def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7562 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7563 [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7564 EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
7565 def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7566 (ins _SrcRC.IntScalarMemOp:$src),
7567 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7568 [(set _DstRC.RC:$dst,
7569 (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
7570 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7571 } // Predicates = [prd]
// AT&T aliases append the explicit size suffix (aliasStr) to the mnemonic.
7573 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7574 (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7575 def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7576 (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7577 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7578 (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7579 _SrcRC.IntScalarMemOp:$src), 0, "att">;
// Instantiations of avx512_cvt_s_all for truncating scalar conversions.
// Signed variants use opcode 0x2C; unsigned variants use 0x78.
// "{l}"/"{q}" select the 32-/64-bit AT&T alias suffix; REX_W marks the
// 64-bit-destination forms.
7582 defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7583 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7584 "{l}">, TB, XS, EVEX_CD8<32, CD8VT1>;
7585 defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7586 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7587 "{q}">, REX_W, TB, XS, EVEX_CD8<32, CD8VT1>;
7588 defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7589 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7590 "{l}">, TB, XD, EVEX_CD8<64, CD8VT1>;
7591 defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7592 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7593 "{q}">, REX_W, TB, XD, EVEX_CD8<64, CD8VT1>;
7595 defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7596 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7597 "{l}">, TB, XS, EVEX_CD8<32, CD8VT1>;
7598 defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7599 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7600 "{q}">, TB, XS, REX_W, EVEX_CD8<32, CD8VT1>;
7601 defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7602 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7603 "{l}">, TB, XD, EVEX_CD8<64, CD8VT1>;
7604 defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7605 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7606 "{q}">, TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7608 //===----------------------------------------------------------------------===//
7609 // AVX-512 Convert from float to double and back
7610 //===----------------------------------------------------------------------===//
// Scalar FP <-> FP conversion (e.g. VCVTSD2SS/VCVTSS2SD). All forms read
// MXCSR and may raise FP exceptions.
7612 let Uses = [MXCSR], mayRaiseFPException = 1 in
7613 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7614 X86VectorVTInfo _Src, SDNode OpNode,
7615 X86FoldableSchedWrite sched> {
// Intrinsic forms: masked, operate on vector register classes; $src1 supplies
// the pass-through upper elements.
7616 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7617 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7618 "$src2, $src1", "$src1, $src2",
7619 (_.VT (OpNode (_.VT _.RC:$src1),
7620 (_Src.VT _Src.RC:$src2)))>,
7621 EVEX, VVVV, VEX_LIG, Sched<[sched]>;
7622 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7623 (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7624 "$src2, $src1", "$src1, $src2",
7625 (_.VT (OpNode (_.VT _.RC:$src1),
7626 (_Src.ScalarIntMemFrags addr:$src2)))>,
7627 EVEX, VVVV, VEX_LIG,
7628 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Codegen-only FRC forms with no patterns: selected via explicit Pats below.
7630 let isCodeGenOnly = 1, hasSideEffects = 0 in {
7631 def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7632 (ins _.FRC:$src1, _Src.FRC:$src2),
7633 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7634 EVEX, VVVV, VEX_LIG, Sched<[sched]>;
7636 def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7637 (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7638 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7639 EVEX, VVVV, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7643 // Scalar Conversion with SAE - suppress all exceptions
// Register-only masked form with EVEX.b set ({sae}); reads MXCSR but has no
// mayRaiseFPException here, matching SAE semantics.
7644 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7645 X86VectorVTInfo _Src, SDNode OpNodeSAE,
7646 X86FoldableSchedWrite sched> {
7647 let Uses = [MXCSR] in
7648 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7649 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7650 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7651 (_.VT (OpNodeSAE (_.VT _.RC:$src1),
7652 (_Src.VT _Src.RC:$src2)))>,
7653 EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>;
7656 // Scalar Conversion with rounding control (RC)
// Register-only masked form taking an explicit AVX512RC rounding-mode
// immediate ($rc), forwarded to the node as (i32 timm:$rc).
7657 multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7658 X86VectorVTInfo _Src, SDNode OpNodeRnd,
7659 X86FoldableSchedWrite sched> {
7660 let Uses = [MXCSR] in
7661 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7662 (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7663 "$rc, $src2, $src1", "$src1, $src2, $rc",
7664 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7665 (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
7666 EVEX, VVVV, VEX_LIG, Sched<[sched]>,
// Narrowing scalar conversion (e.g. f64->f32, f32->f16): plain form plus
// rounding-control form, gated on prd.
7669 multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
7670 SDNode OpNode, SDNode OpNodeRnd,
7671 X86FoldableSchedWrite sched,
7672 X86VectorVTInfo _src, X86VectorVTInfo _dst,
7673 Predicate prd = HasAVX512> {
7674 let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7675 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7676 avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7677 OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>;
// Widening scalar conversion (e.g. f32->f64, f16->f32): plain form plus SAE
// form (widening is exact, so SAE rather than rounding control), gated on prd.
7681 multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
7682 SDNode OpNode, SDNode OpNodeSAE,
7683 X86FoldableSchedWrite sched,
7684 X86VectorVTInfo _src, X86VectorVTInfo _dst,
7685 Predicate prd = HasAVX512> {
7686 let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7687 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7688 avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
7689 EVEX_CD8<_src.EltSize, CD8VT1>;
// Scalar FP <-> FP conversion instantiations (SS/SD plus FP16 SH variants).
7692 defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
7693 X86froundsRnd, WriteCvtSD2SS, f64x_info,
7694 f32x_info>, TB, XD, REX_W;
// NOTE(review): the VCVTSS2SD defm below appears truncated in this chunk
// (its trailing "_dst>/attributes" line is missing) — verify against upstream.
7695 defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
7696 X86fpextsSAE, WriteCvtSS2SD, f32x_info,
7698 defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
7699 X86froundsRnd, WriteCvtSD2SS, f64x_info,
7700 f16x_info, HasFP16>, T_MAP5, XD, REX_W;
7701 defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
7702 X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7703 f64x_info, HasFP16>, T_MAP5, XS;
7704 defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
7705 X86froundsRnd, WriteCvtSD2SS, f32x_info,
7706 f16x_info, HasFP16>, T_MAP5;
7707 defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
7708 X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7709 f32x_info, HasFP16>, T_MAP6;
// Select scalar fpextend/fpround through the codegen-only FRC forms; the
// unused pass-through operand is filled with IMPLICIT_DEF. Folded-load
// variants are restricted to OptForSize.
7711 def : Pat<(f64 (any_fpextend FR32X:$src)),
7712 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7713 Requires<[HasAVX512]>;
7714 def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
7715 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7716 Requires<[HasAVX512, OptForSize]>;
7718 def : Pat<(f32 (any_fpround FR64X:$src)),
7719 (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7720 Requires<[HasAVX512]>;
// FP16 variants of the same patterns.
7722 def : Pat<(f32 (any_fpextend FR16X:$src)),
7723 (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
7724 Requires<[HasFP16]>;
7725 def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
7726 (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
7727 Requires<[HasFP16, OptForSize]>;
7729 def : Pat<(f64 (any_fpextend FR16X:$src)),
7730 (VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
7731 Requires<[HasFP16]>;
7732 def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
7733 (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7734 Requires<[HasFP16, OptForSize]>;
7736 def : Pat<(f16 (any_fpround FR32X:$src)),
7737 (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
7738 Requires<[HasFP16]>;
7739 def : Pat<(f16 (any_fpround FR64X:$src)),
7740 (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
7741 Requires<[HasFP16]>;
// Fold a scalar convert feeding a Movss/Movsd insert into the _Int form,
// which preserves the destination's upper elements directly.
7743 def : Pat<(v4f32 (X86Movss
7744 (v4f32 VR128X:$dst),
7745 (v4f32 (scalar_to_vector
7746 (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
7747 (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
7748 Requires<[HasAVX512]>;
7750 def : Pat<(v2f64 (X86Movsd
7751 (v2f64 VR128X:$dst),
7752 (v2f64 (scalar_to_vector
7753 (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
7754 (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
7755 Requires<[HasAVX512]>;
7757 //===----------------------------------------------------------------------===//
7758 // AVX-512 Vector convert from signed/unsigned integer to float/double
7759 // and from float/double to signed/unsigned integer
7760 //===----------------------------------------------------------------------===//
// Generic packed conversion: rr, rm (full load, DAG overridable via
// LdDAG/MaskLdDAG) and rmb (broadcast load) forms, each with mask and
// zero-mask variants via AVX512_maskable_cvt. OpNode matches unmasked uses,
// MaskOpNode the masked ones.
// NOTE(review): several continuation lines of this multiclass are missing
// from this chunk (gaps in the embedded numbering) — compare with upstream
// before further edits.
7762 multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7763 X86VectorVTInfo _Src, SDPatternOperator OpNode, SDPatternOperator MaskOpNode,
7764 X86FoldableSchedWrite sched,
7765 string Broadcast = _.BroadcastStr,
7766 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7767 RegisterClass MaskRC = _.KRCWM,
7768 dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
7769 dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
7770 let Uses = [MXCSR], mayRaiseFPException = 1 in {
7771 defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
7773 (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
7774 (ins MaskRC:$mask, _Src.RC:$src),
7775 OpcodeStr, "$src", "$src",
7776 (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7777 (vselect_mask MaskRC:$mask,
7778 (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7780 (vselect_mask MaskRC:$mask,
7781 (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7783 EVEX, Sched<[sched]>;
7785 defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7787 (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
7788 (ins MaskRC:$mask, MemOp:$src),
7789 OpcodeStr#Alias, "$src", "$src",
7791 (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
7792 (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
7793 EVEX, Sched<[sched.Folded]>;
// Broadcast-load form (EVEX.b): one scalar memory element splatted.
7795 defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7796 (ins _Src.ScalarMemOp:$src),
7797 (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
7798 (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
7800 "${src}"#Broadcast, "${src}"#Broadcast,
7801 (_.VT (OpNode (_Src.VT
7802 (_Src.BroadcastLdFrag addr:$src))
7804 (vselect_mask MaskRC:$mask,
7808 (_Src.BroadcastLdFrag addr:$src)))),
7810 (vselect_mask MaskRC:$mask,
7814 (_Src.BroadcastLdFrag addr:$src)))),
7816 EVEX, EVEX_B, Sched<[sched.Folded]>;
7819 // Conversion with SAE - suppress all exceptions
// Packed register-only SAE form; EVEX.b encodes {sae} for full-width ops.
7820 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7821 X86VectorVTInfo _Src, SDNode OpNodeSAE,
7822 X86FoldableSchedWrite sched> {
7823 let Uses = [MXCSR] in
7824 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7825 (ins _Src.RC:$src), OpcodeStr,
7826 "{sae}, $src", "$src, {sae}",
7827 (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
7828 EVEX, EVEX_B, Sched<[sched]>;
7831 // Conversion with rounding control (RC)
// Packed register-only form with an explicit AVX512RC rounding immediate.
7832 multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7833 X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd,
7834 X86FoldableSchedWrite sched> {
7835 let Uses = [MXCSR] in
7836 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7837 (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
7838 "$rc, $src", "$src, $rc",
7839 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
7840 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
7843 // Similar to avx512_vcvt_fp, but uses an extload for the memory form.
// Overrides LdDAG/MaskLdDAG with the "extload<VT>" PatFrag so the memory
// form matches an extending load directly.
7844 multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7845 X86VectorVTInfo _Src, SDPatternOperator OpNode,
7847 X86FoldableSchedWrite sched,
7848 string Broadcast = _.BroadcastStr,
7849 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7850 RegisterClass MaskRC = _.KRCWM>
7851 : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
7852 Alias, MemOp, MaskRC,
7853 (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
7854 (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
7856 // Extend [Float to Double, Half to Float]
// 512-bit form adds SAE; 128-bit form uses the X86vfpext nodes (only the low
// elements are converted) and a 64-bit memory operand.
7857 multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
7858 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
7859 X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
7860 let Predicates = [prd] in {
7861 defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info512, _src.info256,
7862 any_fpextend, fpextend, sched.ZMM>,
7863 avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
7864 X86vfpextSAE, sched.ZMM>, EVEX_V512;
7866 let Predicates = [prd, HasVLX] in {
7867 defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
7868 X86any_vfpext, X86vfpext, sched.XMM,
7869 _dst.info128.BroadcastStr,
7870 "", f64mem>, EVEX_V128;
7871 defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
7872 any_fpextend, fpextend, sched.YMM>, EVEX_V256;
7876 // Truncate [Double to Float, Float to Half]
// Z128 instruction patterns are disabled (null_frag) and replaced by the
// explicit Pats below so X86vmfpround can drive masking; "{x}"/"{y}" alias
// suffixes disambiguate the shared 128-bit destination in the asm parser.
7877 multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
7878 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
7879 X86SchedWriteWidths sched, Predicate prd = HasAVX512,
7880 PatFrag bcast128 = _src.info128.BroadcastLdFrag,
7881 PatFrag loadVT128 = _src.info128.LdFrag,
7882 RegisterClass maskRC128 = _src.info128.KRCWM> {
7883 let Predicates = [prd] in {
7884 defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
7885 X86any_vfpround, X86vfpround, sched.ZMM>,
7886 avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
7887 X86vfproundRnd, sched.ZMM>, EVEX_V512;
7889 let Predicates = [prd, HasVLX] in {
7890 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
7891 null_frag, null_frag, sched.XMM,
7892 _src.info128.BroadcastStr, "{x}",
7893 f128mem, maskRC128>, EVEX_V128;
7894 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
7895 X86any_vfpround, X86vfpround,
7896 sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;
7898 // Special patterns to allow use of X86vmfpround for masking. Instruction
7899 // patterns have been disabled with null_frag.
7900 def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
7901 (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
7902 def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
7904 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
7905 def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
7907 (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;
7909 def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
7910 (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
7911 def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
7913 (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
7914 def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
7916 (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;
7918 def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
7919 (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
7920 def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
7921 (_dst.info128.VT VR128X:$src0), maskRC128:$mask),
7922 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
7923 def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
7924 _dst.info128.ImmAllZerosV, maskRC128:$mask),
7925 (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
// AT&T aliases with explicit "x"/"y" suffixes for 128/256-bit source forms.
7928 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
7929 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7930 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7931 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7932 VK2WM:$mask, VR128X:$src), 0, "att">;
7933 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
7934 "$dst {${mask}} {z}, $src}",
7935 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7936 VK2WM:$mask, VR128X:$src), 0, "att">;
7937 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7938 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7939 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
7940 "$dst {${mask}}, ${src}{1to2}}",
7941 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7942 VK2WM:$mask, f64mem:$src), 0, "att">;
7943 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7944 "$dst {${mask}} {z}, ${src}{1to2}}",
7945 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7946 VK2WM:$mask, f64mem:$src), 0, "att">;
7948 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
7949 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7950 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7951 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7952 VK4WM:$mask, VR256X:$src), 0, "att">;
7953 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
7954 "$dst {${mask}} {z}, $src}",
7955 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7956 VK4WM:$mask, VR256X:$src), 0, "att">;
7957 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7958 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7959 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
7960 "$dst {${mask}}, ${src}{1to4}}",
7961 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7962 VK4WM:$mask, f64mem:$src), 0, "att">;
7963 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7964 "$dst {${mask}} {z}, ${src}{1to4}}",
7965 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7966 VK4WM:$mask, f64mem:$src), 0, "att">;
// Packed double<->single conversion instantiations.
7969 defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
7970 avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
7971 REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
7972 defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
7973 avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
7974 TB, EVEX_CD8<32, CD8VH>;
7976 // Extend Half to Double
// FP16-only: all source widths use v8f16x_info since a 128-bit register
// holds 8 halves; narrower forms convert only the low elements.
7977 multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
7978 X86SchedWriteWidths sched> {
7979 let Predicates = [HasFP16] in {
7980 defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info,
7981 any_fpextend, fpextend, sched.ZMM>,
7982 avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info,
7983 X86vfpextSAE, sched.ZMM>, EVEX_V512;
// Extra pattern for the extending load the fpextend override matches.
7984 def : Pat<(v8f64 (extloadv8f16 addr:$src)),
7985 (!cast<Instruction>(NAME # "Zrm") addr:$src)>;
7987 let Predicates = [HasFP16, HasVLX] in {
7988 defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info,
7989 X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "",
7991 defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info,
7992 X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "",
7997 // Truncate Double to Half
// All destinations are v8f16x_info (128-bit register); the "{x}"/"{y}"/"{z}"
// alias suffixes distinguish the 128/256/512-bit source forms for the asm
// parser. Z128/Z256 instruction patterns are disabled with null_frag and
// provided by explicit Pats elsewhere.
7998 multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
7999 let Predicates = [HasFP16] in {
8000 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info,
8001 X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">,
8002 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info,
8003 X86vfproundRnd, sched.ZMM>, EVEX_V512;
8005 let Predicates = [HasFP16, HasVLX] in {
8006 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag,
8007 null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8009 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag,
8010 null_frag, sched.YMM, "{1to4}", "{y}", f256mem,
// AT&T aliases: "x" = 128-bit source forms.
8013 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8014 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8015 VR128X:$src), 0, "att">;
8016 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8017 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8018 VK2WM:$mask, VR128X:$src), 0, "att">;
8019 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8020 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8021 VK2WM:$mask, VR128X:$src), 0, "att">;
8022 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8023 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8024 i64mem:$src), 0, "att">;
8025 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8026 "$dst {${mask}}, ${src}{1to2}}",
8027 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8028 VK2WM:$mask, i64mem:$src), 0, "att">;
8029 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8030 "$dst {${mask}} {z}, ${src}{1to2}}",
8031 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8032 VK2WM:$mask, i64mem:$src), 0, "att">;
// "y" = 256-bit source forms.
8034 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8035 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8036 VR256X:$src), 0, "att">;
8037 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8038 "$dst {${mask}}, $src}",
8039 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8040 VK4WM:$mask, VR256X:$src), 0, "att">;
8041 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8042 "$dst {${mask}} {z}, $src}",
8043 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8044 VK4WM:$mask, VR256X:$src), 0, "att">;
8045 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8046 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8047 i64mem:$src), 0, "att">;
8048 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8049 "$dst {${mask}}, ${src}{1to4}}",
8050 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8051 VK4WM:$mask, i64mem:$src), 0, "att">;
8052 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8053 "$dst {${mask}} {z}, ${src}{1to4}}",
8054 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8055 VK4WM:$mask, i64mem:$src), 0, "att">;
// "z" = 512-bit source forms.
8057 def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
8058 (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
8059 VR512:$src), 0, "att">;
8060 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
8061 "$dst {${mask}}, $src}",
8062 (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
8063 VK8WM:$mask, VR512:$src), 0, "att">;
8064 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
8065 "$dst {${mask}} {z}, $src}",
8066 (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
8067 VK8WM:$mask, VR512:$src), 0, "att">;
8068 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
8069 (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
8070 i64mem:$src), 0, "att">;
8071 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
8072 "$dst {${mask}}, ${src}{1to8}}",
8073 (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
8074 VK8WM:$mask, i64mem:$src), 0, "att">;
8075 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
8076 "$dst {${mask}} {z}, ${src}{1to8}}",
8077 (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
8078 VK8WM:$mask, i64mem:$src), 0, "att">;
// FP16 packed conversion instantiations (all gated on HasFP16 via the
// multiclasses' Predicate parameter or their bodies).
8081 defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info,
8082 avx512vl_f32_info, SchedWriteCvtPD2PS,
8083 HasFP16>, T_MAP5, PD, EVEX_CD8<32, CD8VF>;
8084 defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info,
8085 avx512vl_f16_info, SchedWriteCvtPS2PD,
8086 HasFP16>, T_MAP6, PD, EVEX_CD8<16, CD8VH>;
8087 defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>,
8088 REX_W, T_MAP5, PD, EVEX_CD8<64, CD8VF>;
8089 defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>,
8090 T_MAP5, EVEX_CD8<16, CD8VQ>;
8092 let Predicates = [HasFP16, HasVLX] in {
8093 // Special patterns to allow use of X86vmfpround for masking. Instruction
8094 // patterns have been disabled with null_frag.
// 256-bit source: register, load, and broadcast forms, each with plain,
// merge-masked (k) and zero-masked (kz) selections.
8095 def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))),
8096 (VCVTPD2PHZ256rr VR256X:$src)>;
8097 def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0),
8099 (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
8100 def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV,
8102 (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
8104 def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))),
8105 (VCVTPD2PHZ256rm addr:$src)>;
8106 def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0),
8108 (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8109 def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV,
8111 (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>;
8113 def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))),
8114 (VCVTPD2PHZ256rmb addr:$src)>;
8115 def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8116 (v8f16 VR128X:$src0), VK4WM:$mask),
8117 (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8118 def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8119 v8f16x_info.ImmAllZerosV, VK4WM:$mask),
8120 (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
// 128-bit source: same shape of patterns with VK2WM masks.
8122 def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))),
8123 (VCVTPD2PHZ128rr VR128X:$src)>;
8124 def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0),
8126 (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8127 def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV,
8129 (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
8131 def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))),
8132 (VCVTPD2PHZ128rm addr:$src)>;
8133 def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0),
8135 (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8136 def : Pat<(X86vmfpround (loadv2f64 addr:$src), v8f16x_info.ImmAllZerosV,
8138 (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>;
8140 def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))),
8141 (VCVTPD2PHZ128rmb addr:$src)>;
8142 def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8143 (v8f16 VR128X:$src0), VK2WM:$mask),
8144 (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8145 def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8146 v8f16x_info.ImmAllZerosV, VK2WM:$mask),
8147 (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
8150 // Convert Signed/Unsigned Doubleword to Double
// int32->f64 is exact: clears the MXCSR use and exception flag inherited
// from avx512_vcvt_fp.
8151 let Uses = []<Register>, mayRaiseFPException = 0 in
8152 multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8153 SDNode MaskOpNode, SDPatternOperator OpNode128,
8154 SDNode MaskOpNode128,
8155 X86SchedWriteWidths sched> {
8156 // No rounding in this op
8157 let Predicates = [HasAVX512] in
8158 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
8159 MaskOpNode, sched.ZMM>, EVEX_V512;
8161 let Predicates = [HasVLX] in {
// Z128 overrides the load DAGs: only the low 64 bits (two i32s) are read,
// matched as a scalar_to_vector of a loadi64.
8162 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
8163 OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
8165 (v2f64 (OpNode128 (bc_v4i32
8167 (scalar_to_vector (loadi64 addr:$src)))))),
8168 (v2f64 (MaskOpNode128 (bc_v4i32
8170 (scalar_to_vector (loadi64 addr:$src))))))>,
8172 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
8173 MaskOpNode, sched.YMM>, EVEX_V256;
8177 // Convert Signed/Unsigned Doubleword to Float
// int32->f32 may round: the 512-bit form adds an explicit rounding-control
// variant.
8178 multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8179 SDNode MaskOpNode, SDNode OpNodeRnd,
8180 X86SchedWriteWidths sched> {
8181 let Predicates = [HasAVX512] in
8182 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
8183 MaskOpNode, sched.ZMM>,
8184 avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
8185 OpNodeRnd, sched.ZMM>, EVEX_V512;
8187 let Predicates = [HasVLX] in {
8188 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
8189 MaskOpNode, sched.XMM>, EVEX_V128;
8190 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
8191 MaskOpNode, sched.YMM>, EVEX_V256;
8195 // Convert Float to Signed/Unsigned Doubleword with truncation
// Truncating conversion: the 512-bit form adds an SAE variant (no rounding
// control — truncation fixes the rounding mode).
8196 multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8198 SDNode OpNodeSAE, X86SchedWriteWidths sched> {
8199 let Predicates = [HasAVX512] in {
8200 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8201 MaskOpNode, sched.ZMM>,
8202 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
8203 OpNodeSAE, sched.ZMM>, EVEX_V512;
8205 let Predicates = [HasVLX] in {
8206 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8207 MaskOpNode, sched.XMM>, EVEX_V128;
8208 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8209 MaskOpNode, sched.YMM>, EVEX_V256;
8213 // Convert Float to Signed/Unsigned Doubleword
// Non-truncating conversion: the 512-bit form adds rounding control instead
// of SAE.
8214 multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8215 SDNode MaskOpNode, SDNode OpNodeRnd,
8216 X86SchedWriteWidths sched> {
8217 let Predicates = [HasAVX512] in {
8218 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8219 MaskOpNode, sched.ZMM>,
8220 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
8221 OpNodeRnd, sched.ZMM>, EVEX_V512;
8223 let Predicates = [HasVLX] in {
8224 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8225 MaskOpNode, sched.XMM>, EVEX_V128;
8226 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8227 MaskOpNode, sched.YMM>, EVEX_V256;
8231 // Convert Double to Signed/Unsigned Doubleword with truncation
// f64->i32 narrows, so Z128/Z256 share a v4i32x_info destination; "{x}"/"{y}"
// alias suffixes and explicit broadcast strings disambiguate them. Z128
// patterns are disabled (null_frag). 512-bit form adds SAE.
8232 multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8233 SDNode MaskOpNode, SDNode OpNodeSAE,
8234 X86SchedWriteWidths sched> {
8235 let Predicates = [HasAVX512] in {
8236 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8237 MaskOpNode, sched.ZMM>,
8238 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
8239 OpNodeSAE, sched.ZMM>, EVEX_V512;
8241 let Predicates = [HasVLX] in {
8242 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8243 // memory forms of these instructions in Asm Parser. They have the same
8244 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8245 // due to the same reason.
8246 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8247 null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8249 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8250 MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
// AT&T aliases with explicit "x"/"y" source-width suffixes.
8253 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8254 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8255 VR128X:$src), 0, "att">;
8256 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8257 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8258 VK2WM:$mask, VR128X:$src), 0, "att">;
8259 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8260 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8261 VK2WM:$mask, VR128X:$src), 0, "att">;
8262 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8263 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8264 f64mem:$src), 0, "att">;
8265 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8266 "$dst {${mask}}, ${src}{1to2}}",
8267 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8268 VK2WM:$mask, f64mem:$src), 0, "att">;
8269 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8270 "$dst {${mask}} {z}, ${src}{1to2}}",
8271 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8272 VK2WM:$mask, f64mem:$src), 0, "att">;
8274 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8275 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8276 VR256X:$src), 0, "att">;
8277 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8278 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8279 VK4WM:$mask, VR256X:$src), 0, "att">;
8280 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8281 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8282 VK4WM:$mask, VR256X:$src), 0, "att">;
8283 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8284 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8285 f64mem:$src), 0, "att">;
8286 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8287 "$dst {${mask}}, ${src}{1to4}}",
8288 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8289 VK4WM:$mask, f64mem:$src), 0, "att">;
8290 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8291 "$dst {${mask}} {z}, ${src}{1to4}}",
8292 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8293 VK4WM:$mask, f64mem:$src), 0, "att">;
8296 // Convert Double to Signed/Unsigned Doubleword
8297 multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8298 SDNode MaskOpNode, SDNode OpNodeRnd,
8299 X86SchedWriteWidths sched> {
8300 let Predicates = [HasAVX512] in {
8301 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8302 MaskOpNode, sched.ZMM>,
8303 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
8304 OpNodeRnd, sched.ZMM>, EVEX_V512;
8306 let Predicates = [HasVLX] in {
8307 // We need "x"/"y" suffixes in order to distinguish between 128 and 256
8308 // memory forms of these instructions in the Asm Parser. They have the same
8309 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8310 // due to the same reason.
8311 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8312 null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8314 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8315 MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
// AT&T-dialect-only aliases (flag 0, "att") restoring the explicit "x"
// suffix for the 128-bit forms: plain, merge-masked ({k}), zero-masked
// ({k}{z}), and the {1to2} broadcast-load forms (f64mem source).
8318 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8319 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
8320 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8321 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8322 VK2WM:$mask, VR128X:$src), 0, "att">;
8323 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8324 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8325 VK2WM:$mask, VR128X:$src), 0, "att">;
8326 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8327 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8328 f64mem:$src), 0, "att">;
8329 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8330 "$dst {${mask}}, ${src}{1to2}}",
8331 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8332 VK2WM:$mask, f64mem:$src), 0, "att">;
8333 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8334 "$dst {${mask}} {z}, ${src}{1to2}}",
8335 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8336 VK2WM:$mask, f64mem:$src), 0, "att">;
// Same set with the "y" suffix for the 256-bit source forms; the narrowing
// result still lives in a VR128X.
8338 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8339 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
8340 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8341 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8342 VK4WM:$mask, VR256X:$src), 0, "att">;
8343 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8344 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8345 VK4WM:$mask, VR256X:$src), 0, "att">;
8346 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8347 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8348 f64mem:$src), 0, "att">;
8349 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8350 "$dst {${mask}}, ${src}{1to4}}",
8351 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8352 VK4WM:$mask, f64mem:$src), 0, "att">;
8353 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8354 "$dst {${mask}} {z}, ${src}{1to4}}",
8355 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8356 VK4WM:$mask, f64mem:$src), 0, "att">;
8359 // Convert Double to Signed/Unsigned Quadword
8360 multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8361 SDNode MaskOpNode, SDNode OpNodeRnd,
8362 X86SchedWriteWidths sched> {
8363 let Predicates = [HasDQI] in {
8364 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8365 MaskOpNode, sched.ZMM>,
8366 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
8367 OpNodeRnd, sched.ZMM>, EVEX_V512;
8369 let Predicates = [HasDQI, HasVLX] in {
8370 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8371 MaskOpNode, sched.XMM>, EVEX_V128;
8372 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8373 MaskOpNode, sched.YMM>, EVEX_V256;
8377 // Convert Double to Signed/Unsigned Quadword with truncation
8378 multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8379 SDNode MaskOpNode, SDNode OpNodeRnd,
8380 X86SchedWriteWidths sched> {
8381 let Predicates = [HasDQI] in {
8382 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8383 MaskOpNode, sched.ZMM>,
8384 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
8385 OpNodeRnd, sched.ZMM>, EVEX_V512;
8387 let Predicates = [HasDQI, HasVLX] in {
8388 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8389 MaskOpNode, sched.XMM>, EVEX_V128;
8390 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8391 MaskOpNode, sched.YMM>, EVEX_V256;
8395 // Convert Signed/Unsigned Quadword to Double
8396 multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8397 SDNode MaskOpNode, SDNode OpNodeRnd,
8398 X86SchedWriteWidths sched> {
8399 let Predicates = [HasDQI] in {
8400 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
8401 MaskOpNode, sched.ZMM>,
8402 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
8403 OpNodeRnd, sched.ZMM>, EVEX_V512;
8405 let Predicates = [HasDQI, HasVLX] in {
8406 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
8407 MaskOpNode, sched.XMM>, EVEX_V128;
8408 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
8409 MaskOpNode, sched.YMM>, EVEX_V256;
8413 // Convert Float to Signed/Unsigned Quadword
8414 multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8415 SDNode MaskOpNode, SDNode OpNodeRnd,
8416 X86SchedWriteWidths sched> {
8417 let Predicates = [HasDQI] in {
8418 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8419 MaskOpNode, sched.ZMM>,
8420 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
8421 OpNodeRnd, sched.ZMM>, EVEX_V512;
8423 let Predicates = [HasDQI, HasVLX] in {
8424 // Explicitly specified broadcast string, since we take only 2 elements
8425 // from v4f32x_info source
8426 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8427 MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8428 (v2i64 (OpNode (bc_v4f32
8430 (scalar_to_vector (loadf64 addr:$src)))))),
8431 (v2i64 (MaskOpNode (bc_v4f32
8433 (scalar_to_vector (loadf64 addr:$src))))))>,
8435 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8436 MaskOpNode, sched.YMM>, EVEX_V256;
8440 // Convert Float to Signed/Unsigned Quadword with truncation
8441 multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8442 SDNode MaskOpNode, SDNode OpNodeRnd,
8443 X86SchedWriteWidths sched> {
8444 let Predicates = [HasDQI] in {
8445 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8446 MaskOpNode, sched.ZMM>,
8447 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
8448 OpNodeRnd, sched.ZMM>, EVEX_V512;
8450 let Predicates = [HasDQI, HasVLX] in {
8451 // Explicitly specified broadcast string, since we take only 2 elements
8452 // from v4f32x_info source
8453 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8454 MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8455 (v2i64 (OpNode (bc_v4f32
8457 (scalar_to_vector (loadf64 addr:$src)))))),
8458 (v2i64 (MaskOpNode (bc_v4f32
8460 (scalar_to_vector (loadf64 addr:$src))))))>,
8462 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8463 MaskOpNode, sched.YMM>, EVEX_V256;
8467 // Convert Signed/Unsigned Quadword to Float
8468 // Also Convert Signed/Unsigned Doubleword to Half
8469 multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8470 SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
8471 SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
8472 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8473 X86SchedWriteWidths sched, Predicate prd = HasDQI> {
8474 let Predicates = [prd] in {
8475 defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
8476 MaskOpNode, sched.ZMM>,
8477 avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
8478 OpNodeRnd, sched.ZMM>, EVEX_V512;
8480 let Predicates = [prd, HasVLX] in {
8481 // We need "x"/"y" suffixes in order to distinguish between 128 and 256
8482 // memory forms of these instructions in the Asm Parser. They have the same
8483 // dest type. We also specify the broadcast string explicitly
8484 // due to the same reason.
8485 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
8486 null_frag, sched.XMM, _src.info128.BroadcastStr,
8487 "{x}", i128mem, _src.info128.KRCWM>,
8489 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
8490 MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
8493 // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
8494 // patterns have been disabled with null_frag.
// Register forms: unmasked (OpNode128), merge-masked (passthrough value in
// $src0 -> Z128rrk) and zero-masked (ImmAllZerosV -> Z128rrkz).
8495 def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
8496 (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
8497 def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
8498 _src.info128.KRCWM:$mask),
8499 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
8500 def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
8501 _src.info128.KRCWM:$mask),
8502 (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;
// Folded-load forms (LdFrag), same three masking variants.
8504 def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
8505 (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
8506 def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
8507 _src.info128.KRCWM:$mask),
8508 (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8509 def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
8510 _src.info128.KRCWM:$mask),
8511 (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;
// Broadcast-load forms (64-bit element broadcast via X86VBroadcastld64),
// again plain / merge-masked / zero-masked.
8513 def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))),
8514 (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
8515 def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8516 (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask),
8517 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8518 def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8519 _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask),
8520 (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>;
// AT&T-dialect-only aliases (flag 0, "att") restoring the explicit "x"
// suffix for the 128-bit forms; the broadcast forms use an i64mem source
// here (integer-element source), unlike the f64mem used by the pd2dq forms.
8523 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8524 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8525 VR128X:$src), 0, "att">;
8526 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8527 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8528 VK2WM:$mask, VR128X:$src), 0, "att">;
8529 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8530 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8531 VK2WM:$mask, VR128X:$src), 0, "att">;
8532 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8533 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8534 i64mem:$src), 0, "att">;
8535 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8536 "$dst {${mask}}, ${src}{1to2}}",
8537 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8538 VK2WM:$mask, i64mem:$src), 0, "att">;
8539 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8540 "$dst {${mask}} {z}, ${src}{1to2}}",
8541 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8542 VK2WM:$mask, i64mem:$src), 0, "att">;
// Same aliases with the "y" suffix for the 256-bit source forms.
8544 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8545 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8546 VR256X:$src), 0, "att">;
8547 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8548 "$dst {${mask}}, $src}",
8549 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8550 VK4WM:$mask, VR256X:$src), 0, "att">;
8551 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8552 "$dst {${mask}} {z}, $src}",
8553 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8554 VK4WM:$mask, VR256X:$src), 0, "att">;
8555 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8556 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8557 i64mem:$src), 0, "att">;
8558 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8559 "$dst {${mask}}, ${src}{1to4}}",
8560 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8561 VK4WM:$mask, i64mem:$src), 0, "att">;
8562 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8563 "$dst {${mask}} {z}, ${src}{1to4}}",
8564 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8565 VK4WM:$mask, i64mem:$src), 0, "att">;
// Instantiations of the conversion multiclasses above. Each defm supplies
// the opcode byte, mnemonic, DAG nodes (the "any_"/"X86any_" variants are
// used for the unmasked patterns, the plain ones for masked patterns, and
// the *Rnd/*SAE nodes for the embedded-rounding/SAE forms), scheduling
// info, and the encoding prefix/CD8 tuple.

// Doubleword <-> floating point, signed and unsigned.
8568 defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
8569 X86any_VSintToFP, X86VSintToFP,
8570 SchedWriteCvtDQ2PD>, TB, XS, EVEX_CD8<32, CD8VH>;
8572 defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
8573 X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8574 TB, EVEX_CD8<32, CD8VF>;
// Truncating fp -> signed/unsigned doubleword.
8576 defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
8577 X86cvttp2si, X86cvttp2siSAE,
8578 SchedWriteCvtPS2DQ>, TB, XS, EVEX_CD8<32, CD8VF>;
8580 defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
8581 X86cvttp2si, X86cvttp2siSAE,
8582 SchedWriteCvtPD2DQ>,
8583 TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
8585 defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
8586 X86cvttp2ui, X86cvttp2uiSAE,
8587 SchedWriteCvtPS2DQ>, TB, EVEX_CD8<32, CD8VF>;
8589 defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
8590 X86cvttp2ui, X86cvttp2uiSAE,
8591 SchedWriteCvtPD2DQ>,
8592 TB, REX_W, EVEX_CD8<64, CD8VF>;
8594 defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
8595 uint_to_fp, X86any_VUintToFP, X86VUintToFP,
8596 SchedWriteCvtDQ2PD>, TB, XS, EVEX_CD8<32, CD8VH>;
8598 defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
8599 uint_to_fp, X86VUintToFpRnd,
8600 SchedWriteCvtDQ2PS>, TB, XD, EVEX_CD8<32, CD8VF>;
// Rounding (non-truncating) fp -> signed/unsigned doubleword.
8602 defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
8603 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, TB, PD,
8604 EVEX_CD8<32, CD8VF>;
8606 defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
8607 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, TB, XD,
8608 REX_W, EVEX_CD8<64, CD8VF>;
8610 defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
8611 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8612 TB, EVEX_CD8<32, CD8VF>;
8614 defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
8615 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W,
8616 TB, EVEX_CD8<64, CD8VF>;
// fp -> signed/unsigned quadword (the multiclasses gate these on HasDQI).
8618 defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
8619 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, REX_W,
8620 TB, PD, EVEX_CD8<64, CD8VF>;
8622 defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
8623 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, TB, PD,
8624 EVEX_CD8<32, CD8VH>;
8626 defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
8627 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W,
8628 TB, PD, EVEX_CD8<64, CD8VF>;
8630 defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
8631 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, TB, PD,
8632 EVEX_CD8<32, CD8VH>;
// Truncating fp -> signed/unsigned quadword (HasDQI).
8634 defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
8635 X86cvttp2si, X86cvttp2siSAE,
8636 SchedWriteCvtPD2DQ>, REX_W,
8637 TB, PD, EVEX_CD8<64, CD8VF>;
8639 defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
8640 X86cvttp2si, X86cvttp2siSAE,
8641 SchedWriteCvtPS2DQ>, TB, PD,
8642 EVEX_CD8<32, CD8VH>;
8644 defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
8645 X86cvttp2ui, X86cvttp2uiSAE,
8646 SchedWriteCvtPD2DQ>, REX_W,
8647 TB, PD, EVEX_CD8<64, CD8VF>;
8649 defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
8650 X86cvttp2ui, X86cvttp2uiSAE,
8651 SchedWriteCvtPS2DQ>, TB, PD,
8652 EVEX_CD8<32, CD8VH>;
// Quadword -> double (HasDQI).
8654 defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
8655 sint_to_fp, X86VSintToFpRnd,
8656 SchedWriteCvtDQ2PD>, REX_W, TB, XS, EVEX_CD8<64, CD8VF>;
8658 defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
8659 uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
8660 REX_W, TB, XS, EVEX_CD8<64, CD8VF>;
// Shared narrowing multiclass: dword -> half (gated on HasFP16) and
// quadword -> single (default HasDQI predicate).
8662 defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp,
8663 X86any_VSintToFP, X86VMSintToFP,
8664 X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8665 SchedWriteCvtDQ2PS, HasFP16>,
8666 T_MAP5, EVEX_CD8<32, CD8VF>;
8668 defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp,
8669 X86any_VUintToFP, X86VMUintToFP,
8670 X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8671 SchedWriteCvtDQ2PS, HasFP16>, T_MAP5, XD,
8672 EVEX_CD8<32, CD8VF>;
8674 defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp,
8675 X86any_VSintToFP, X86VMSintToFP,
8676 X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8677 SchedWriteCvtDQ2PS>, REX_W, TB,
8678 EVEX_CD8<64, CD8VF>;
8680 defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp,
8681 X86any_VUintToFP, X86VMUintToFP,
8682 X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8683 SchedWriteCvtDQ2PS>, REX_W, TB, XD,
8684 EVEX_CD8<64, CD8VF>;
8686 let Predicates = [HasVLX] in {
8687 // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8688 // patterns have been disabled with null_frag.
8689 def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8690 (VCVTPD2DQZ128rr VR128X:$src)>;
8691 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8693 (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8694 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8696 (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8698 def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8699 (VCVTPD2DQZ128rm addr:$src)>;
8700 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8702 (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8703 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8705 (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8707 def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
8708 (VCVTPD2DQZ128rmb addr:$src)>;
8709 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8710 (v4i32 VR128X:$src0), VK2WM:$mask),
8711 (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8712 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8713 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8714 (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8716 // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8717 // patterns have been disabled with null_frag.
8718 def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
8719 (VCVTTPD2DQZ128rr VR128X:$src)>;
8720 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8722 (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8723 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8725 (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8727 def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
8728 (VCVTTPD2DQZ128rm addr:$src)>;
8729 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8731 (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8732 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8734 (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8736 def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
8737 (VCVTTPD2DQZ128rmb addr:$src)>;
8738 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8739 (v4i32 VR128X:$src0), VK2WM:$mask),
8740 (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8741 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8742 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8743 (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8745 // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8746 // patterns have been disabled with null_frag.
8747 def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8748 (VCVTPD2UDQZ128rr VR128X:$src)>;
8749 def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8751 (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8752 def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8754 (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8756 def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8757 (VCVTPD2UDQZ128rm addr:$src)>;
8758 def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8760 (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8761 def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8763 (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8765 def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
8766 (VCVTPD2UDQZ128rmb addr:$src)>;
8767 def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8768 (v4i32 VR128X:$src0), VK2WM:$mask),
8769 (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8770 def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8771 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8772 (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8774 // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8775 // patterns have been disabled with null_frag.
8776 def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
8777 (VCVTTPD2UDQZ128rr VR128X:$src)>;
8778 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8780 (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8781 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8783 (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8785 def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
8786 (VCVTTPD2UDQZ128rm addr:$src)>;
8787 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8789 (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8790 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8792 (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8794 def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
8795 (VCVTTPD2UDQZ128rmb addr:$src)>;
8796 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8797 (v4i32 VR128X:$src0), VK2WM:$mask),
8798 (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8799 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8800 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8801 (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8804 let Predicates = [HasDQI, HasVLX] in {
8805 def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8806 (VCVTPS2QQZ128rm addr:$src)>;
8807 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8808 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8810 (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8811 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8812 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8813 v2i64x_info.ImmAllZerosV)),
8814 (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8816 def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8817 (VCVTPS2UQQZ128rm addr:$src)>;
8818 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8819 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8821 (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8822 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8823 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8824 v2i64x_info.ImmAllZerosV)),
8825 (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8827 def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8828 (VCVTTPS2QQZ128rm addr:$src)>;
8829 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8830 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8832 (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8833 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8834 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8835 v2i64x_info.ImmAllZerosV)),
8836 (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8838 def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8839 (VCVTTPS2UQQZ128rm addr:$src)>;
8840 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8841 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8843 (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8844 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8845 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8846 v2i64x_info.ImmAllZerosV)),
8847 (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8850 let Predicates = [HasVLX] in {
8851 def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8852 (VCVTDQ2PDZ128rm addr:$src)>;
8853 def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8854 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8856 (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8857 def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8858 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8859 v2f64x_info.ImmAllZerosV)),
8860 (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8862 def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8863 (VCVTUDQ2PDZ128rm addr:$src)>;
8864 def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8865 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8867 (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8868 def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8869 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8870 v2f64x_info.ImmAllZerosV)),
8871 (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8874 //===----------------------------------------------------------------------===//
8875 // Half precision conversion instructions
8876 //===----------------------------------------------------------------------===//
// Half -> single conversion (VCVTPH2PS). Both forms read MXCSR and may
// raise FP exceptions. AVX512_maskable_split takes two patterns: the
// X86any_cvtph2ps node for the unmasked instruction and X86cvtph2ps for
// the masked variants.
8878 let Uses = [MXCSR], mayRaiseFPException = 1 in
8879 multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8880 X86MemOperand x86memop, dag ld_dag,
8881 X86FoldableSchedWrite sched> {
// Register source form.
8882 defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
8883 (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
8884 (X86any_cvtph2ps (_src.VT _src.RC:$src)),
8885 (X86cvtph2ps (_src.VT _src.RC:$src))>,
8886 T8, PD, Sched<[sched]>;
// Memory source form; the load DAG (ld_dag) is supplied by the
// instantiation site so the 128-bit form can use a 64-bit vzload.
8887 defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
8888 (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
8889 (X86any_cvtph2ps (_src.VT ld_dag)),
8890 (X86cvtph2ps (_src.VT ld_dag))>,
8891 T8, PD, Sched<[sched.Folded]>;
8894 multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8895 X86FoldableSchedWrite sched> {
8896 let Uses = [MXCSR] in
8897 defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
8898 (ins _src.RC:$src), "vcvtph2ps",
8899 "{sae}, $src", "$src, {sae}",
8900 (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
8901 T8, PD, EVEX_B, Sched<[sched]>;
// 512-bit form (AVX512F); also gets the {sae} variant via
// avx512_cvtph2ps_sae.
8904 let Predicates = [HasAVX512] in
8905 defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
8906 (load addr:$src), WriteCvtPH2PSZ>,
8907 avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
8908 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
// 128/256-bit forms require VLX. The 128-bit memory form reads only 64
// bits (v4f32 result), hence the X86vzload64-based load DAG.
8910 let Predicates = [HasVLX] in {
8911 defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
8912 (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
8913 EVEX_CD8<32, CD8VH>;
8914 defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
8915 (bitconvert (v2i64 (X86vzload64 addr:$src))),
8916 WriteCvtPH2PS>, EVEX, EVEX_V128,
8917 EVEX_CD8<32, CD8VH>;
8919 // Pattern match vcvtph2ps of a scalar i64 load.
8920 def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
8921 (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
8922 (VCVTPH2PSZ128rm addr:$src)>;
8925 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8926 X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
8927 let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8928 def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8929 (ins _src.RC:$src1, i32u8imm:$src2),
8930 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8931 [(set _dest.RC:$dst,
8932 (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
8934 let Constraints = "$src0 = $dst" in
8935 def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8936 (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8937 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
8938 [(set _dest.RC:$dst,
8939 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8940 _dest.RC:$src0, _src.KRCWM:$mask))]>,
8941 Sched<[RR]>, EVEX_K;
8942 def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8943 (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8944 "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
8945 [(set _dest.RC:$dst,
8946 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8947 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
8948 Sched<[RR]>, EVEX_KZ;
8949 let hasSideEffects = 0, mayStore = 1 in {
8950 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
8951 (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
8952 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8954 def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
8955 (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8956 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
8957 EVEX_K, Sched<[MR]>;
8962 multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8964 let hasSideEffects = 0, Uses = [MXCSR] in {
8965 def rrb : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8966 (ins _src.RC:$src1, i32u8imm:$src2),
8967 "vcvtps2ph\t{$src2, {sae}, $src1, $dst|$dst, $src1, {sae}, $src2}",
8968 [(set _dest.RC:$dst,
8969 (X86cvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
8970 EVEX_B, Sched<[Sched]>;
8971 let Constraints = "$src0 = $dst" in
8972 def rrbk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8973 (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8974 "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}}|$dst {${mask}}, $src1, {sae}, $src2}",
8975 [(set _dest.RC:$dst,
8976 (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
8977 _dest.RC:$src0, _src.KRCWM:$mask))]>,
8978 EVEX_B, Sched<[Sched]>, EVEX_K;
8979 def rrbkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8980 (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8981 "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, {sae}, $src2}",
8982 [(set _dest.RC:$dst,
8983 (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
8984 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
8985 EVEX_B, Sched<[Sched]>, EVEX_KZ;
// 512-bit VCVTPS2PH (v16f32 -> v16i16 half-precision bit pattern),
// plus a pattern to fold the unmasked convert directly into a store.
8989 let Predicates = [HasAVX512] in {
8990 defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
8991 WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
8992 avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
8993 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
// Fold "convert then store" into the memory form VCVTPS2PHZmr.
8995 def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
8996 (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
// 128/256-bit VCVTPS2PH under AVX512VL, plus store-folding patterns.
// The 128-bit convert only produces 64 bits of useful result, so the
// scalar f64/i64 extract-element stores are also matched to the mr form.
8999 let Predicates = [HasVLX] in {
9000 defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
9001 WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
9002 EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
9003 defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
9004 WriteCvtPS2PH, WriteCvtPS2PHSt>,
9005 EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
// Store of the low 64 bits of the 128-bit result (f64 view).
9007 def : Pat<(store (f64 (extractelt
9008 (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9009 (iPTR 0))), addr:$dst),
9010 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
// Store of the low 64 bits of the 128-bit result (i64 view).
9011 def : Pat<(store (i64 (extractelt
9012 (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9013 (iPTR 0))), addr:$dst),
9014 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
// Fold the 256-bit convert-then-store.
9015 def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
9016 (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
9019 // Unordered/Ordered scalar fp compare with Sae and set EFLAGS
// Assembly-only (empty pattern list) {sae} register form of the scalar
// compare-and-set-EFLAGS instructions (vcomis*/vucomis*). Uses = [MXCSR]
// since the compare reads MXCSR state.
9020 multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
9021 string OpcodeStr, Domain d,
9022 X86FoldableSchedWrite sched = WriteFComX> {
9023 let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in
9024 def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
9025 !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
9026 EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
// {sae} forms of VUCOMISS/VUCOMISD (opc 0x2E, unordered) and
// VCOMISS/VCOMISD (opc 0x2F, ordered). All write EFLAGS.
9029 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9030 defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
9031 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9032 defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
9033 AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
9034 defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
9035 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9036 defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
9037 AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
// EVEX-encoded (v)comis/(v)ucomis register/memory forms, reusing the SSE
// multiclasses. The FR32X/FR64X forms match the scalar fcmp DAG nodes;
// the isCodeGenOnly VR128X "_int" forms match the X86(u)comi intrinsics.
9040 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9041 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
9042 "ucomiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9043 EVEX_CD8<32, CD8VT1>;
9044 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
9045 "ucomisd", SSEPackedDouble>, TB, PD, EVEX,
9046 VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9047 defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
9048 "comiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9049 EVEX_CD8<32, CD8VT1>;
9050 defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
9051 "comisd", SSEPackedDouble>, TB, PD, EVEX,
9052 VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
// Intrinsic (vector-operand) forms; not produced by the assembler.
9053 let isCodeGenOnly = 1 in {
9054 defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
9055 sse_load_f32, "ucomiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9056 EVEX_CD8<32, CD8VT1>;
9057 defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
9058 sse_load_f64, "ucomisd", SSEPackedDouble>, TB, PD, EVEX,
9059 VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9061 defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
9062 sse_load_f32, "comiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9063 EVEX_CD8<32, CD8VT1>;
9064 defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
9065 sse_load_f64, "comisd", SSEPackedDouble>, TB, PD, EVEX,
9066 VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
// AVX512-FP16 half-precision compare-and-set-EFLAGS instructions
// (vcomish/vucomish): {sae} forms, scalar FR16X forms, and
// isCodeGenOnly intrinsic forms, mirroring the f32/f64 definitions above.
9070 let Defs = [EFLAGS], Predicates = [HasFP16] in {
9071 defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish",
9072 SSEPackedSingle>, AVX512PSIi8Base, T_MAP5,
9073 EVEX_CD8<16, CD8VT1>;
9074 defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish",
9075 SSEPackedSingle>, AVX512PSIi8Base, T_MAP5,
9076 EVEX_CD8<16, CD8VT1>;
9077 defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16,
9078 "ucomish", SSEPackedSingle>, T_MAP5, EVEX,
9079 VEX_LIG, EVEX_CD8<16, CD8VT1>;
9080 defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16,
9081 "comish", SSEPackedSingle>, T_MAP5, EVEX,
9082 VEX_LIG, EVEX_CD8<16, CD8VT1>;
// Intrinsic (vector-operand) forms.
9083 let isCodeGenOnly = 1 in {
9084 defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem,
9085 sse_load_f16, "ucomish", SSEPackedSingle>,
9086 T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9088 defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem,
9089 sse_load_f16, "comish", SSEPackedSingle>,
9090 T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9094 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh
// Scalar 14-bit-precision reciprocal/rsqrt approximations: maskable
// register-register (rr) and register-memory (rm) forms. The result's
// upper elements are taken from $src1 (standard scalar-op convention
// in this file); the approximated element comes from $src2.
9095 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9096 X86FoldableSchedWrite sched, X86VectorVTInfo _,
9097 Predicate prd = HasAVX512> {
9098 let Predicates = [prd], ExeDomain = _.ExeDomain in {
9099 defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9100 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9101 "$src2, $src1", "$src1, $src2",
9102 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9103 EVEX, VVVV, VEX_LIG, Sched<[sched]>;
// Memory form folds a scalar load of $src2.
9104 defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9105 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9106 "$src2, $src1", "$src1, $src2",
9107 (OpNode (_.VT _.RC:$src1),
9108 (_.ScalarIntMemFrags addr:$src2))>, EVEX, VVVV, VEX_LIG,
9109 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Scalar approximation instructions. The FP16 vrcpsh/vrsqrtsh variants do
// not read MXCSR; the 14-bit f32/f64 variants are placed under
// Uses = [MXCSR].
9113 defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl,
9114 f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>,
9116 defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s,
9117 SchedWriteFRsqrt.Scl, f16x_info, HasFP16>,
9118 EVEX_CD8<16, CD8VT1>, T_MAP6, PD;
9119 let Uses = [MXCSR] in {
9120 defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
9121 f32x_info>, EVEX_CD8<32, CD8VT1>,
9123 defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
9124 f64x_info>, REX_W, EVEX_CD8<64, CD8VT1>,
9126 defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
9127 SchedWriteFRsqrt.Scl, f32x_info>,
9128 EVEX_CD8<32, CD8VT1>, T8, PD;
9129 defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
9130 SchedWriteFRsqrt.Scl, f64x_info>, REX_W,
9131 EVEX_CD8<64, CD8VT1>, T8, PD;
9134 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
// Packed 14-bit-precision reciprocal/rsqrt approximations: maskable
// register (r), full-vector load (m), and broadcast-load (mb, EVEX_B)
// forms for one vector width described by _.
9135 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
9136 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9137 let ExeDomain = _.ExeDomain in {
9138 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9139 (ins _.RC:$src), OpcodeStr, "$src", "$src",
9140 (_.VT (OpNode _.RC:$src))>, EVEX, T8, PD,
// Full-width memory operand.
9142 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9143 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9145 (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8, PD,
9146 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast form: a single scalar element splatted to the full vector.
9147 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9148 (ins _.ScalarMemOp:$src), OpcodeStr,
9149 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9151 (_.BroadcastLdFrag addr:$src)))>,
9152 EVEX, T8, PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates avx512_fp14_p across every vector width: ZMM under the
// base feature, XMM/YMM under HasVLX, and the FP16 "ph" variants under
// HasFP16 (T_MAP6 encoding). The 14ps/14pd forms read MXCSR.
9156 multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
9157 X86SchedWriteWidths sched> {
9158 let Uses = [MXCSR] in {
9159 defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM,
9160 v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
9161 defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM,
9162 v8f64_info>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
// FP16 packed variant (no "14" in the mnemonic).
9164 let Predicates = [HasFP16] in
9165 defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
9166 v32f16_info>, EVEX_V512, T_MAP6, EVEX_CD8<16, CD8VF>;
9168 // Define only if AVX512VL feature is present.
9169 let Predicates = [HasVLX], Uses = [MXCSR] in {
9170 defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9171 OpNode, sched.XMM, v4f32x_info>,
9172 EVEX_V128, EVEX_CD8<32, CD8VF>;
9173 defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9174 OpNode, sched.YMM, v8f32x_info>,
9175 EVEX_V256, EVEX_CD8<32, CD8VF>;
9176 defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9177 OpNode, sched.XMM, v2f64x_info>,
9178 EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>;
9179 defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9180 OpNode, sched.YMM, v4f64x_info>,
9181 EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>;
// 128/256-bit FP16 variants need both FP16 and VL.
9183 let Predicates = [HasFP16, HasVLX] in {
9184 defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9185 OpNode, sched.XMM, v8f16x_info>,
9186 EVEX_V128, T_MAP6, EVEX_CD8<16, CD8VF>;
9187 defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9188 OpNode, sched.YMM, v16f16x_info>,
9189 EVEX_V256, T_MAP6, EVEX_CD8<16, CD8VF>;
// Packed approximation instructions across all widths (vrsqrt14*/vrsqrtph,
// vrcp14*/vrcpph).
9193 defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>;
9194 defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>;
9196 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
// Scalar 28-bit-precision approximations: maskable register (r),
// {sae} register (rb, EVEX_B, uses OpNodeSAE), and memory (m) forms.
// r/m carry SIMD_EXC (may raise FP exceptions); the {sae} form does not.
9197 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
9198 SDNode OpNode, SDNode OpNodeSAE,
9199 X86FoldableSchedWrite sched> {
9200 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
9201 defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9202 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9203 "$src2, $src1", "$src1, $src2",
9204 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9205 Sched<[sched]>, SIMD_EXC;
// Suppress-all-exceptions variant.
9207 defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9208 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9209 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
9210 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9211 EVEX_B, Sched<[sched]>;
// Folded scalar-load form.
9213 defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9214 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9215 "$src2, $src1", "$src1, $src2",
9216 (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
9217 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
// Instantiates the scalar 28-bit forms for ss (f32) and sd (f64).
9221 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9222 SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9223 defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
9224 sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD, EVEX, VVVV;
9225 defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
9226 sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD, EVEX, VVVV;
// FP16 scalar getexp variant (sh, f16), gated on HasFP16.
9229 multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
9230 SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9231 let Predicates = [HasFP16] in
9232 defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode, OpNodeSAE, sched>,
9233 EVEX_CD8<16, CD8VT1>, T_MAP6, PD, EVEX, VVVV;
// Scalar ERI instructions (AVX512ER) and VGETEXP (base AVX512 + FP16).
9236 let Predicates = [HasERI] in {
9237 defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
9238 SchedWriteFRcp.Scl>;
9239 defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
9240 SchedWriteFRsqrt.Scl>;
9243 defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9244 SchedWriteFRnd.Scl>,
9245 avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9246 SchedWriteFRnd.Scl>;
9247 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
// Packed 28-bit-precision approximations: maskable register (r),
// full-vector load (m), and broadcast-load (mb, EVEX_B) forms.
// Reads MXCSR and may raise FP exceptions.
9249 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9250 SDNode OpNode, X86FoldableSchedWrite sched> {
9251 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9252 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9253 (ins _.RC:$src), OpcodeStr, "$src", "$src",
9254 (OpNode (_.VT _.RC:$src))>,
// Full-width memory operand.
9257 defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9258 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9260 (bitconvert (_.LdFrag addr:$src))))>,
9261 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast form: one scalar element splatted to the full vector.
9263 defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9264 (ins _.ScalarMemOp:$src), OpcodeStr,
9265 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9267 (_.BroadcastLdFrag addr:$src)))>,
9268 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// {sae} register form of the packed 28-bit approximations (EVEX_B).
9271 multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9272 SDNode OpNode, X86FoldableSchedWrite sched> {
9273 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
9274 defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9275 (ins _.RC:$src), OpcodeStr,
9276 "{sae}, $src", "$src, {sae}",
9277 (OpNode (_.VT _.RC:$src))>,
9278 EVEX_B, Sched<[sched]>;
// ZMM-only packed ERI instantiation: ps (f32) and pd (f64), each with
// normal and {sae} forms.
9281 multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
9282 SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9283 defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
9284 avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
9285 T8, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
9286 defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
9287 avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
9288 T8, PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
// 128/256-bit packed unary-op instantiations under AVX512VL (no {sae}
// forms at these widths).
9291 multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
9292 SDNode OpNode, X86SchedWriteWidths sched> {
9293 // Define only if AVX512VL feature is present.
9294 let Predicates = [HasVLX] in {
9295 defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
9297 EVEX_V128, T8, PD, EVEX_CD8<32, CD8VF>;
9298 defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
9300 EVEX_V256, T8, PD, EVEX_CD8<32, CD8VF>;
9301 defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
9303 EVEX_V128, REX_W, T8, PD, EVEX_CD8<64, CD8VF>;
9304 defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
9306 EVEX_V256, REX_W, T8, PD, EVEX_CD8<64, CD8VF>;
// FP16 packed getexp variants: ZMM (with {sae}) under HasFP16,
// XMM/YMM under HasFP16 + HasVLX.
9310 multiclass avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
9311 SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9312 let Predicates = [HasFP16] in
9313 defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>,
9314 avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>,
9315 T_MAP6, PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
9316 let Predicates = [HasFP16, HasVLX] in {
9317 defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>,
9318 EVEX_V128, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
9319 defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>,
9320 EVEX_V256, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
// Packed ERI instructions (AVX512ER only) and the packed VGETEXP family,
// which also gets FP16 and VL variants.
9323 let Predicates = [HasERI] in {
9324 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
9325 SchedWriteFRsqrt>, EVEX;
9326 defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
9327 SchedWriteFRcp>, EVEX;
9328 defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
9329 SchedWriteFAdd>, EVEX;
9331 defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9333 avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9335 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
9336 SchedWriteFRnd>, EVEX;
// Packed sqrt with an explicit static rounding-mode operand ($rc):
// register-only, EVEX_B + EVEX_RC encoded, selected from X86fsqrtRnd.
9338 multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
9339 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9340 let ExeDomain = _.ExeDomain in
9341 defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9342 (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
9343 (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
9344 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
// Packed sqrt: register (r), full-vector load (m) and broadcast-load
// (mb, EVEX_B) forms. AVX512_maskable_split takes two patterns: the
// unmasked form matches any_fsqrt (strict-FP capable), the masked forms
// match plain fsqrt. Reads MXCSR and may raise FP exceptions.
9347 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
9348 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9349 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9350 defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
9351 (ins _.RC:$src), OpcodeStr, "$src", "$src",
9352 (_.VT (any_fsqrt _.RC:$src)),
9353 (_.VT (fsqrt _.RC:$src))>, EVEX,
9355 defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9356 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9357 (any_fsqrt (_.VT (_.LdFrag addr:$src))),
9358 (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
9359 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast form: one scalar element splatted to the full vector.
9360 defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9361 (ins _.ScalarMemOp:$src), OpcodeStr,
9362 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9363 (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
9364 (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
9365 EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Packed sqrt across every element type and width: ph under HasFP16
// (T_MAP5), ps/pd ZMM under the base feature, XMM/YMM under HasVLX.
9369 let Uses = [MXCSR], mayRaiseFPException = 1 in
9370 multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
9371 X86SchedWriteSizes sched> {
9372 let Predicates = [HasFP16] in
9373 defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9374 sched.PH.ZMM, v32f16_info>,
9375 EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
9376 let Predicates = [HasFP16, HasVLX] in {
9377 defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9378 sched.PH.XMM, v8f16x_info>,
9379 EVEX_V128, T_MAP5, EVEX_CD8<16, CD8VF>;
9380 defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9381 sched.PH.YMM, v16f16x_info>,
9382 EVEX_V256, T_MAP5, EVEX_CD8<16, CD8VF>;
9384 defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9385 sched.PS.ZMM, v16f32_info>,
9386 EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
9387 defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9388 sched.PD.ZMM, v8f64_info>,
9389 EVEX_V512, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
9390 // Define only if AVX512VL feature is present.
9391 let Predicates = [HasVLX] in {
9392 defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9393 sched.PS.XMM, v4f32x_info>,
9394 EVEX_V128, TB, EVEX_CD8<32, CD8VF>;
9395 defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9396 sched.PS.YMM, v8f32x_info>,
9397 EVEX_V256, TB, EVEX_CD8<32, CD8VF>;
9398 defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9399 sched.PD.XMM, v2f64x_info>,
9400 EVEX_V128, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
9401 defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9402 sched.PD.YMM, v4f64x_info>,
9403 EVEX_V256, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
// Static-rounding ({rc}) packed sqrt forms; ZMM-only, since embedded
// rounding is only available at 512-bit width.
9407 let Uses = [MXCSR] in
9408 multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
9409 X86SchedWriteSizes sched> {
9410 let Predicates = [HasFP16] in
9411 defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
9412 sched.PH.ZMM, v32f16_info>,
9413 EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
9414 defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
9415 sched.PS.ZMM, v16f32_info>,
9416 EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
9417 defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
9418 sched.PD.ZMM, v8f64_info>,
9419 EVEX_V512, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
// Scalar sqrt: intrinsic forms (r_Int/m_Int from X86fsqrts, rb_Int with
// static rounding from X86fsqrtRnds), plus pattern-less isCodeGenOnly
// FRC forms (r/m) used by the Pats below to select plain any_fsqrt on
// scalar values. The load-folding Pat is gated on OptForSize.
9422 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9423 X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> {
9424 let ExeDomain = _.ExeDomain, Predicates = [prd] in {
9425 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9426 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9427 "$src2, $src1", "$src1, $src2",
9428 (X86fsqrts (_.VT _.RC:$src1),
9429 (_.VT _.RC:$src2))>,
9430 Sched<[sched]>, SIMD_EXC;
9431 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9432 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9433 "$src2, $src1", "$src1, $src2",
9434 (X86fsqrts (_.VT _.RC:$src1),
9435 (_.ScalarIntMemFrags addr:$src2))>,
9436 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
// Static-rounding form (EVEX_B + EVEX_RC).
9437 let Uses = [MXCSR] in
9438 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9439 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
9440 "$rc, $src2, $src1", "$src1, $src2, $rc",
9441 (X86fsqrtRnds (_.VT _.RC:$src1),
9444 EVEX_B, EVEX_RC, Sched<[sched]>;
// Assembly-only scalar-register forms; selected via the Pats below.
9446 let isCodeGenOnly = 1, hasSideEffects = 0 in {
9447 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9448 (ins _.FRC:$src1, _.FRC:$src2),
9449 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9450 Sched<[sched]>, SIMD_EXC;
9452 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9453 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
9454 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9455 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
// Select scalar sqrt through the codegen-only register form; $src1 is
// an undefined pass-through (IMPLICIT_DEF).
9459 let Predicates = [prd] in {
9460 def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
9461 (!cast<Instruction>(Name#Zr)
9462 (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
// Fold the load only when optimizing for size.
9465 let Predicates = [prd, OptForSize] in {
9466 def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
9467 (!cast<Instruction>(Name#Zm)
9468 (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
// Scalar sqrt instantiation for sh (f16, HasFP16), ss (f32) and sd (f64).
9472 multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
9473 X86SchedWriteSizes sched> {
9474 defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>,
9475 EVEX_CD8<16, CD8VT1>, EVEX, VVVV, T_MAP5, XS;
9476 defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
9477 EVEX_CD8<32, CD8VT1>, EVEX, VVVV, TB, XS;
9478 defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
9479 EVEX_CD8<64, CD8VT1>, EVEX, VVVV, TB, XD, REX_W;
// VSQRT: packed (all widths + ZMM static-rounding) and scalar families.
9482 defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
9483 avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
9485 defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
// Scalar rndscale: intrinsic forms (r_Int/m_Int from X86RndScales,
// rb_Int with {sae} from X86RndScalesSAE), plus pattern-less
// isCodeGenOnly FRC forms used by the trailing Pats to select
// X86any_VRndScale on scalar values (load-folding only under OptForSize).
9487 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
9488 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9489 let ExeDomain = _.ExeDomain in {
9490 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9491 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9492 "$src3, $src2, $src1", "$src1, $src2, $src3",
9493 (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9494 (i32 timm:$src3)))>,
9495 Sched<[sched]>, SIMD_EXC;
// Suppress-all-exceptions variant.
9497 let Uses = [MXCSR] in
9498 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9499 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9500 "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
9501 (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9502 (i32 timm:$src3)))>, EVEX_B,
// Folded scalar-load form.
9505 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9506 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
9508 "$src3, $src2, $src1", "$src1, $src2, $src3",
9509 (_.VT (X86RndScales _.RC:$src1,
9510 (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
9511 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
// Assembly-only scalar-register forms; selected via the Pats below.
9513 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
9514 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9515 (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
9516 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9517 []>, Sched<[sched]>, SIMD_EXC;
9520 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9521 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9522 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9523 []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
// Select scalar rndscale through the codegen-only register form; $src1
// is an undefined pass-through (IMPLICIT_DEF).
9527 let Predicates = [HasAVX512] in {
9528 def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
9529 (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
9530 _.FRC:$src1, timm:$src2))>;
// Fold the load only when optimizing for size.
9533 let Predicates = [HasAVX512, OptForSize] in {
9534 def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
9535 (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
9536 addr:$src1, timm:$src2))>;
// Scalar rndscale instructions: sh (f16, HasFP16), ss (f32), sd (f64).
9540 let Predicates = [HasFP16] in
9541 defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh",
9542 SchedWriteFRnd.Scl, f16x_info>,
9543 AVX512PSIi8Base, TA, EVEX, VVVV,
9544 EVEX_CD8<16, CD8VT1>;
9546 defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
9547 SchedWriteFRnd.Scl, f32x_info>,
9548 AVX512AIi8Base, EVEX, VVVV, VEX_LIG,
9549 EVEX_CD8<32, CD8VT1>;
9551 defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
9552 SchedWriteFRnd.Scl, f64x_info>,
9553 REX_W, AVX512AIi8Base, EVEX, VVVV, VEX_LIG,
9554 EVEX_CD8<64, CD8VT1>;
// Patterns mapping a masked-select of a scalar unary op, merged into a
// vector via a Move node (movss/movsd/movsh), onto the masked (_Intk)
// and zero-masked (_Intkz) intrinsic instructions.
9556 multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
9557 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
9558 dag OutMask, Predicate BasePredicate> {
9559 let Predicates = [BasePredicate] in {
// Merge-masking: false lane keeps the old $dst element.
9560 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9561 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9562 (extractelt _.VT:$dst, (iPTR 0))))),
9563 (!cast<Instruction>("V"#OpcPrefix#r_Intk)
9564 _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
// Zero-masking variant.
9566 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9567 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9569 (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
9570 OutMask, _.VT:$src2, _.VT:$src1)>;
// Masked scalar sqrt patterns for f16/f32/f64; the GR32 mask is
// truncated and copied into a VK1WM mask register.
9574 defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh,
9575 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info,
9576 fp16imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasFP16>;
9577 defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
9578 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
9579 fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
9580 defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
9581 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
9582 fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
9585 //-------------------------------------------------
9586 // Integer truncate and extend operations
9587 //-------------------------------------------------
// Core VPMOV* truncate definitions for one width: register forms
// (rr unmasked, rrk merge-masked, rrkz zero-masked) with patterns from
// OpNode/MaskNode, plus pattern-less store forms (mr/mrk) that are
// matched separately by avx512_trunc_mr_lowering.
9589 multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9590 SDPatternOperator MaskNode,
9591 X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
9592 X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
9593 let ExeDomain = DestInfo.ExeDomain in {
9594 def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9595 (ins SrcInfo.RC:$src),
9596 OpcodeStr # "\t{$src, $dst|$dst, $src}",
9597 [(set DestInfo.RC:$dst,
9598 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
9599 EVEX, Sched<[sched]>;
// Merge-masked form: $src0 supplies the pass-through elements.
9600 let Constraints = "$src0 = $dst" in
9601 def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9602 (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9603 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9604 [(set DestInfo.RC:$dst,
9605 (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9606 (DestInfo.VT DestInfo.RC:$src0),
9607 SrcInfo.KRCWM:$mask))]>,
9608 EVEX, EVEX_K, Sched<[sched]>;
// Zero-masked form.
9609 def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9610 (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9611 OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
9612 [(set DestInfo.RC:$dst,
9613 (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9614 DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
9615 EVEX, EVEX_KZ, Sched<[sched]>;
// Truncating-store forms (no patterns here; see avx512_trunc_mr_lowering).
9618 let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
9619 def mr : AVX512XS8I<opc, MRMDestMem, (outs),
9620 (ins x86memop:$dst, SrcInfo.RC:$src),
9621 OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
9622 EVEX, Sched<[sched.Folded]>;
9624 def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
9625 (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9626 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
9627 EVEX, EVEX_K, Sched<[sched.Folded]>;
9628 }//mayStore = 1, hasSideEffects = 0
// Patterns selecting truncating stores (plain and masked) onto the mr/mrk
// store forms defined in avx512_trunc_common.
9631 multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9632 PatFrag truncFrag, PatFrag mtruncFrag,
9635 def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9636 (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
9637 addr:$dst, SrcInfo.RC:$src)>;
// Masked truncating store.
9639 def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
9640 SrcInfo.KRCWM:$mask),
9641 (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
9642 addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
// Instantiates truncate defs at 128/256-bit (under HasVLX + prd) and
// 512-bit (under prd) widths, each paired with the store-lowering
// patterns. Separate Op/Mask nodes per width allow the narrow widths to
// use the "InVec" node variants (result narrower than a full vector).
9645 multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9646 SDNode OpNode256, SDNode OpNode512,
9647 SDPatternOperator MaskNode128,
9648 SDPatternOperator MaskNode256,
9649 SDPatternOperator MaskNode512,
9650 X86SchedWriteWidths sched,
9651 AVX512VLVectorVTInfo VTSrcInfo,
9652 X86VectorVTInfo DestInfoZ128,
9653 X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9654 X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9655 X86MemOperand x86memopZ, PatFrag truncFrag,
9656 PatFrag mtruncFrag, Predicate prd = HasAVX512>{
9658 let Predicates = [HasVLX, prd] in {
9659 defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched.XMM,
9660 VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9661 avx512_trunc_mr_lowering<VTSrcInfo.info128, truncFrag,
9662 mtruncFrag, NAME>, EVEX_V128;
9664 defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched.YMM,
9665 VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9666 avx512_trunc_mr_lowering<VTSrcInfo.info256, truncFrag,
9667 mtruncFrag, NAME>, EVEX_V256;
9669 let Predicates = [prd] in
9670 defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched.ZMM,
9671 VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9672 avx512_trunc_mr_lowering<VTSrcInfo.info512, truncFrag,
9673 mtruncFrag, NAME>, EVEX_V512;
// qword -> byte truncates (vpmovqb family). Even at 512 bits the result
// is only 8 bytes, so every width uses the InVec node variants.
9676 multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr,
9677 X86SchedWriteWidths sched, PatFrag StoreNode,
9678 PatFrag MaskedStoreNode, SDNode InVecNode,
9679 SDPatternOperator InVecMaskNode> {
9680 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
9681 InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
9682 avx512vl_i64_info, v16i8x_info, v16i8x_info,
9683 v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9684 MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
// qword -> word truncates (vpmovqw family). Only the 512-bit form
// produces a full 128-bit result, so it uses the plain Op/Mask nodes;
// the narrower widths use the InVec variants.
9687 multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9688 SDPatternOperator MaskNode,
9689 X86SchedWriteWidths sched, PatFrag StoreNode,
9690 PatFrag MaskedStoreNode, SDNode InVecNode,
9691 SDPatternOperator InVecMaskNode> {
9692 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9693 InVecMaskNode, InVecMaskNode, MaskNode, sched,
9694 avx512vl_i64_info, v8i16x_info, v8i16x_info,
9695 v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9696 MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
// qword -> dword truncates (vpmovqd family). The 256/512-bit forms
// produce full vectors and use the plain Op/Mask nodes; only the
// 128-bit form uses the InVec variants.
9699 multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9700 SDPatternOperator MaskNode,
9701 X86SchedWriteWidths sched, PatFrag StoreNode,
9702 PatFrag MaskedStoreNode, SDNode InVecNode,
9703 SDPatternOperator InVecMaskNode> {
9704 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9705 InVecMaskNode, MaskNode, MaskNode, sched,
9706 avx512vl_i64_info, v4i32x_info, v4i32x_info,
9707 v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9708 MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
// dword -> byte truncates (vpmovdb family). Only the 512-bit form
// produces a full 128-bit result (plain Op/Mask nodes); the narrower
// widths use the InVec variants.
9711 multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
9712 SDPatternOperator MaskNode,
9713 X86SchedWriteWidths sched, PatFrag StoreNode,
9714 PatFrag MaskedStoreNode, SDNode InVecNode,
9715 SDPatternOperator InVecMaskNode> {
9716 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9717 InVecMaskNode, InVecMaskNode, MaskNode, sched,
9718 avx512vl_i32_info, v16i8x_info, v16i8x_info,
9719 v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
9720 MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
// dword -> word truncates (vpmovdw family). 256/512-bit forms use the
// plain Op/Mask nodes; the 128-bit form uses the InVec variants.
9723 multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9724 SDPatternOperator MaskNode,
9725 X86SchedWriteWidths sched, PatFrag StoreNode,
9726 PatFrag MaskedStoreNode, SDNode InVecNode,
9727 SDPatternOperator InVecMaskNode> {
9728 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9729 InVecMaskNode, MaskNode, MaskNode, sched,
9730 avx512vl_i32_info, v8i16x_info, v8i16x_info,
9731 v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
9732 MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
// word -> byte truncates (vpmovwb family); requires HasBWI. 256/512-bit
// forms use the plain Op/Mask nodes; the 128-bit form uses the InVec
// variants.
9735 multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9736 SDPatternOperator MaskNode,
9737 X86SchedWriteWidths sched, PatFrag StoreNode,
9738 PatFrag MaskedStoreNode, SDNode InVecNode,
9739 SDPatternOperator InVecMaskNode> {
9740 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9741 InVecMaskNode, MaskNode, MaskNode, sched,
9742 avx512vl_i16_info, v16i8x_info, v16i8x_info,
9743 v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
9744 MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
// Instantiations of the truncate multiclasses. Three variants per width:
// VPMOV*   - plain truncate (X86vtrunc / trunc),
// VPMOVS*  - signed saturating truncate (X86vtruncs),
// VPMOVUS* - unsigned saturating truncate (X86vtruncus).
// NOTE(review): several 'defm's below are visibly truncated — e.g. VPMOVSQB
// ends with "X86vtruncs," and the line carrying the final X86vmtrunc*
// argument and '>;' was dropped. Same for VPMOVSQW/VPMOVSQD/VPMOVSDB/
// VPMOVSDW/VPMOVWB/VPMOVSWB. Restore from upstream before use.
9747 defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb",
9748 SchedWriteVecTruncate, truncstorevi8,
9749 masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9750 defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb",
9751 SchedWriteVecTruncate, truncstore_s_vi8,
9752 masked_truncstore_s_vi8, X86vtruncs,
9754 defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb",
9755 SchedWriteVecTruncate, truncstore_us_vi8,
9756 masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>;
9758 defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
9759 SchedWriteVecTruncate, truncstorevi16,
9760 masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9761 defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs,
9762 SchedWriteVecTruncate, truncstore_s_vi16,
9763 masked_truncstore_s_vi16, X86vtruncs,
9765 defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
9766 select_truncus, SchedWriteVecTruncate,
9767 truncstore_us_vi16, masked_truncstore_us_vi16,
9768 X86vtruncus, X86vmtruncus>;
9770 defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
9771 SchedWriteVecTruncate, truncstorevi32,
9772 masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
9773 defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs,
9774 SchedWriteVecTruncate, truncstore_s_vi32,
9775 masked_truncstore_s_vi32, X86vtruncs,
9777 defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
9778 select_truncus, SchedWriteVecTruncate,
9779 truncstore_us_vi32, masked_truncstore_us_vi32,
9780 X86vtruncus, X86vmtruncus>;
9782 defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
9783 SchedWriteVecTruncate, truncstorevi8,
9784 masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9785 defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
9786 SchedWriteVecTruncate, truncstore_s_vi8,
9787 masked_truncstore_s_vi8, X86vtruncs,
9789 defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
9790 select_truncus, SchedWriteVecTruncate,
9791 truncstore_us_vi8, masked_truncstore_us_vi8,
9792 X86vtruncus, X86vmtruncus>;
9794 defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
9795 SchedWriteVecTruncate, truncstorevi16,
9796 masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9797 defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
9798 SchedWriteVecTruncate, truncstore_s_vi16,
9799 masked_truncstore_s_vi16, X86vtruncs,
9801 defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
9802 select_truncus, SchedWriteVecTruncate,
9803 truncstore_us_vi16, masked_truncstore_us_vi16,
9804 X86vtruncus, X86vmtruncus>;
9806 defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
9807 SchedWriteVecTruncate, truncstorevi8,
9808 masked_truncstorevi8, X86vtrunc,
9810 defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
9811 SchedWriteVecTruncate, truncstore_s_vi8,
9812 masked_truncstore_s_vi8, X86vtruncs,
9814 defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
9815 select_truncus, SchedWriteVecTruncate,
9816 truncstore_us_vi8, masked_truncstore_us_vi8,
9817 X86vtruncus, X86vmtruncus>;
// Without VLX there are no 256-bit VPMOV*256rr forms, so lower a 256-bit
// truncate by widening the source into a 512-bit register (INSERT_SUBREG of
// IMPLICIT_DEF), using the 512-bit instruction, and extracting the low xmm.
// NOTE(review): the closing '}' lines of both 'let Predicates' scopes were
// dropped from this extraction.
9819 let Predicates = [HasAVX512, NoVLX] in {
9820 def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
9821 (v8i16 (EXTRACT_SUBREG
9822 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
9823 VR256X:$src, sub_ymm)))), sub_xmm))>;
9824 def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
9825 (v4i32 (EXTRACT_SUBREG
9826 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
9827 VR256X:$src, sub_ymm)))), sub_xmm))>;
// v16i16 -> v16i8 needs the BWI-only VPMOVWB instruction.
9830 let Predicates = [HasBWI, NoVLX] in {
9831 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9832 (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
9833 VR256X:$src, sub_ymm))), sub_xmm))>;
9836 // Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
// Selects the masked (rrk) and zero-masked (rrkz) register forms of a
// truncate instruction for the X86vmtrunc* nodes.
// NOTE(review): lines were dropped here — the merge-masking pattern is
// missing its '$src0' passthru operand line (orig. 9841) and the trailing
// 'SrcInfo.RC:$src)>;' lines of both patterns (orig. 9845/9851). Restore
// from upstream before use.
9837 multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
9838 X86VectorVTInfo DestInfo,
9839 X86VectorVTInfo SrcInfo> {
9840 def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9842 SrcInfo.KRCWM:$mask)),
9843 (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
9844 SrcInfo.KRCWM:$mask,
// Zero-masking form: passthru is the all-zeros vector.
9847 def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9848 DestInfo.ImmAllZerosV,
9849 SrcInfo.KRCWM:$mask)),
9850 (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
// Apply mtrunc_lowering to each truncate instruction whose destination
// element type (i8/i16) has no legal vselect without BWI. 256-bit forms
// need VLX; 512-bit forms only need AVX512.
9854 let Predicates = [HasVLX] in {
9855 defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
9856 defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
9857 defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
// NOTE(review): closing '}' of the HasVLX scope (orig. 9858) was dropped.
9860 let Predicates = [HasAVX512] in {
9861 defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
9862 defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
9863 defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
9865 defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
9866 defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
9867 defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
9869 defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
9870 defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
9871 defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
// Common register and memory forms for a sign/zero-extend (vpmovsx/vpmovzx)
// at one vector width: 'rr' extends a register source via OpNode, 'rm'
// extends a memory source via the extending-load PatFrag. Both are maskable.
9874 multiclass avx512_pmovx_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9875 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
9876 X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
9877 let ExeDomain = DestInfo.ExeDomain in {
9878 defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
9879 (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
9880 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
9881 EVEX, Sched<[sched]>;
9883 defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
9884 (ins x86memop:$src), OpcodeStr ,"$src", "$src",
9885 (DestInfo.VT (LdFrag addr:$src))>,
9886 EVEX, Sched<[sched.Folded]>;
// NOTE(review): the closing braces of the 'let' scope and multiclass
// (orig. 9887-9888) were dropped from this extraction.
// Per-width extend multiclasses: bw = i8->i16, bd = i8->i32, bq = i8->i64,
// wd = i16->i32, wq = i16->i64, dq = i32->i64. Each instantiates
// avx512_pmovx_common at 128/256/512 bits under the appropriate predicates.
// InVecNode ({s,z}ext_invec) is used where only the low subvector of the
// source register is consumed; OpNode (plain {s,z}ext) where all elements
// are. ExtTy ("s"/"z") selects the matching extending-load PatFrag.
// NOTE(review): closing braces of each predicate scope/multiclass are
// missing throughout (dropped lines).
9890 multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
9891 SDNode OpNode, SDNode InVecNode, string ExtTy,
9892 X86SchedWriteWidths sched,
9893 PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9894 let Predicates = [HasVLX, HasBWI] in {
9895 defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v8i16x_info,
9896 v16i8x_info, i64mem, LdFrag, InVecNode>,
9897 EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V128, WIG;
9899 defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v16i16x_info,
9900 v16i8x_info, i128mem, LdFrag, OpNode>,
9901 EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V256, WIG;
9903 let Predicates = [HasBWI] in {
9904 defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v32i16_info,
9905 v32i8x_info, i256mem, LdFrag, OpNode>,
9906 EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V512, WIG;
9910 multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
9911 SDNode OpNode, SDNode InVecNode, string ExtTy,
9912 X86SchedWriteWidths sched,
9913 PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9914 let Predicates = [HasVLX, HasAVX512] in {
9915 defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
9916 v16i8x_info, i32mem, LdFrag, InVecNode>,
9917 EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V128, WIG;
9919 defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
9920 v16i8x_info, i64mem, LdFrag, InVecNode>,
9921 EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V256, WIG;
9923 let Predicates = [HasAVX512] in {
9924 defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
9925 v16i8x_info, i128mem, LdFrag, OpNode>,
9926 EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V512, WIG;
// bq consumes at most the low 8 bytes of the source at every width, so all
// three forms use InVecNode and there is no plain-OpNode parameter.
9930 multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
9931 SDNode InVecNode, string ExtTy,
9932 X86SchedWriteWidths sched,
9933 PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9934 let Predicates = [HasVLX, HasAVX512] in {
9935 defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
9936 v16i8x_info, i16mem, LdFrag, InVecNode>,
9937 EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V128, WIG;
9939 defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
9940 v16i8x_info, i32mem, LdFrag, InVecNode>,
9941 EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V256, WIG;
9943 let Predicates = [HasAVX512] in {
9944 defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
9945 v16i8x_info, i64mem, LdFrag, InVecNode>,
9946 EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V512, WIG;
9950 multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
9951 SDNode OpNode, SDNode InVecNode, string ExtTy,
9952 X86SchedWriteWidths sched,
9953 PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9954 let Predicates = [HasVLX, HasAVX512] in {
9955 defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
9956 v8i16x_info, i64mem, LdFrag, InVecNode>,
9957 EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V128, WIG;
9959 defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
9960 v8i16x_info, i128mem, LdFrag, OpNode>,
9961 EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V256, WIG;
9963 let Predicates = [HasAVX512] in {
9964 defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
9965 v16i16x_info, i256mem, LdFrag, OpNode>,
9966 EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V512, WIG;
9970 multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
9971 SDNode OpNode, SDNode InVecNode, string ExtTy,
9972 X86SchedWriteWidths sched,
9973 PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9974 let Predicates = [HasVLX, HasAVX512] in {
9975 defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
9976 v8i16x_info, i32mem, LdFrag, InVecNode>,
9977 EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V128, WIG;
9979 defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
9980 v8i16x_info, i64mem, LdFrag, InVecNode>,
9981 EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V256, WIG;
9983 let Predicates = [HasAVX512] in {
9984 defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
9985 v8i16x_info, i128mem, LdFrag, OpNode>,
9986 EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V512, WIG;
// dq operates on i32 elements, so no WIG and no byte-granularity tuples.
9990 multiclass avx512_pmovx_dq<bits<8> opc, string OpcodeStr,
9991 SDNode OpNode, SDNode InVecNode, string ExtTy,
9992 X86SchedWriteWidths sched,
9993 PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
9995 let Predicates = [HasVLX, HasAVX512] in {
9996 defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
9997 v4i32x_info, i64mem, LdFrag, InVecNode>,
9998 EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V128;
10000 defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
10001 v4i32x_info, i128mem, LdFrag, OpNode>,
10002 EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V256;
10004 let Predicates = [HasAVX512] in {
10005 defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
10006 v8i32x_info, i256mem, LdFrag, OpNode>,
10007 EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V512;
// Zero-extend (0x30-0x35) and sign-extend (0x20-0x25) instantiations.
10011 defm VPMOVZXBW : avx512_pmovx_bw<0x30, "vpmovzxbw", zext, zext_invec, "z", SchedWriteVecExtend>;
10012 defm VPMOVZXBD : avx512_pmovx_bd<0x31, "vpmovzxbd", zext, zext_invec, "z", SchedWriteVecExtend>;
10013 defm VPMOVZXBQ : avx512_pmovx_bq<0x32, "vpmovzxbq", zext_invec, "z", SchedWriteVecExtend>;
10014 defm VPMOVZXWD : avx512_pmovx_wd<0x33, "vpmovzxwd", zext, zext_invec, "z", SchedWriteVecExtend>;
10015 defm VPMOVZXWQ : avx512_pmovx_wq<0x34, "vpmovzxwq", zext, zext_invec, "z", SchedWriteVecExtend>;
10016 defm VPMOVZXDQ : avx512_pmovx_dq<0x35, "vpmovzxdq", zext, zext_invec, "z", SchedWriteVecExtend>;
10018 defm VPMOVSXBW: avx512_pmovx_bw<0x20, "vpmovsxbw", sext, sext_invec, "s", SchedWriteVecExtend>;
10019 defm VPMOVSXBD: avx512_pmovx_bd<0x21, "vpmovsxbd", sext, sext_invec, "s", SchedWriteVecExtend>;
10020 defm VPMOVSXBQ: avx512_pmovx_bq<0x22, "vpmovsxbq", sext_invec, "s", SchedWriteVecExtend>;
10021 defm VPMOVSXWD: avx512_pmovx_wd<0x23, "vpmovsxwd", sext, sext_invec, "s", SchedWriteVecExtend>;
10022 defm VPMOVSXWQ: avx512_pmovx_wq<0x24, "vpmovsxwq", sext, sext_invec, "s", SchedWriteVecExtend>;
10023 defm VPMOVSXDQ: avx512_pmovx_dq<0x25, "vpmovsxdq", sext, sext_invec, "s", SchedWriteVecExtend>;
10026 // Patterns that we also need any extend versions of. aext_vector_inreg
10027 // is currently legalized to zext_vector_inreg.
// Load-folding patterns for full-width extends (whole source vector loaded
// from memory), selected to the *rm instruction forms.
10028 multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
10029 // 256-bit patterns
10030 let Predicates = [HasVLX, HasBWI] in {
10031 def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
10032 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
10035 let Predicates = [HasVLX] in {
10036 def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
10037 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
10039 def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
10040 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
10043 // 512-bit patterns
10044 let Predicates = [HasBWI] in {
10045 def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
10046 (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
10048 let Predicates = [HasAVX512] in {
10049 def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
10050 (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
10051 def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
10052 (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
10054 def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
10055 (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
10057 def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
10058 (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
// NOTE(review): closing braces of the predicate scopes and the multiclass
// (dropped lines) are missing from this extraction.
// Extends base patterns with *_invec (in-register subvector) forms: the
// source is a scalar load placed in a vector via scalar_to_vector, a
// bitcast of it, or an X86vzload of the exact number of bytes consumed.
// Each is folded into the corresponding *rm instruction.
10062 multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
10064 AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
10065 // 128-bit patterns
10066 let Predicates = [HasVLX, HasBWI] in {
10067 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10068 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10069 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10070 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10071 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10072 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10074 let Predicates = [HasVLX] in {
10075 def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10076 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10077 def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
10078 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10080 def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
10081 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
10083 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10084 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10085 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10086 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10087 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
10088 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10090 def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10091 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10092 def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
10093 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10095 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10096 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10097 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10098 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10099 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
10100 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
// 256-bit in-register forms.
10102 let Predicates = [HasVLX] in {
10103 def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10104 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10105 def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
10106 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10107 def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10108 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10110 def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10111 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10112 def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
10113 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10115 def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10116 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10117 def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
10118 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10119 def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
10120 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10122 // 512-bit patterns
10123 let Predicates = [HasAVX512] in {
10124 def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10125 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10126 def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10127 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10128 def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10129 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
// NOTE(review): the multiclass header line carrying the 'SDNode InVecOp'
// parameter (orig. 10063) and the closing braces were dropped from this
// extraction.
// Instantiate the load-folding patterns for both sign- and zero-extend.
10133 defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
10134 defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
10136 // Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
10137 // ext+trunc aggressively making it impossible to legalize the DAG to this
10138 // pattern directly.
// Lower via i32: zero-extend v16i16 to v16i32, then VPMOVDB down to v16i8.
10139 let Predicates = [HasAVX512, NoBWI] in {
10140 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
10141 (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
10142 def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
10143 (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
// NOTE(review): closing '}' of this predicate scope (dropped line) is
// missing.
10146 //===----------------------------------------------------------------------===//
10147 // GATHER - SCATTER Operations
10149 // FIXME: Improve scheduling of gather/scatter instructions.
// One masked gather instruction: loads elements through the vector-index
// memory operand under mask. The destination is early-clobber, tied to
// $src1 (merge source), and the mask register is written back ($mask_wb)
// with the completion mask.
10150 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10151 X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
10152 let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
10153 ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
10154 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
10155 (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
10156 !strconcat(OpcodeStr#_.Suffix,
10157 "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
10158 []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10159 Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
// Gather families for 64-bit elements (q/pd: REX_W set) and 32-bit elements
// (d/ps). The D-suffixed forms use dword indices, the Q-suffixed forms use
// qword indices, which is why element/index vector widths differ per defm.
// NOTE(review): closing braces and some attribute continuation lines
// (e.g. after orig. 10182/10184) were dropped from this extraction.
10162 multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
10163 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10164 defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
10165 vy512xmem>, EVEX_V512, REX_W;
10166 defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
10167 vz512mem>, EVEX_V512, REX_W;
10168 let Predicates = [HasVLX] in {
10169 defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
10170 vx256xmem>, EVEX_V256, REX_W;
10171 defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
10172 vy256xmem>, EVEX_V256, REX_W;
10173 defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
10174 vx128xmem>, EVEX_V128, REX_W;
10175 defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10176 vx128xmem>, EVEX_V128, REX_W;
10180 multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
10181 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10182 defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
10184 defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
10186 let Predicates = [HasVLX] in {
10187 defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
10188 vy256xmem>, EVEX_V256;
10189 defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10190 vy128xmem>, EVEX_V256;
10191 defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
10192 vx128xmem>, EVEX_V128;
// Qword-index gather of 32-bit data at 128 bits yields only 2 elements,
// hence the explicit VK2WM mask class override.
10193 defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10194 vx64xmem, VK2WM>, EVEX_V128;
// FP (VGATHER*) and integer (VPGATHER*) instantiations.
10199 defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
10200 avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
10202 defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
10203 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
// One masked scatter instruction: stores vector elements through the
// vector-index memory operand under mask; the mask register is tied and
// written back with the completion mask.
10205 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10206 X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
10208 let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
10209 hasSideEffects = 0 in
10211 def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
10212 (ins memop:$dst, MaskRC:$mask, _.RC:$src),
10213 !strconcat(OpcodeStr#_.Suffix,
10214 "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
10215 []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10216 Sched<[WriteStore]>;
// Scatter families, mirroring the gather multiclasses above: q/pd for
// 64-bit elements (REX_W), d/ps for 32-bit elements.
// NOTE(review): closing braces and attribute continuation lines (after
// orig. 10239/10241) were dropped from this extraction.
10219 multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
10220 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10221 defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
10222 vy512xmem>, EVEX_V512, REX_W;
10223 defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
10224 vz512mem>, EVEX_V512, REX_W;
10225 let Predicates = [HasVLX] in {
10226 defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
10227 vx256xmem>, EVEX_V256, REX_W;
10228 defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
10229 vy256xmem>, EVEX_V256, REX_W;
10230 defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
10231 vx128xmem>, EVEX_V128, REX_W;
10232 defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10233 vx128xmem>, EVEX_V128, REX_W;
10237 multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
10238 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10239 defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
10241 defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
10243 let Predicates = [HasVLX] in {
10244 defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
10245 vy256xmem>, EVEX_V256;
10246 defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10247 vy128xmem>, EVEX_V256;
10248 defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
10249 vx128xmem>, EVEX_V128;
// Qword-index scatter of 32-bit data at 128 bits: 2 elements -> VK2WM mask.
10250 defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10251 vx64xmem, VK2WM>, EVEX_V128;
// FP (VSCATTER*) and integer (VPSCATTER*) instantiations.
10255 defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
10256 avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
10258 defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
10259 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
// AVX-512 PF (prefetch) gather/scatter hint instructions. They take only a
// mask and a vector-index memory operand, produce no result, and are marked
// mayLoad + mayStore since a prefetch hint touches memory both ways.
10262 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
10263 RegisterClass KRC, X86MemOperand memop> {
10264 let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
10265 def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
10266 !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
10267 EVEX, EVEX_K, Sched<[WriteLoad]>;
// Hint levels 0/1 x {gather, scatter} x {dps, qps, dpd, qpd}; the ModRM reg
// field (MRM1m/MRM2m/MRM5m/MRM6m) distinguishes the variants.
10270 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
10271 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10273 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
10274 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10276 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
10277 VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10279 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
10280 VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10282 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
10283 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10285 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
10286 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10288 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
10289 VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10291 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
10292 VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10294 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
10295 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10297 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
10298 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10300 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
10301 VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10303 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
10304 VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10306 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
10307 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10309 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
10310 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10312 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
10313 VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10315 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
10316 VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
// VPMOVM2* - expand a mask register into a vector by sign-extending each
// mask bit to a full element (all-ones / all-zeros lanes).
10318 multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr, SchedWrite Sched> {
10319 def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
10320 !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
10321 [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
10322 EVEX, Sched<[Sched]>;
// 512-bit form gated on the base predicate; 128/256-bit forms also need VLX.
10325 multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
10326 string OpcodeStr, Predicate prd> {
10327 let Predicates = [prd] in
10328 defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr, WriteVecMoveZ>, EVEX_V512;
10330 let Predicates = [prd, HasVLX] in {
10331 defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr, WriteVecMoveY>, EVEX_V256;
10332 defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr, WriteVecMoveX>, EVEX_V128;
// b/w forms need BWI; d/q forms need DQI. REX_W selects the wider element.
10336 defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
10337 defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , REX_W;
10338 defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
10339 defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , REX_W;
// VPMOV*2M - compress a vector into a mask register. Matched against
// (X86pcmpgtm 0, x), i.e. "0 > x" testing each element's sign bit.
10341 multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
10342 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
10343 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
10344 [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
10345 EVEX, Sched<[WriteMove]>;
10348 // Use 512bit version to implement 128/256 bit in case NoVLX.
// Widen the narrow source into a 512-bit register, run the Z form, and copy
// the resulting mask into the narrow mask class.
// NOTE(review): parameter-list and trailing lines of this multiclass
// (orig. 10350-10352, 10358) were dropped from this extraction.
10349 multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
10353 def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
10354 (_.KVT (COPY_TO_REGCLASS
10355 (!cast<Instruction>(Name#"Zrr")
10356 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
10357 _.RC:$src, _.SubRegIdx)),
10361 multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
10362 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10363 let Predicates = [prd] in
10364 defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
10367 let Predicates = [prd, HasVLX] in {
10368 defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
10370 defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
// Without VLX, fall back to the 512-bit instruction via the lowering class.
10373 let Predicates = [prd, NoVLX] in {
10374 defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
10375 defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
10379 defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
10380 avx512vl_i8_info, HasBWI>;
10381 defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
10382 avx512vl_i16_info, HasBWI>, REX_W;
10383 defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
10384 avx512vl_i32_info, HasDQI>;
10385 defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
10386 avx512vl_i64_info, HasDQI>, REX_W;
10388 // Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
10389 // is available, but BWI is not. We can't handle this in lowering because
10390 // a target independent DAG combine likes to combine sext and trunc.
// Expand mask to v16i32 with VPMOVM2D, then narrow with VPMOVDB/VPMOVDW.
10391 let Predicates = [HasDQI, NoBWI] in {
10392 def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
10393 (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
10394 def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
10395 (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
// Same trick at 256 bits when VLX is available.
10398 let Predicates = [HasDQI, NoBWI, HasVLX] in {
10399 def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
10400 (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
10403 //===----------------------------------------------------------------------===//
10404 // AVX-512 - COMPRESS and EXPAND
// Register (rr/rrk/rrkz via AVX512_maskable with null_frag; selected by
// explicit Pats below) and store (mr/mrk) forms of a compress instruction.
10407 multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
10408 string OpcodeStr, X86FoldableSchedWrite sched> {
10409 defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
10410 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10411 (null_frag)>, AVX5128IBase,
// Unmasked and masked compressing stores; patterns are supplied separately
// by compress_by_vec_width_lowering.
10414 let mayStore = 1, hasSideEffects = 0 in
10415 def mr : AVX5128I<opc, MRMDestMem, (outs),
10416 (ins _.MemOp:$dst, _.RC:$src),
10417 OpcodeStr # "\t{$src, $dst|$dst, $src}",
10418 []>, EVEX_CD8<_.EltSize, CD8VT1>,
10419 Sched<[sched.Folded]>;
10421 def mrk : AVX5128I<opc, MRMDestMem, (outs),
10422 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
10423 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
10425 EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10426 Sched<[sched.Folded]>;
// NOTE(review): lines were dropped here (e.g. the Sched of 'rr', the '[]'
// pattern line of 'mrk', closing braces); restore from upstream.
// Selection patterns for compress: masked compressing store -> mrk form,
// X86compress with merge passthru -> rrk, with zero passthru -> rrkz.
10429 multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10430 def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
10431 (!cast<Instruction>(Name#_.ZSuffix#mrk)
10432 addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
10434 def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10435 (!cast<Instruction>(Name#_.ZSuffix#rrk)
10436 _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10437 def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10438 (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10439 _.KRCWM:$mask, _.RC:$src)>;
// Instantiate compress at 512 bits under Pred, and 128/256 under Pred+VLX.
10442 multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
10443 X86FoldableSchedWrite sched,
10444 AVX512VLVectorVTInfo VTInfo,
10445 Predicate Pred = HasAVX512> {
10446 let Predicates = [Pred] in
10447 defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
10448 compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10450 let Predicates = [Pred, HasVLX] in {
10451 defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
10452 compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10453 defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
10454 compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10458 // FIXME: Is there a better scheduler class for VPCOMPRESS?
10459 defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
10460 avx512vl_i32_info>, EVEX;
10461 defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
10462 avx512vl_i64_info>, EVEX, REX_W;
10463 defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
10464 avx512vl_f32_info>, EVEX;
10465 defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
10466 avx512vl_f64_info>, EVEX, REX_W;
// VPEXPAND/VEXPAND register and load forms for one vector width. Both use
// null_frag patterns; actual selection happens via the lowering patterns in
// expand_by_vec_width_lowering below.
10469 multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
10470                                  string OpcodeStr, X86FoldableSchedWrite sched> {
10471   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10472               (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10473               (null_frag)>, AVX5128IBase,
10476   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10477               (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
10479             AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
10480             Sched<[sched.Folded, sched.ReadAfterFold]>;
// ISel lowering patterns for expanding loads and X86expand nodes. Both an
// undef and an all-zeros passthru on a masked expanding load select the
// zero-masked rmkz form; a register passthru selects rmk; register-only
// X86expand selects rrk/rrkz depending on the merge value.
10483 multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10485   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
10486             (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10487                                         _.KRCWM:$mask, addr:$src)>;
10489   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
10490             (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10491                                         _.KRCWM:$mask, addr:$src)>;
10493   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
10494                                                (_.VT _.RC:$src0))),
10495             (!cast<Instruction>(Name#_.ZSuffix#rmk)
10496                             _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
10498   def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10499             (!cast<Instruction>(Name#_.ZSuffix#rrk)
10500                             _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10501   def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10502             (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10503                             _.KRCWM:$mask, _.RC:$src)>;
// Instantiates expand instructions + lowering patterns for 512-bit under
// Pred, and 128/256-bit under Pred + HasVLX (mirrors compress_by_elt_width).
10506 multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
10507                                X86FoldableSchedWrite sched,
10508                                AVX512VLVectorVTInfo VTInfo,
10509                                Predicate Pred = HasAVX512> {
10510   let Predicates = [Pred] in
10511   defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
10512            expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10514   let Predicates = [Pred, HasVLX] in {
10515     defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
10516                 expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10517     defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
10518                 expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10522 // FIXME: Is there a better scheduler class for VPEXPAND?
10523 defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
10524                                       avx512vl_i32_info>, EVEX;
10525 defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
10526                                       avx512vl_i64_info>, EVEX, REX_W;
10527 defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
10528                                       avx512vl_f32_info>, EVEX;
10529 defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
10530                                       avx512vl_f64_info>, EVEX, REX_W;
10532 //handle instruction reg_vec1 = op(reg_vec,imm)
//                               op(mem_vec,imm)
10534 //                               op(broadcast(eltVt),imm)
10535 //all instruction created with FROUND_CURRENT
// Unary packed-FP-with-immediate (e.g. vrndscale/vgetmant/vreduce). Split
// masking: OpNode drives the unmasked pattern, MaskOpNode the masked ones.
// Reads MXCSR and may raise FP exceptions.
10536 multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
10537                                       SDPatternOperator OpNode,
10538                                       SDPatternOperator MaskOpNode,
10539                                       X86FoldableSchedWrite sched,
10540                                       X86VectorVTInfo _> {
10541   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10542   defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
10543                       (ins _.RC:$src1, i32u8imm:$src2),
10544                       OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10545                       (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
10546                       (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
10548   defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10549                     (ins _.MemOp:$src1, i32u8imm:$src2),
10550                     OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10551                     (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10553                     (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10554                                 (i32 timm:$src2))>,
10555              Sched<[sched.Folded, sched.ReadAfterFold]>;
                 // Broadcast form: EVEX_B, single scalar element splatted.
10556   defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10557                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
10558                     OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
10559                     "${src1}"#_.BroadcastStr#", $src2",
10560                     (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10562                     (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10563                                 (i32 timm:$src2))>, EVEX_B,
10564               Sched<[sched.Folded, sched.ReadAfterFold]>;
10568 //handle instruction reg_vec1 = op(reg_vec2,imm),{sae}
// SAE (suppress-all-exceptions) variant of the unary packed-imm form above:
// register-only, EVEX_B encodes {sae}, so MXCSR is read but no FP exception
// may be raised.
10569 multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10570                             SDNode OpNode, X86FoldableSchedWrite sched,
10571                             X86VectorVTInfo _> {
10572   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10573   defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10574                       (ins _.RC:$src1, i32u8imm:$src2),
10575                       OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
10576                       "$src1, {sae}, $src2",
10577                       (OpNode (_.VT _.RC:$src1),
10578                               (i32 timm:$src2))>,
10579                       EVEX_B, Sched<[sched]>;
// Combines the current-rounding and SAE unary packed-imm forms for ZMM
// (under prd), and the current-rounding form only for XMM/YMM (prd + VLX);
// SAE is 512-bit only.
10582 multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
10583                   AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
10584                   SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
10586   let Predicates = [prd] in {
10587     defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10588                                            sched.ZMM, _.info512>,
10589                 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
10590                                                sched.ZMM, _.info512>, EVEX_V512;
10592   let Predicates = [prd, HasVLX] in {
10593     defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10594                                            sched.XMM, _.info128>, EVEX_V128;
10595     defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10596                                            sched.YMM, _.info256>, EVEX_V256;
10600 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10601 //                               op(reg_vec2,mem_vec,imm)
10602 //                               op(reg_vec2,broadcast(eltVt),imm)
10603 //all instruction created with FROUND_CURRENT
// Two-source packed-FP-with-immediate (e.g. vrange): reg/reg, reg/mem and
// reg/broadcast forms. Reads MXCSR and may raise FP exceptions.
10604 multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10605                                 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10606   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10607   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10608                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10609                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10610                       (OpNode (_.VT _.RC:$src1),
10612                               (i32 timm:$src3))>,
10614   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10615                     (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
10616                     OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10617                     (OpNode (_.VT _.RC:$src1),
10618                             (_.VT (bitconvert (_.LdFrag addr:$src2))),
10619                             (i32 timm:$src3))>,
10620              Sched<[sched.Folded, sched.ReadAfterFold]>;
10621   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10622                     (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10623                     OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10624                     "$src1, ${src2}"#_.BroadcastStr#", $src3",
10625                     (OpNode (_.VT _.RC:$src1),
10626                             (_.VT (_.BroadcastLdFrag addr:$src2)),
10627                             (i32 timm:$src3))>, EVEX_B,
10628               Sched<[sched.Folded, sched.ReadAfterFold]>;
10632 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10633 //                               op(reg_vec2,mem_vec,imm)
// Integer three-operand-with-imm8 form where source and destination may use
// different VT infos (e.g. vdbpsadbw: i8 sources, i16 destination).
10634 multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10635                               X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
10636                               X86VectorVTInfo SrcInfo>{
10637   let ExeDomain = DestInfo.ExeDomain, ImmT = Imm8 in {
10638   defm rri :  AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10639                   (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
10640                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10641                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10642                                (SrcInfo.VT SrcInfo.RC:$src2),
10643                                (i8 timm:$src3)))>,
10645   defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10646                 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
10647                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10648                 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10649                              (SrcInfo.VT (bitconvert
10650                                                 (SrcInfo.LdFrag addr:$src2))),
10651                              (i8 timm:$src3)))>,
10652                 Sched<[sched.Folded, sched.ReadAfterFold]>;
10656 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10657 //                               op(reg_vec2,mem_vec,imm)
10658 //                               op(reg_vec2,broadcast(eltVt),imm)
// Same-type variant of avx512_3Op_rm_imm8 (Dest == Src), plus the EVEX_B
// broadcast-memory form that the mixed-type base cannot provide.
10659 multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10660                            X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10661   avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
10663   let ExeDomain = _.ExeDomain, ImmT = Imm8 in
10664   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10665                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10666                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10667                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10668                    (OpNode (_.VT _.RC:$src1),
10669                            (_.VT (_.BroadcastLdFrag addr:$src2)),
10670                            (i8 timm:$src3))>, EVEX_B,
10671                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10674 //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10675 //                                      op(reg_vec2,mem_scalar,imm)
// Scalar FP-with-immediate (e.g. vranges[sd]/vreduces[sd]). Reads MXCSR and
// may raise FP exceptions; memory form loads a single scalar element.
10676 multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10677                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10678   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10679   defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10680                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10681                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10682                       (OpNode (_.VT _.RC:$src1),
10684                               (i32 timm:$src3))>,
10686   defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10687                     (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
10688                     OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10689                     (OpNode (_.VT _.RC:$src1),
10690                             (_.ScalarIntMemFrags addr:$src2),
10691                             (i32 timm:$src3))>,
10692                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10696 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// SAE variant of the two-source packed-imm form: register-only, EVEX_B
// encodes {sae}; MXCSR is read but no FP exception may be raised.
10697 multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10698                                     SDNode OpNode, X86FoldableSchedWrite sched,
10699                                     X86VectorVTInfo _> {
10700   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10701   defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10702                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10703                       OpcodeStr, "$src3, {sae}, $src2, $src1",
10704                       "$src1, $src2, {sae}, $src3",
10705                       (OpNode (_.VT _.RC:$src1),
10707                               (i32 timm:$src3))>,
10708                       EVEX_B, Sched<[sched]>;
10711 //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// SAE variant of the scalar-imm form. Note: uses an explicit NAME# prefix on
// the defm, unlike the packed variant above.
10712 multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10713                                     X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10714   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10715   defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10716                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10717                       OpcodeStr, "$src3, {sae}, $src2, $src1",
10718                       "$src1, $src2, {sae}, $src3",
10719                       (OpNode (_.VT _.RC:$src1),
10721                               (i32 timm:$src3))>,
10722                       EVEX_B, Sched<[sched]>;
// Combines two-source packed-imm + SAE forms for ZMM, packed-imm only for
// XMM/YMM under VLX (SAE is 512-bit only).
10725 multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
10726                   AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10727                   SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10728   let Predicates = [prd] in {
10729     defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10730                 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
10734   let Predicates = [prd, HasVLX] in {
10735     defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10737     defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
// VL wrapper for the mixed-type 3Op-imm8 form: Z under Pred, Z128/Z256 under
// Pred + HasVLX. All widths get AVX512AIi8Base encoding and EVEX.VVVV.
10742 multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
10743                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
10744                     AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
10745   let Predicates = [Pred] in {
10746     defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
10747                                    SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX, VVVV;
10749   let Predicates = [Pred, HasVLX] in {
10750     defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
10751                                    SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX, VVVV;
10752     defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
10753                                    SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX, VVVV;
// VL wrapper for the same-type 3Op-imm8 form (includes broadcast variants).
10757 multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
10758                                   bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
10759                                   Predicate Pred = HasAVX512> {
10760   let Predicates = [Pred] in {
10761     defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10764   let Predicates = [Pred, HasVLX] in {
10765     defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10767     defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
// Scalar combine: current-rounding + SAE scalar-imm forms under prd. Scalars
// are XMM-only, so only one width is instantiated (sched.XMM).
10772 multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
10773                   X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
10774                   SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
10775   let Predicates = [prd] in {
10776      defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
10777               avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
// Instantiates PH/PS/PD flavors of a unary packed-imm op. PH shares the PS
// opcode but uses the TA map and is gated on HasFP16 (not prd); PS/PD use
// the prd predicate passed in.
10781 multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
10782                     bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
10783                     SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
10784                     X86SchedWriteWidths sched, Predicate prd>{
10785   defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info,
10786                             opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>,
10787                             AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
10788   defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
10789                             opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10790                             AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
10791   defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
10792                             opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10793                             AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, REX_W;
// Packed unary-imm instantiations: vreduce (DQI), vrndscale, vgetmant.
10796 defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
10797                               X86VReduce, X86VReduce, X86VReduceSAE,
10798                               SchedWriteFRnd, HasDQI>;
10799 defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
10800                               X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
10801                               SchedWriteFRnd, HasAVX512>;
10802 defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
10803                               X86VGetMant, X86VGetMant, X86VGetMantSAE,
10804                               SchedWriteFRnd, HasAVX512>;
// Two-source packed-imm instantiations: vrange (DQI only).
10806 defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
10807                                                 0x50, X86VRange, X86VRangeSAE,
10808                                                 SchedWriteFAdd, HasDQI>,
10809                       AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
10810 defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
10811                                                 0x50, X86VRange, X86VRangeSAE,
10812                                                 SchedWriteFAdd, HasDQI>,
10813                       AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
// Scalar-imm instantiations: vranges*, vreduces*, vgetmants* (SD/SS/SH).
10815 defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
10816        f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10817        AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
10818 defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
10819        0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10820        AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
10822 defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
10823        0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10824        AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
10825 defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
10826        0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10827        AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
10828 defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
10829        0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>,
10830        AVX512PSIi8Base, TA, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>;
10832 defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
10833        0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10834        AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
10835 defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
10836        0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10837        AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
10838 defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info,
10839        0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>,
10840        AVX512PSIi8Base, TA, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>;
// VSHUFF/VSHUFI 128-bit-lane shuffles for one width. The X86Shuf128 node is
// matched in CastInfo's type (the wider element view) and bitcast back to
// _'s type, which is why both VT infos are required.
10842 multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
10843                                           X86FoldableSchedWrite sched,
10845                                           X86VectorVTInfo CastInfo> {
10846   let ExeDomain = _.ExeDomain in {
10847   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10848                   (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10849                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10851                     (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
10852                                              (i8 timm:$src3)))))>,
10854   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10855                 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10856                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10859                     (CastInfo.VT (X86Shuf128 _.RC:$src1,
10860                                              (CastInfo.LdFrag addr:$src2),
10861                                              (i8 timm:$src3)))))>,
10862                 Sched<[sched.Folded, sched.ReadAfterFold]>;
10863   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10864                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10865                     OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10866                     "$src1, ${src2}"#_.BroadcastStr#", $src3",
10870                         (X86Shuf128 _.RC:$src1,
10871                                     (_.BroadcastLdFrag addr:$src2),
10872                                     (i8 timm:$src3)))))>, EVEX_B,
10873                     Sched<[sched.Folded, sched.ReadAfterFold]>;
// VL wrapper for the lane shuffles. Only Z and Z256 are defined: a 128-bit
// vector has a single 128-bit lane, so no Z128 form exists.
10877 multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
10878                                    AVX512VLVectorVTInfo _,
10879                                    AVX512VLVectorVTInfo CastInfo, bits<8> opc>{
10880   let Predicates = [HasAVX512] in
10881   defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10882                                           _.info512, CastInfo.info512>, EVEX_V512;
10884   let Predicates = [HasAVX512, HasVLX] in
10885   defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10886                                              _.info256, CastInfo.info256>, EVEX_V256;
// All four variants cast through the 64-bit-element info for matching.
10889 defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
10890       avx512vl_f32_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
10891 defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
10892       avx512vl_f64_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
10893 defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
10894       avx512vl_i32_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
10895 defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
10896       avx512vl_i64_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
// VALIGND/VALIGNQ for one vector width: reg/reg, reg/mem and reg/broadcast
// forms, all matching the X86VAlign node with an i8 immediate shift count.
10898 multiclass avx512_valign<bits<8> opc, string OpcodeStr,
10899                          X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10900   let ExeDomain = _.ExeDomain in {
10901   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10902                   (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10903                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10904                   (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
10906   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10907                 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10908                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10909                 (_.VT (X86VAlign _.RC:$src1,
10910                                  (bitconvert (_.LdFrag addr:$src2)),
10911                                  (i8 timm:$src3)))>,
10912                 Sched<[sched.Folded, sched.ReadAfterFold]>;
10914   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10915                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10916                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10917                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10918                    (X86VAlign _.RC:$src1,
10919                               (_.VT (_.BroadcastLdFrag addr:$src2)),
10920                               (i8 timm:$src3))>, EVEX_B,
10921                    Sched<[sched.Folded, sched.ReadAfterFold]>;
// VL wrapper for valign; opcode 0x03 for all widths.
10925 multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
10926                                 AVX512VLVectorVTInfo _> {
10927   let Predicates = [HasAVX512] in {
10928     defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
10929                 AVX512AIi8Base, EVEX, VVVV, EVEX_V512;
10931   let Predicates = [HasAVX512, HasVLX] in {
10932     defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
10933                 AVX512AIi8Base, EVEX, VVVV, EVEX_V128;
10934     // We can't really override the 256-bit version so change it back to unset.
10935     defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
10936                 AVX512AIi8Base, EVEX, VVVV, EVEX_V256;
10940 defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
10941                                    avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
10942 defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
10943                                    avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
// VPALIGNR is byte-granular, hence the mixed-type 3Op form with i8 infos.
10946 defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
10947                                          SchedWriteShuffle, avx512vl_i8_info,
10948                                          avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
10950 // Fragments to help convert valignq into masked valignd. Or valignq/valignd
// into vpalignr. Each XForm rescales the element-granular immediate to the
// narrower element/byte granularity (x2 for q->d, x8 for q->bytes, x4 for
// d->bytes).
10952 def ValignqImm32XForm : SDNodeXForm<timm, [{
10953   return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
10955 def ValignqImm8XForm : SDNodeXForm<timm, [{
10956   return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
10958 def ValigndImm8XForm : SDNodeXForm<timm, [{
10959   return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
// Reselects a masked valign done in the "From" type onto the "To"-typed
// instruction named OpcodeStr, rescaling the immediate with ImmXForm. Covers
// merge-masked (k) and zero-masked (kz) forms, register and load operands.
10962 multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
10963                                         X86VectorVTInfo From, X86VectorVTInfo To,
10964                                         SDNodeXForm ImmXForm> {
10965   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10967                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10970             (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
10971                                                   To.RC:$src1, To.RC:$src2,
10972                                                   (ImmXForm timm:$src3))>;
10974   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10976                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10979             (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
10980                                                    To.RC:$src1, To.RC:$src2,
10981                                                    (ImmXForm timm:$src3))>;
10983   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10985                                  (From.VT (OpNode From.RC:$src1,
10986                                                   (From.LdFrag addr:$src2),
10989             (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
10990                                                   To.RC:$src1, addr:$src2,
10991                                                   (ImmXForm timm:$src3))>;
10993   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10995                                  (From.VT (OpNode From.RC:$src1,
10996                                                   (From.LdFrag addr:$src2),
10999             (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
11000                                                    To.RC:$src1, addr:$src2,
11001                                                    (ImmXForm timm:$src3))>;
// Extends avx512_vpalign_mask_lowering with broadcast-load (rmbi/rmbik/
// rmbikz) patterns — usable only when the target instruction supports a
// broadcast memory form (i.e. VALIGND/Q, not VPALIGNR).
11004 multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
11005                                            X86VectorVTInfo From,
11006                                            X86VectorVTInfo To,
11007                                            SDNodeXForm ImmXForm> :
11008       avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
11009   def : Pat<(From.VT (OpNode From.RC:$src1,
11010                       (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
11012             (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
11013                                                   (ImmXForm timm:$src3))>;
11015   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11017                                  (From.VT (OpNode From.RC:$src1,
11019                                             (To.VT (To.BroadcastLdFrag addr:$src2))),
11022             (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
11023                                                    To.RC:$src1, addr:$src2,
11024                                                    (ImmXForm timm:$src3))>;
11026   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11028                                  (From.VT (OpNode From.RC:$src1,
11030                                             (To.VT (To.BroadcastLdFrag addr:$src2))),
11033             (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
11034                                                     To.RC:$src1, addr:$src2,
11035                                                     (ImmXForm timm:$src3))>;
11038 let Predicates = [HasAVX512] in {
11039   // For 512-bit we lower to the widest element type we can. So we only need
11040   // to handle converting valignq to valignd.
11041   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
11042                                          v16i32_info, ValignqImm32XForm>;
11045 let Predicates = [HasVLX] in {
11046   // For 128-bit we lower to the widest element type we can. So we only need
11047   // to handle converting valignq to valignd.
11048   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
11049                                          v4i32x_info, ValignqImm32XForm>;
11050   // For 256-bit we lower to the widest element type we can. So we only need
11051   // to handle converting valignq to valignd.
11052   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
11053                                          v8i32x_info, ValignqImm32XForm>;
11056 let Predicates = [HasVLX, HasBWI] in {
11057   // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
      // No broadcast (_mb) variant here: VPALIGNR has no broadcast memory form.
11058   defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
11059                                       v16i8x_info, ValignqImm8XForm>;
11060   defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
11061                                       v16i8x_info, ValigndImm8XForm>;
// vdbpsadbw: i8 sources, i16 destination — hence the mixed-type 3Op form.
11064 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
11065                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
11066                EVEX_CD8<8, CD8VF>;
// Generic unary op for one vector width: rr (register) and rm (full-vector
// load) forms with masking.
11068 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
11069                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11070   let ExeDomain = _.ExeDomain in {
11071   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11072                     (ins _.RC:$src1), OpcodeStr,
11074                     (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
11077   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11078                   (ins _.MemOp:$src1), OpcodeStr,
11080                   (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
11081               EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
11082               Sched<[sched.Folded]>;
// avx512_unary_rm plus an EVEX_B broadcast-load (rmb) form.
11086 multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
11087                             X86FoldableSchedWrite sched, X86VectorVTInfo _> :
11088            avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
11089   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11090                   (ins _.ScalarMemOp:$src1), OpcodeStr,
11091                   "${src1}"#_.BroadcastStr,
11092                   "${src1}"#_.BroadcastStr,
11093                   (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
11094              EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
11095              Sched<[sched.Folded]>;
// VL wrappers: Z under prd, Z128/Z256 under prd + HasVLX. _rm_vl omits the
// broadcast form (byte/word ops); _rmb_vl includes it (dword/qword ops).
11098 multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11099                               X86SchedWriteWidths sched,
11100                               AVX512VLVectorVTInfo VTInfo, Predicate prd> {
11101   let Predicates = [prd] in
11102   defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11105   let Predicates = [prd, HasVLX] in {
11106     defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11108     defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
11113 multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11114                                X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
11116   let Predicates = [prd] in
11117   defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11120   let Predicates = [prd, HasVLX] in {
11121     defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11123     defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
// Element-width wrappers: _dq builds D/Q flavors (with broadcast), _bw
// builds B/W flavors (no broadcast — byte/word elements can't broadcast
// here), _all combines both for B/W/D/Q instructions such as vpabs.
11128 multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
11129                                  SDNode OpNode, X86SchedWriteWidths sched,
11131   defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
11132                                avx512vl_i64_info, prd>, REX_W;
11133   defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
11134                                avx512vl_i32_info, prd>;
11137 multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
11138                                  SDNode OpNode, X86SchedWriteWidths sched,
11140   defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
11141                               avx512vl_i16_info, prd>, WIG;
11142   defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
11143                               avx512vl_i8_info, prd>, WIG;
11146 multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
11147                                   bits<8> opc_d, bits<8> opc_q,
11148                                   string OpcodeStr, SDNode OpNode,
11149                                   X86SchedWriteWidths sched> {
11150   defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
11152               avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
11156 defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
11159 // VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
// These patterns widen the 128/256-bit input into a ZMM register, run the
// 512-bit VPABSQ, and extract the low subregister of the result.
11160 let Predicates = [HasAVX512, NoVLX] in {
11161   def : Pat<(v4i64 (abs VR256X:$src)),
11164               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
11166   def : Pat<(v2i64 (abs VR128X:$src)),
11169               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
11173 // Use 512bit version to implement 128/256 bit.
// Generic NoVLX widening for unary ops: INSERT_SUBREG into an undef ZMM,
// run the Zrr form of InstrStr, then take back the narrow subregister.
11174 multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
11175                                  AVX512VLVectorVTInfo _, Predicate prd> {
11176   let Predicates = [prd, NoVLX] in {
11177     def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
11179                (!cast<Instruction>(InstrStr # "Zrr")
11180                 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11181                                _.info256.RC:$src1,
11182                                _.info256.SubRegIdx)),
11183               _.info256.SubRegIdx)>;
11185     def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
11187                (!cast<Instruction>(InstrStr # "Zrr")
11188                 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11189                                _.info128.RC:$src1,
11190                                _.info128.SubRegIdx)),
11191               _.info128.SubRegIdx)>;
// CDI instructions: leading-zero count and conflict detection (D/Q only).
11195 defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
11196                                         SchedWriteVecIMul, HasCDI>;
11198 // FIXME: Is there a better scheduler class for VPCONFLICT?
11199 defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
11200                                         SchedWriteVecALU, HasCDI>;
11202 // VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
11203 defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
11204 defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
11206 //===---------------------------------------------------------------------===//
11207 // Counts number of ones - VPOPCNTD and VPOPCNTQ
11208 //===---------------------------------------------------------------------===//
11210 // FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
11211 defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
11212                                      SchedWriteVecALU, HasVPOPCNTDQ>;
11214 defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
11215 defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
11217 //===---------------------------------------------------------------------===//
11218 // Replicate Single FP - MOVSHDUP and MOVSLDUP
11219 //===---------------------------------------------------------------------===//
// MOVSHDUP/MOVSLDUP: single-FP replicate via the generic unary VL wrapper,
// f32 elements, TB map with XS prefix.
11221 multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
11222                             X86SchedWriteWidths sched> {
11223   defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
11224                                       avx512vl_f32_info, HasAVX512>, TB, XS;
11227 defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
11228                                   SchedWriteFShuffle>;
11229 defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
11230                                   SchedWriteFShuffle>;
11232 //===----------------------------------------------------------------------===//
11233 // AVX-512 - MOVDDUP
11234 //===----------------------------------------------------------------------===//
// 128-bit MOVDDUP is special: it duplicates the low f64, which is a
// broadcast at this width — the register form matches X86VBroadcast and the
// memory form loads a single scalar (CD8VH tuple) and broadcasts it.
11236 multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
11237                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11238   let ExeDomain = _.ExeDomain in {
11239   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11240                    (ins _.RC:$src), OpcodeStr, "$src", "$src",
11241                    (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
11243   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11244                  (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
11245                  (_.VT (_.BroadcastLdFrag addr:$src))>,
11246                  EVEX, EVEX_CD8<_.EltSize, CD8VH>,
11247                  Sched<[sched.Folded]>;
11251 multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
11252 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
11253 defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
11254 VTInfo.info512>, EVEX_V512;
11256 let Predicates = [HasAVX512, HasVLX] in {
11257 defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
11258 VTInfo.info256>, EVEX_V256;
11259 defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
11260 VTInfo.info128>, EVEX_V128;
11264 multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
11265 X86SchedWriteWidths sched> {
11266 defm NAME: avx512_movddup_common<opc, OpcodeStr, sched,
11267 avx512vl_f64_info>, TB, XD, REX_W;
11270 defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;
11272 let Predicates = [HasVLX] in {
11273 def : Pat<(v2f64 (X86VBroadcast f64:$src)),
11274 (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11276 def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11277 (v2f64 VR128X:$src0)),
11278 (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
11279 (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11280 def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11282 (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11285 //===----------------------------------------------------------------------===//
11286 // AVX-512 - Unpack Instructions
11287 //===----------------------------------------------------------------------===//
// FP unpacks reuse the FP binop helper but are pure shuffles: they read no
// MXCSR state and cannot raise FP exceptions, hence the overrides here.
11289 let Uses = []<Register>, mayRaiseFPException = 0 in {
11290 defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
11291 SchedWriteFShuffleSizes, 0, 1>;
11292 defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
11293 SchedWriteFShuffleSizes>;
// Integer unpacks: byte/word forms require BWI; dword/qword forms only AVX512F.
11296 defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
11297 SchedWriteShuffle, HasBWI>;
11298 defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
11299 SchedWriteShuffle, HasBWI>;
11300 defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
11301 SchedWriteShuffle, HasBWI>;
11302 defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
11303 SchedWriteShuffle, HasBWI>;
11305 defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
11306 SchedWriteShuffle, HasAVX512>;
11307 defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
11308 SchedWriteShuffle, HasAVX512>;
11309 defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
11310 SchedWriteShuffle, HasAVX512>;
11311 defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
11312 SchedWriteShuffle, HasAVX512>;
11314 //===----------------------------------------------------------------------===//
11315 // AVX-512 - Extract & Insert Integer Instructions
11316 //===----------------------------------------------------------------------===//
// Store-to-memory form shared by VPEXTRB/VPEXTRW: extract, truncate to the
// element width, then store.
11318 multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11319 X86VectorVTInfo _> {
11320 def mr : AVX512Ii8<opc, MRMDestMem, (outs),
11321 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11322 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11323 [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
11325 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
11328 multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
11329 let Predicates = [HasBWI] in {
11330 def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
11331 (ins _.RC:$src1, u8imm:$src2),
11332 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11333 [(set GR32orGR64:$dst,
11334 (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
11335 EVEX, TA, PD, Sched<[WriteVecExtract]>;
11337 defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TA, PD;
11341 multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
11342 let Predicates = [HasBWI] in {
11343 def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
11344 (ins _.RC:$src1, u8imm:$src2),
11345 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11346 [(set GR32orGR64:$dst,
11347 (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
11348 EVEX, TB, PD, Sched<[WriteVecExtract]>;
// Disassembly-only alternate encoding of VPEXTRW (opcode 0x15, reg form).
11350 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
11351 def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
11352 (ins _.RC:$src1, u8imm:$src2),
11353 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
11354 EVEX, TA, PD, Sched<[WriteVecExtract]>;
11356 defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TA, PD;
// Dword/qword extracts (VPEXTRD/VPEXTRQ) are gated on DQI and select
// directly from the generic extractelt node.
11360 multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
11361 RegisterClass GRC> {
11362 let Predicates = [HasDQI] in {
11363 def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
11364 (ins _.RC:$src1, u8imm:$src2),
11365 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11367 (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
11368 EVEX, TA, PD, Sched<[WriteVecExtract]>;
11370 def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
11371 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11372 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11373 [(store (extractelt (_.VT _.RC:$src1),
11374 imm:$src2),addr:$dst)]>,
11375 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TA, PD,
11376 Sched<[WriteVecExtractSt]>;
11380 defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, WIG;
11381 defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, WIG;
11382 defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
11383 defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, REX_W;
// Memory form shared by all insert-element flavors; the immediate operand
// class is parameterized (timm for pinsrb/w, imm for insertelt-based d/q).
11385 multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11386 X86VectorVTInfo _, PatFrag LdFrag,
11387 SDPatternOperator immoperator> {
11388 def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
11389 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
11390 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11392 (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
11393 EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
11396 multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
11397 X86VectorVTInfo _, PatFrag LdFrag> {
11398 let Predicates = [HasBWI] in {
11399 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11400 (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
11401 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11403 (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX, VVVV,
11404 Sched<[WriteVecInsert]>;
11406 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
11410 multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
11411 X86VectorVTInfo _, RegisterClass GRC> {
11412 let Predicates = [HasDQI] in {
11413 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11414 (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
11415 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11417 (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
11418 EVEX, VVVV, TA, PD, Sched<[WriteVecInsert]>;
11420 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
11421 _.ScalarLdFrag, imm>, TA, PD;
// VPINSRD and VPINSRQ share opcode 0x22; REX_W distinguishes the qword form.
11425 defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
11426 extloadi8>, TA, PD, WIG;
11427 defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
11428 extloadi16>, TB, PD, WIG;
11429 defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
11430 defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, REX_W;
// Insert a v8i1 mask-register value as a byte: without BWI fall back to the
// VEX-encoded VPINSRB, with BWI use the EVEX form.
11432 let Predicates = [HasAVX512, NoBWI] in {
11433 def : Pat<(X86pinsrb VR128:$src1,
11434 (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
11436 (VPINSRBrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
11440 let Predicates = [HasBWI] in {
11441 def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3),
11442 (VPINSRBZrr VR128:$src1, (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
11443 GR8:$src2, sub_8bit), timm:$src3)>;
11444 def : Pat<(X86pinsrb VR128:$src1,
11445 (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
11447 (VPINSRBZrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
// f16 load/store/bitcast via VPINSRW/VPEXTRW; negative AddedComplexity keeps
// these as fallbacks behind dedicated FP16 instructions when available.
11451 // Always select FP16 instructions if available.
11452 let Predicates = [HasBWI], AddedComplexity = -10 in {
11453 def : Pat<(f16 (load addr:$src)), (COPY_TO_REGCLASS (VPINSRWZrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16X)>;
11454 def : Pat<(store f16:$src, addr:$dst), (VPEXTRWZmr addr:$dst, (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>;
11455 def : Pat<(i16 (bitconvert f16:$src)), (EXTRACT_SUBREG (VPEXTRWZrr (v8i16 (COPY_TO_REGCLASS FR16X:$src, VR128X)), 0), sub_16bit)>;
11456 def : Pat<(f16 (bitconvert i16:$src)), (COPY_TO_REGCLASS (VPINSRWZrr (v8i16 (IMPLICIT_DEF)), (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit), 0), FR16X)>;
11459 //===----------------------------------------------------------------------===//
11460 // VSHUFPS - VSHUFPD Operations
11461 //===----------------------------------------------------------------------===//
// Immediate-controlled FP shuffle, opcode 0xC6; element size picks the CD8
// scale and the PS/PD prefixing is applied at the instantiation below.
11463 multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP>{
11464 defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
11465 SchedWriteFShuffle>,
11466 EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
11470 defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, TB;
11471 defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, TB, PD, REX_W;
11473 //===----------------------------------------------------------------------===//
11474 // AVX-512 - Byte shift Left/Right
11475 //===----------------------------------------------------------------------===//
// Whole-register byte shifts by immediate. Note these are unmasked: VPSLLDQ/
// VPSRLDQ have no masking, so plain AVX512 instruction classes are used.
11477 multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
11478 Format MRMm, string OpcodeStr,
11479 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11480 def ri : AVX512<opc, MRMr,
11481 (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
11482 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11483 [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
11485 def mi : AVX512<opc, MRMm,
11486 (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
11487 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11488 [(set _.RC:$dst,(_.VT (OpNode
11489 (_.VT (bitconvert (_.LdFrag addr:$src1))),
11490 (i8 timm:$src2))))]>,
11491 Sched<[sched.Folded, sched.ReadAfterFold]>;
11494 multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
11495 Format MRMm, string OpcodeStr,
11496 X86SchedWriteWidths sched, Predicate prd>{
11497 let Predicates = [prd] in
11498 defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11499 sched.ZMM, v64i8_info>, EVEX_V512;
11500 let Predicates = [prd, HasVLX] in {
11501 defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11502 sched.YMM, v32i8x_info>, EVEX_V256;
11503 defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11504 sched.XMM, v16i8x_info>, EVEX_V128;
// Both share opcode 0x73; the ModRM reg field (/7 vs /3) selects the op.
11507 defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
11508 SchedWriteShuffle, HasBWI>,
11509 AVX512PDIi8Base, EVEX, VVVV, WIG;
11510 defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
11511 SchedWriteShuffle, HasBWI>,
11512 AVX512PDIi8Base, EVEX, VVVV, WIG;
// VPSADBW: byte inputs (_src) produce qword accumulated results (_dst), so
// source and destination use different VT infos.
11514 multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
11515 string OpcodeStr, X86FoldableSchedWrite sched,
11516 X86VectorVTInfo _dst, X86VectorVTInfo _src> {
11517 let isCommutable = 1 in
11518 def rr : AVX512BI<opc, MRMSrcReg,
11519 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
11520 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11521 [(set _dst.RC:$dst,(_dst.VT
11522 (OpNode (_src.VT _src.RC:$src1),
11523 (_src.VT _src.RC:$src2))))]>,
11525 def rm : AVX512BI<opc, MRMSrcMem,
11526 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
11527 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11528 [(set _dst.RC:$dst,(_dst.VT
11529 (OpNode (_src.VT _src.RC:$src1),
11530 (_src.VT (bitconvert
11531 (_src.LdFrag addr:$src2))))))]>,
11532 Sched<[sched.Folded, sched.ReadAfterFold]>;
11535 multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
11536 string OpcodeStr, X86SchedWriteWidths sched,
11538 let Predicates = [prd] in
11539 defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
11540 v8i64_info, v64i8_info>, EVEX_V512;
11541 let Predicates = [prd, HasVLX] in {
11542 defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
11543 v4i64x_info, v32i8x_info>, EVEX_V256;
11544 defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
11545 v2i64x_info, v16i8x_info>, EVEX_V128;
11549 defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
11550 SchedWritePSADBW, HasBWI>, EVEX, VVVV, WIG;
11552 // Transforms to swizzle an immediate to enable better matching when
11553 // memory operand isn't in the right place.
// Operand numbering in the comments below is 0-based. The VPTERNLOG
// immediate is a truth table indexed by the 3-bit value (op0<<2)|(op1<<1)|op2,
// so permuting operands permutes the 8 immediate bit positions accordingly.
11554 def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
11555 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
11556 uint8_t Imm = N->getZExtValue();
11557 // Swap bits 1/4 and 3/6.
11558 uint8_t NewImm = Imm & 0xa5;
11559 if (Imm & 0x02) NewImm |= 0x10;
11560 if (Imm & 0x10) NewImm |= 0x02;
11561 if (Imm & 0x08) NewImm |= 0x40;
11562 if (Imm & 0x40) NewImm |= 0x08;
11563 return getI8Imm(NewImm, SDLoc(N));
11565 def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
11566 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
11567 uint8_t Imm = N->getZExtValue();
11568 // Swap bits 2/4 and 3/5.
11569 uint8_t NewImm = Imm & 0xc3;
11570 if (Imm & 0x04) NewImm |= 0x10;
11571 if (Imm & 0x10) NewImm |= 0x04;
11572 if (Imm & 0x08) NewImm |= 0x20;
11573 if (Imm & 0x20) NewImm |= 0x08;
11574 return getI8Imm(NewImm, SDLoc(N));
11576 def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
11577 // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
11578 uint8_t Imm = N->getZExtValue();
11579 // Swap bits 1/2 and 5/6.
11580 uint8_t NewImm = Imm & 0x99;
11581 if (Imm & 0x02) NewImm |= 0x04;
11582 if (Imm & 0x04) NewImm |= 0x02;
11583 if (Imm & 0x20) NewImm |= 0x40;
11584 if (Imm & 0x40) NewImm |= 0x20;
11585 return getI8Imm(NewImm, SDLoc(N));
11587 def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
11588 // Convert a VPTERNLOG immediate by moving operand 0 to the end.
11589 uint8_t Imm = N->getZExtValue();
11590 // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
11591 uint8_t NewImm = Imm & 0x81;
11592 if (Imm & 0x02) NewImm |= 0x04;
11593 if (Imm & 0x04) NewImm |= 0x10;
11594 if (Imm & 0x08) NewImm |= 0x40;
11595 if (Imm & 0x10) NewImm |= 0x02;
11596 if (Imm & 0x20) NewImm |= 0x08;
11597 if (Imm & 0x40) NewImm |= 0x20;
11598 return getI8Imm(NewImm, SDLoc(N));
11600 def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
11601 // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
11602 uint8_t Imm = N->getZExtValue();
11603 // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
11604 uint8_t NewImm = Imm & 0x81;
11605 if (Imm & 0x02) NewImm |= 0x10;
11606 if (Imm & 0x04) NewImm |= 0x02;
11607 if (Imm & 0x08) NewImm |= 0x20;
11608 if (Imm & 0x10) NewImm |= 0x04;
11609 if (Imm & 0x20) NewImm |= 0x40;
11610 if (Imm & 0x40) NewImm |= 0x08;
11611 return getI8Imm(NewImm, SDLoc(N));
// VPTERNLOG: 3-source ternary logic. $src1 is tied to $dst; the masking
// passthru is therefore $src1, and the extra Pat<>s below re-target patterns
// where the passthru/memory operand appears in a different position by
// remapping the truth-table immediate with the VPTERNLOG*_imm8 transforms.
11614 multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
11615 X86FoldableSchedWrite sched, X86VectorVTInfo _,
11617 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11618 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11619 (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
11620 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11621 (OpNode (_.VT _.RC:$src1),
11624 (i8 timm:$src4)), 1, 1>,
11625 AVX512AIi8Base, EVEX, VVVV, Sched<[sched]>;
11626 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11627 (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
11628 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11629 (OpNode (_.VT _.RC:$src1),
11631 (_.VT (bitconvert (_.LdFrag addr:$src3))),
11632 (i8 timm:$src4)), 1, 0>,
11633 AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
11634 Sched<[sched.Folded, sched.ReadAfterFold]>;
11635 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11636 (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
11637 OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
11638 "$src2, ${src3}"#_.BroadcastStr#", $src4",
11639 (OpNode (_.VT _.RC:$src1),
11641 (_.VT (_.BroadcastLdFrag addr:$src3)),
11642 (i8 timm:$src4)), 1, 0>, EVEX_B,
11643 AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
11644 Sched<[sched.Folded, sched.ReadAfterFold]>;
11645 }// Constraints = "$src1 = $dst"
11647 // Additional patterns for matching passthru operand in other positions.
11648 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11649 (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11651 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11652 _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11653 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11654 (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
11656 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11657 _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11659 // Additional patterns for matching zero masking with loads in other
// positions.
11661 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11662 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11663 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11665 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11666 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11667 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11668 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11669 _.RC:$src2, (i8 timm:$src4)),
11671 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11672 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11674 // Additional patterns for matching masked loads with different
// operand orders.
11676 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11677 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11678 _.RC:$src2, (i8 timm:$src4)),
11680 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11681 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11682 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11683 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11684 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11686 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11687 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11688 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11689 (OpNode _.RC:$src2, _.RC:$src1,
11690 (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
11692 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11693 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11694 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11695 (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
11696 _.RC:$src1, (i8 timm:$src4)),
11698 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11699 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11700 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11701 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11702 _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11704 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11705 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11707 // Additional patterns for matching zero masking with broadcasts in other
// positions.
11709 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11710 (OpNode (_.BroadcastLdFrag addr:$src3),
11711 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11713 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11714 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11715 (VPTERNLOG321_imm8 timm:$src4))>;
11716 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11717 (OpNode _.RC:$src1,
11718 (_.BroadcastLdFrag addr:$src3),
11719 _.RC:$src2, (i8 timm:$src4)),
11721 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11722 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11723 (VPTERNLOG132_imm8 timm:$src4))>;
11725 // Additional patterns for matching masked broadcasts with different
// operand orders.
11727 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11728 (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
11729 _.RC:$src2, (i8 timm:$src4)),
11731 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11732 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11733 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11734 (OpNode (_.BroadcastLdFrag addr:$src3),
11735 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11737 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11738 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11739 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11740 (OpNode _.RC:$src2, _.RC:$src1,
11741 (_.BroadcastLdFrag addr:$src3),
11742 (i8 timm:$src4)), _.RC:$src1)),
11743 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11744 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11745 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11746 (OpNode _.RC:$src2,
11747 (_.BroadcastLdFrag addr:$src3),
11748 _.RC:$src1, (i8 timm:$src4)),
11750 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11751 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11752 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11753 (OpNode (_.BroadcastLdFrag addr:$src3),
11754 _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11756 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11757 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
// Instantiate VPTERNLOG at all three vector widths; 128/256-bit forms
// additionally require VLX.
11760 multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
11761 AVX512VLVectorVTInfo _> {
11762 let Predicates = [HasAVX512] in
11763 defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
11764 _.info512, NAME>, EVEX_V512;
11765 let Predicates = [HasAVX512, HasVLX] in {
11766 defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
11767 _.info128, NAME>, EVEX_V128;
11768 defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
11769 _.info256, NAME>, EVEX_V256;
11773 defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
11774 avx512vl_i32_info>;
11775 defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
11776 avx512vl_i64_info>, REX_W;
11778 // Patterns to implement vnot using vpternlog instead of creating all ones
11779 // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
11780 // so that the result is only dependent on src0. But we use the same source
11781 // for all operands to prevent a false dependency.
11782 // TODO: We should maybe have a more generalized algorithm for folding to
// vpternlog.
// (Imm 15 = 0x0F sets the truth-table bits exactly where the op0 index bit
// is 0, i.e. result = NOT op0.)
11784 let Predicates = [HasAVX512] in {
11785 def : Pat<(v64i8 (vnot VR512:$src)),
11786 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11787 def : Pat<(v32i16 (vnot VR512:$src)),
11788 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11789 def : Pat<(v16i32 (vnot VR512:$src)),
11790 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11791 def : Pat<(v8i64 (vnot VR512:$src)),
11792 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
// Without VLX, widen 128/256-bit operands to 512 bits, run the ZMM
// vpternlog, and extract the low subregister again.
11795 let Predicates = [HasAVX512, NoVLX] in {
11796 def : Pat<(v16i8 (vnot VR128X:$src)),
11799 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11800 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11801 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11802 (i8 15)), sub_xmm)>;
11803 def : Pat<(v8i16 (vnot VR128X:$src)),
11806 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11807 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11808 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11809 (i8 15)), sub_xmm)>;
11810 def : Pat<(v4i32 (vnot VR128X:$src)),
11813 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11814 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11815 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11816 (i8 15)), sub_xmm)>;
11817 def : Pat<(v2i64 (vnot VR128X:$src)),
11820 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11821 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11822 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11823 (i8 15)), sub_xmm)>;
11825 def : Pat<(v32i8 (vnot VR256X:$src)),
11828 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11829 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11830 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11831 (i8 15)), sub_ymm)>;
11832 def : Pat<(v16i16 (vnot VR256X:$src)),
11835 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11836 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11837 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11838 (i8 15)), sub_ymm)>;
11839 def : Pat<(v8i32 (vnot VR256X:$src)),
11842 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11843 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11844 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11845 (i8 15)), sub_ymm)>;
11846 def : Pat<(v4i64 (vnot VR256X:$src)),
11849 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11850 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11851 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11852 (i8 15)), sub_ymm)>;
// With VLX the narrow vpternlog encodings are available directly.
11855 let Predicates = [HasVLX] in {
11856 def : Pat<(v16i8 (vnot VR128X:$src)),
11857 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11858 def : Pat<(v8i16 (vnot VR128X:$src)),
11859 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11860 def : Pat<(v4i32 (vnot VR128X:$src)),
11861 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11862 def : Pat<(v2i64 (vnot VR128X:$src)),
11863 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11865 def : Pat<(v32i8 (vnot VR256X:$src)),
11866 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11867 def : Pat<(v16i16 (vnot VR256X:$src)),
11868 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11869 def : Pat<(v8i32 (vnot VR256X:$src)),
11870 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11871 def : Pat<(v4i64 (vnot VR256X:$src)),
11872 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11875 //===----------------------------------------------------------------------===//
11876 // AVX-512 - FixupImm
11877 //===----------------------------------------------------------------------===//
// VFIXUPIMM: $src1 is tied to $dst; $src3 is an integer table operand (TblVT),
// so the source vector and table use different VT infos. Reads MXCSR and may
// raise FP exceptions.
11879 multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
11880 X86FoldableSchedWrite sched, X86VectorVTInfo _,
11881 X86VectorVTInfo TblVT>{
11882 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
11883 Uses = [MXCSR], mayRaiseFPException = 1 in {
11884 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11885 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11886 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11887 (X86VFixupimm (_.VT _.RC:$src1),
11889 (TblVT.VT _.RC:$src3),
11890 (i32 timm:$src4))>, Sched<[sched]>;
11891 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11892 (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
11893 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11894 (X86VFixupimm (_.VT _.RC:$src1),
11896 (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
11897 (i32 timm:$src4))>,
11898 Sched<[sched.Folded, sched.ReadAfterFold]>;
11899 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11900 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11901 OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
11902 "$src2, ${src3}"#_.BroadcastStr#", $src4",
11903 (X86VFixupimm (_.VT _.RC:$src1),
11905 (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
11906 (i32 timm:$src4))>,
11907 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
11908 } // Constraints = "$src1 = $dst"
// 512-bit variant adds the {sae} (suppress-all-exceptions) register form on
// top of the regular packed forms it inherits.
11911 multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
11912 X86FoldableSchedWrite sched,
11913 X86VectorVTInfo _, X86VectorVTInfo TblVT>
11914 : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
11915 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
11916 defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11917 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11918 OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
11919 "$src2, $src3, {sae}, $src4",
11920 (X86VFixupimmSAE (_.VT _.RC:$src1),
11922 (TblVT.VT _.RC:$src3),
11923 (i32 timm:$src4))>,
11924 EVEX_B, Sched<[sched]>;
// Scalar VFIXUPIMMSS/SD: normal, {sae}, and scalar-memory forms.
11928 multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
11929 X86FoldableSchedWrite sched, X86VectorVTInfo _,
11930 X86VectorVTInfo _src3VT> {
11931 let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
11932 ExeDomain = _.ExeDomain in {
11933 defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11934 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11935 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11936 (X86VFixupimms (_.VT _.RC:$src1),
11938 (_src3VT.VT _src3VT.RC:$src3),
11939 (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
11940 let Uses = [MXCSR] in
11941 defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11942 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11943 OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
11944 "$src2, $src3, {sae}, $src4",
11945 (X86VFixupimmSAEs (_.VT _.RC:$src1),
11947 (_src3VT.VT _src3VT.RC:$src3),
11948 (i32 timm:$src4))>,
11949 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
11950 defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
11951 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11952 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11953 (X86VFixupimms (_.VT _.RC:$src1),
11955 (_src3VT.VT (scalar_to_vector
11956 (_src3VT.ScalarLdFrag addr:$src3))),
11957 (i32 timm:$src4))>,
11958 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
11962 multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
11963 AVX512VLVectorVTInfo _Vec,
11964 AVX512VLVectorVTInfo _Tbl> {
11965 let Predicates = [HasAVX512] in
11966 defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
11967 _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
11968 EVEX, VVVV, EVEX_V512;
11969 let Predicates = [HasAVX512, HasVLX] in {
11970 defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
11971 _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
11972 EVEX, VVVV, EVEX_V128;
11973 defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
11974 _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
11975 EVEX, VVVV, EVEX_V256;
11979 defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11980 SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
11981 AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
11982 defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11983 SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
11984 AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
11985 defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
11986 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11987 defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
11988 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W;
11990 // Patterns used to select SSE scalar fp arithmetic instructions from
11993 // (1) a scalar fp operation followed by a blend
11995 // The effect is that the backend no longer emits unnecessary vector
11996 // insert instructions immediately after SSE scalar fp instructions
11997 // like addss or mulss.
11999 // For example, given the following code:
12000 // __m128 foo(__m128 A, __m128 B) {
12005 // Previously we generated:
12006 // addss %xmm0, %xmm1
12007 // movss %xmm1, %xmm0
12009 // We now generate:
12010 // addss %xmm1, %xmm0
12012 // (2) a vector packed single/double fp operation followed by a vector insert
12014 // The effect is that the backend converts the packed fp instruction
12015 // followed by a vector insert into a single SSE scalar fp instruction.
12017 // For example, given the following code:
12018 // __m128 foo(__m128 A, __m128 B) {
12019 // __m128 C = A + B;
12020 //     return (__m128) {C[0], A[1], A[2], A[3]};
12023 // Previously we generated:
12024 // addps %xmm0, %xmm1
12025 // movss %xmm1, %xmm0
12027 // We now generate:
12028 // addss %xmm1, %xmm0
12030 // TODO: Some canonicalization in lowering would simplify the number of
12031 // patterns we have to try to match.
// Map a scalar fp op on element 0, re-inserted via a MOVS* node, onto the
// corresponding AVX512 *_Int instruction: plain rr/rm plus merge-masked
// (Intk) and zero-masked (Intkz) variants.
// Consistency fix: all !cast<>s use Instruction (two previously used the
// narrower `I` class, unlike every sibling pattern here).
12032 multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
12033 string OpcPrefix, SDNode MoveNode,
12034 X86VectorVTInfo _, PatLeaf ZeroFP> {
12035 let Predicates = [HasAVX512] in {
12036 // extracted scalar math op with insert via movss
12037 def : Pat<(MoveNode
12038 (_.VT VR128X:$dst),
12039 (_.VT (scalar_to_vector
12040 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12042 (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
12043 (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
12044 def : Pat<(MoveNode
12045 (_.VT VR128X:$dst),
12046 (_.VT (scalar_to_vector
12047 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12048 (_.ScalarLdFrag addr:$src))))),
12049 (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;
12051 // extracted masked scalar math op with insert via movss
12052 def : Pat<(MoveNode (_.VT VR128X:$src1),
12054 (X86selects_mask VK1WM:$mask,
12056 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12059 (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
12060 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12061 VK1WM:$mask, _.VT:$src1,
12062 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12063 def : Pat<(MoveNode (_.VT VR128X:$src1),
12065 (X86selects_mask VK1WM:$mask,
12067 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12068 (_.ScalarLdFrag addr:$src2)),
12070 (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
12071 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12072 VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12074 // extracted masked scalar math op with zero-masking, insert via movss
12075 def : Pat<(MoveNode (_.VT VR128X:$src1),
12077 (X86selects_mask VK1WM:$mask,
12079 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12080 _.FRC:$src2), (_.EltVT ZeroFP)))),
12081 (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intkz")
12082 VK1WM:$mask, _.VT:$src1,
12083 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12084 def : Pat<(MoveNode (_.VT VR128X:$src1),
12086 (X86selects_mask VK1WM:$mask,
12088 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12089 (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
12090 (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
// Instantiate the scalar fp math patterns for ADD/SUB/MUL/DIV over f32 (SS),
// f64 (SD) and f16 (SH) element types.
12094 defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
12095 defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
12096 defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
12097 defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
12099 defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
12100 defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
12101 defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
12102 defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
12104 defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>;
12105 defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>;
12106 defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>;
12107 defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>;
// Select a scalar unary op on element 0 (currently only sqrt), inserted via a
// MOVS* node, onto the corresponding V*Zr_Int instruction.
12109 multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix,
12110 SDNode Move, X86VectorVTInfo _> {
12111 let Predicates = [HasAVX512] in {
12112 def : Pat<(_.VT (Move _.VT:$dst,
12113 (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
12114 (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
12118 defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
12119 defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
12120 defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>;
12122 //===----------------------------------------------------------------------===//
12123 // AES instructions
12124 //===----------------------------------------------------------------------===//
// EVEX-encoded AES, reusing the SSE AESI_binop_rm_int multiclass with the
// AESNI intrinsics: 128/256-bit forms need VAES+VLX, 512-bit needs
// VAES+AVX512. IntPrefix names the base (128-bit) intrinsic; the wider forms
// append "_256"/"_512".
12126 multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
12127 let Predicates = [HasVLX, HasVAES] in {
12128 defm Z128 : AESI_binop_rm_int<Op, OpStr,
12129 !cast<Intrinsic>(IntPrefix),
12130 loadv2i64, 0, VR128X, i128mem>,
12131 EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V128, WIG;
12132 defm Z256 : AESI_binop_rm_int<Op, OpStr,
12133 !cast<Intrinsic>(IntPrefix#"_256"),
12134 loadv4i64, 0, VR256X, i256mem>,
12135 EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V256, WIG;
12137 let Predicates = [HasAVX512, HasVAES] in
12138 defm Z : AESI_binop_rm_int<Op, OpStr,
12139 !cast<Intrinsic>(IntPrefix#"_512"),
12140 loadv8i64, 0, VR512, i512mem>,
12141 EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V512, WIG;
12144 defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
12145 defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
12146 defm VAESDEC : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
12147 defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
12149 //===----------------------------------------------------------------------===//
12150 // PCLMUL instructions - Carry less multiplication
12151 //===----------------------------------------------------------------------===//
// EVEX-encoded carry-less multiply: the 512-bit form needs
// VPCLMULQDQ+AVX512, the 128/256-bit forms need VPCLMULQDQ+VLX.
12153 let Predicates = [HasAVX512, HasVPCLMULQDQ] in
12154 defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
12155 EVEX, VVVV, EVEX_V512, EVEX_CD8<64, CD8VF>, WIG;
12157 let Predicates = [HasVLX, HasVPCLMULQDQ] in {
12158 defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
12159 EVEX, VVVV, EVEX_V128, EVEX_CD8<64, CD8VF>, WIG;
12161 defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
12162 int_x86_pclmulqdq_256>, EVEX, VVVV, EVEX_V256,
12163 EVEX_CD8<64, CD8VF>, WIG;
// Assembler aliases for the immediate-selected forms (vpclmulqdq_aliases is
// defined elsewhere in the backend).
12167 defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
12168 defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
12169 defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
12171 //===----------------------------------------------------------------------===//
12173 //===----------------------------------------------------------------------===//
// VBMI2 variable-count funnel shifts (VPSHLDV/VPSHRDV): reg/reg (r) and
// reg/mem (m) forms. $src1 is tied to $dst, so these are read-modify-write.
12175 multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
12176 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12177 let Constraints = "$src1 = $dst",
12178 ExeDomain = VTI.ExeDomain in {
12179 defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12180 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12181 "$src3, $src2", "$src2, $src3",
12182 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
12183 T8, PD, EVEX, VVVV, Sched<[sched]>;
12184 defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12185 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12186 "$src3, $src2", "$src2, $src3",
12187 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12188 (VTI.VT (VTI.LdFrag addr:$src3))))>,
12189 T8, PD, EVEX, VVVV,
12190 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Adds the broadcast-memory (mb, EVEX.b) form on top of the r/m forms above.
12194 multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12195 X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
12196 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
12197 let Constraints = "$src1 = $dst",
12198 ExeDomain = VTI.ExeDomain in
12199 defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12200 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
12201 "${src3}"#VTI.BroadcastStr#", $src2",
12202 "$src2, ${src3}"#VTI.BroadcastStr,
12203 (OpNode VTI.RC:$src1, VTI.RC:$src2,
12204 (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12205 T8, PD, EVEX, VVVV, EVEX_B,
12206 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiate the variable-shift forms at 512-bit (VBMI2) and 128/256-bit
// (VBMI2+VLX). Note VBMI2_shift_var below uses the rm-only multiclass for the
// word element size and the rmb (broadcast-capable) one for dword/qword.
12209 multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
12210 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12211 let Predicates = [HasVBMI2] in
12212 defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12214 let Predicates = [HasVBMI2, HasVLX] in {
12215 defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12217 defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12222 multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
12223 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12224 let Predicates = [HasVBMI2] in
12225 defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12227 let Predicates = [HasVBMI2, HasVLX] in {
12228 defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12230 defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
// W uses wOp; D and Q share dqOp (distinguished by EVEX_CD8 / REX_W).
12234 multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
12235 SDNode OpNode, X86SchedWriteWidths sched> {
12236 defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
12237 avx512vl_i16_info>, REX_W, EVEX_CD8<16, CD8VF>;
12238 defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
12239 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12240 defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
12241 avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
// Immediate-count funnel shifts (VPSHLD/VPSHRD), then the instantiations of
// both the variable- and immediate-count variants.
12244 multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
12245 SDNode OpNode, X86SchedWriteWidths sched> {
12246 defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
12247 avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
12248 REX_W, EVEX_CD8<16, CD8VF>;
12249 defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
12250 OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
12251 defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
12252 sched, HasVBMI2>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
12256 defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
12257 defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
12258 defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
12259 defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
// VBMI2 byte/word compress and expand. Spacing before '<' normalized so all
// four defms use the same "multiclass<args>" form.
12262 defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
12263 avx512vl_i8_info, HasVBMI2>, EVEX;
12264 defm VPCOMPRESSW : compress_by_elt_width<0x63, "vpcompressw", WriteVarShuffle256,
12265 avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;
12267 defm VPEXPANDB : expand_by_elt_width<0x62, "vpexpandb", WriteVarShuffle256,
12268 avx512vl_i8_info, HasVBMI2>, EVEX;
12269 defm VPEXPANDW : expand_by_elt_width<0x62, "vpexpandw", WriteVarShuffle256,
12270 avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;
12272 //===----------------------------------------------------------------------===//
12274 //===----------------------------------------------------------------------===//
// VNNI dot-product accumulate (VPDP*): $src1 is tied to $dst, so every form
// is read-modify-write. r = reg/reg, m = reg/mem, mb = 32-bit broadcast.
12276 let Constraints = "$src1 = $dst" in
12277 multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12278 X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12279 bit IsCommutable> {
12280 let ExeDomain = VTI.ExeDomain in {
12281 defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12282 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12283 "$src3, $src2", "$src2, $src3",
12284 (VTI.VT (OpNode VTI.RC:$src1,
12285 VTI.RC:$src2, VTI.RC:$src3)),
12286 IsCommutable, IsCommutable>,
12287 EVEX, VVVV, T8, PD, Sched<[sched]>;
12288 defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12289 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12290 "$src3, $src2", "$src2, $src3",
12291 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12292 (VTI.VT (VTI.LdFrag addr:$src3))))>,
12293 EVEX, VVVV, EVEX_CD8<32, CD8VF>, T8, PD,
12294 Sched<[sched.Folded, sched.ReadAfterFold,
12295 sched.ReadAfterFold]>;
12296 defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12297 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
12298 OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
12299 "$src2, ${src3}"#VTI.BroadcastStr,
12300 (OpNode VTI.RC:$src1, VTI.RC:$src2,
12301 (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12302 EVEX, VVVV, EVEX_CD8<32, CD8VF>, EVEX_B,
12303 T8, PD, Sched<[sched.Folded, sched.ReadAfterFold,
12304 sched.ReadAfterFold]>;
// 512-bit requires VNNI; 128/256-bit additionally require VLX.
12308 multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
12309 X86SchedWriteWidths sched, bit IsCommutable> {
12310 let Predicates = [HasVNNI] in
12311 defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
12312 IsCommutable>, EVEX_V512;
12313 let Predicates = [HasVNNI, HasVLX] in {
12314 defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
12315 IsCommutable>, EVEX_V256;
12316 defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
12317 IsCommutable>, EVEX_V128;
12321 // FIXME: Is there a better scheduler class for VPDP?
12322 defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
12323 defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
12324 defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
12325 defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
12327 // Patterns to match VPDPWSSD from existing instructions/intrinsics.
// Fold add(acc, vpmaddwd(x, y)) into a single VPDPWSSD accumulate, for each
// vector width, with register and memory right-hand sides.
12328 let Predicates = [HasVNNI] in {
12329 def : Pat<(v16i32 (add VR512:$src1,
12330 (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
12331 (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
12332 def : Pat<(v16i32 (add VR512:$src1,
12333 (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
12334 (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
12336 let Predicates = [HasVNNI,HasVLX] in {
12337 def : Pat<(v8i32 (add VR256X:$src1,
12338 (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
12339 (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
12340 def : Pat<(v8i32 (add VR256X:$src1,
12341 (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
12342 (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
12343 def : Pat<(v4i32 (add VR128X:$src1,
12344 (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
12345 (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
12346 def : Pat<(v4i32 (add VR128X:$src1,
12347 (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
12348 (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
12351 //===----------------------------------------------------------------------===//
12353 //===----------------------------------------------------------------------===//
12355 // FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
// BITALG: per-byte and per-word population count, plus lowering helpers.
12356 defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
12357 avx512vl_i8_info, HasBITALG>;
12358 defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
12359 avx512vl_i16_info, HasBITALG>, REX_W;
12361 defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
12362 defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
// VPSHUFBITQMB writes a k-mask (KRC dst). The second, *_su pattern is the
// one AVX512_maskable_cmp uses for the masked form — NOTE(review): inferred
// from the _su naming convention elsewhere in this backend; confirm.
12364 multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12365 defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
12366 (ins VTI.RC:$src1, VTI.RC:$src2),
12368 "$src2, $src1", "$src1, $src2",
12369 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12370 (VTI.VT VTI.RC:$src2)),
12371 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12372 (VTI.VT VTI.RC:$src2))>, EVEX, VVVV, T8, PD,
12374 defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
12375 (ins VTI.RC:$src1, VTI.MemOp:$src2),
12377 "$src2, $src1", "$src1, $src2",
12378 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12379 (VTI.VT (VTI.LdFrag addr:$src2))),
12380 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12381 (VTI.VT (VTI.LdFrag addr:$src2)))>,
12382 EVEX, VVVV, EVEX_CD8<8, CD8VF>, T8, PD,
12383 Sched<[sched.Folded, sched.ReadAfterFold]>;
12386 multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12387 let Predicates = [HasBITALG] in
12388 defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
12389 let Predicates = [HasBITALG, HasVLX] in {
12390 defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
12391 defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
12395 // FIXME: Is there a better scheduler class for VPSHUFBITQMB?
12396 defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
12398 //===----------------------------------------------------------------------===//
12400 //===----------------------------------------------------------------------===//
// GFNI: GF(2^8) byte multiply and affine transforms. 512-bit forms need
// GFNI+AVX512, 128/256-bit forms need GFNI+VLX.
12402 multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12403 X86SchedWriteWidths sched> {
12404 let Predicates = [HasGFNI, HasAVX512] in
12405 defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
12407 let Predicates = [HasGFNI, HasVLX] in {
12408 defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
12410 defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
12415 defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
12417 EVEX_CD8<8, CD8VF>, T8;
// The affine forms take an 8-bit immediate and add a 64-bit-element
// broadcast-memory form (rmbi) on top of the inherited rm+imm forms.
12419 multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
12420 X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12421 X86VectorVTInfo BcstVTI>
12422 : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
12423 let ExeDomain = VTI.ExeDomain in
12424 defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12425 (ins VTI.RC:$src1, BcstVTI.ScalarMemOp:$src2, u8imm:$src3),
12426 OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
12427 "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
12428 (OpNode (VTI.VT VTI.RC:$src1),
12429 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
12430 (i8 timm:$src3))>, EVEX_B,
12431 Sched<[sched.Folded, sched.ReadAfterFold]>;
12434 multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12435 X86SchedWriteWidths sched> {
12436 let Predicates = [HasGFNI, HasAVX512] in
12437 defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
12438 v64i8_info, v8i64_info>, EVEX_V512;
12439 let Predicates = [HasGFNI, HasVLX] in {
12440 defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
12441 v32i8x_info, v4i64x_info>, EVEX_V256;
12442 defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
12443 v16i8x_info, v2i64x_info>, EVEX_V128;
12447 defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
12448 X86GF2P8affineinvqb, SchedWriteVecIMul>,
12449 EVEX, VVVV, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base;
12450 defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
12451 X86GF2P8affineqb, SchedWriteVecIMul>,
12452 EVEX, VVVV, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base;
12455 //===----------------------------------------------------------------------===//
12457 //===----------------------------------------------------------------------===//
// AVX512_4FMAPS packed/scalar multi-step FMA forms. Pattern lists are empty
// ([]): these definitions provide only the encodings/assembly, with $src1
// tied to $dst. The whole group reads MXCSR and may raise FP exceptions.
12459 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
12460 Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
12461 defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
12462 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12463 "v4fmaddps", "$src3, $src2", "$src2, $src3",
12464 []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12465 Sched<[SchedWriteFMA.ZMM.Folded]>;
12467 defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
12468 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12469 "v4fnmaddps", "$src3, $src2", "$src2, $src3",
12470 []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12471 Sched<[SchedWriteFMA.ZMM.Folded]>;
12473 defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
12474 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12475 "v4fmaddss", "$src3, $src2", "$src2, $src3",
12476 []>, VEX_LIG, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VF>,
12477 Sched<[SchedWriteFMA.Scl.Folded]>;
12479 defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
12480 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12481 "v4fnmaddss", "$src3, $src2", "$src2, $src3",
12482 []>, VEX_LIG, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VF>,
12483 Sched<[SchedWriteFMA.Scl.Folded]>;
12486 //===----------------------------------------------------------------------===//
12488 //===----------------------------------------------------------------------===//
// AVX512_4VNNIW word dot-product accumulates — assembly-only (empty
// patterns), like the 4FMAPS group above.
12490 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
12491 Constraints = "$src1 = $dst" in {
12492 defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
12493 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12494 "vp4dpwssd", "$src3, $src2", "$src2, $src3",
12495 []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12496 Sched<[SchedWriteFMA.ZMM.Folded]>;
12498 defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
12499 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12500 "vp4dpwssds", "$src3, $src2", "$src2, $src3",
12501 []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12502 Sched<[SchedWriteFMA.ZMM.Folded]>;
// Pseudos to store/load a VK16PAIR (pair of 16-bit mask registers) through
// memory as a single unit.
12505 let hasSideEffects = 0 in {
12506 let mayStore = 1, SchedRW = [WriteFStoreX] in
12507 def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
12508 let mayLoad = 1, SchedRW = [WriteFLoadX] in
12509 def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
12512 //===----------------------------------------------------------------------===//
12514 //===----------------------------------------------------------------------===//
// VP2INTERSECT: computes intersection of two vectors into a mask-register
// pair operand (_.KRPC). rr/rm/rmb operand forms; rmb is the EVEX.b
// element-broadcast form.
12516 multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
12517 def rr : I<0x68, MRMSrcReg,
12518 (outs _.KRPC:$dst),
12519 (ins _.RC:$src1, _.RC:$src2),
12520 !strconcat("vp2intersect", _.Suffix,
12521 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12522 [(set _.KRPC:$dst, (X86vp2intersect
12523 _.RC:$src1, (_.VT _.RC:$src2)))]>,
12524 EVEX, VVVV, T8, XD, Sched<[sched]>;
12526 def rm : I<0x68, MRMSrcMem,
12527 (outs _.KRPC:$dst),
12528 (ins _.RC:$src1, _.MemOp:$src2),
12529 !strconcat("vp2intersect", _.Suffix,
12530 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12531 [(set _.KRPC:$dst, (X86vp2intersect
12532 _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12533 EVEX, VVVV, T8, XD, EVEX_CD8<_.EltSize, CD8VF>,
12534 Sched<[sched.Folded, sched.ReadAfterFold]>;
12536 def rmb : I<0x68, MRMSrcMem,
12537 (outs _.KRPC:$dst),
12538 (ins _.RC:$src1, _.ScalarMemOp:$src2),
12539 !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12540 ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12541 [(set _.KRPC:$dst, (X86vp2intersect
12542 _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
12543 EVEX, VVVV, T8, XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
12544 Sched<[sched.Folded, sched.ReadAfterFold]>;
// All widths need VP2INTERSECT; 128/256-bit additionally need VLX.
12547 multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
12548 let Predicates = [HasAVX512, HasVP2INTERSECT] in
12549 defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;
12551 let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
12552 defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
12553 defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
12557 let ExeDomain = SSEPackedInt in {
12558 defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
12559 defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, REX_W;
// Two-source binop with distinct source and destination vector-type infos,
// instantiated at all three widths. Used below for VCVTNE2PS2BF16
// (two f32 vectors -> one bf16 vector).
12562 multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
12563 X86SchedWriteWidths sched,
12564 AVX512VLVectorVTInfo _SrcVTInfo,
12565 AVX512VLVectorVTInfo _DstVTInfo,
12566 SDNode OpNode, Predicate prd,
12567 bit IsCommutable = 0> {
12568 let Predicates = [prd] in
12569 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
12570 _SrcVTInfo.info512, _DstVTInfo.info512,
12571 _SrcVTInfo.info512, IsCommutable>,
12572 EVEX_V512, EVEX_CD8<32, CD8VF>;
12573 let Predicates = [HasVLX, prd] in {
12574 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
12575 _SrcVTInfo.info256, _DstVTInfo.info256,
12576 _SrcVTInfo.info256, IsCommutable>,
12577 EVEX_V256, EVEX_CD8<32, CD8VF>;
12578 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
12579 _SrcVTInfo.info128, _DstVTInfo.info128,
12580 _SrcVTInfo.info128, IsCommutable>,
12581 EVEX_V128, EVEX_CD8<32, CD8VF>;
12585 let ExeDomain = SSEPackedSingle in
12586 defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
12587 SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
12588 avx512vl_f32_info, avx512vl_bf16_info,
12589 X86cvtne2ps2bf16, HasBF16, 0>, T8, XD;
12591 // Truncate Float to BFloat16
// The Z128 instruction patterns are null_frag; selection for the 128-bit form
// is done by explicit Pat<>s further down. The "x"/"y" InstAliases
// disambiguate the Intel-syntax memory forms by source width.
12592 multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
12593 X86SchedWriteWidths sched> {
12594 let ExeDomain = SSEPackedSingle in {
12595 let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
12596 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16bf16x_info, v16f32_info,
12597 X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
12599 let Predicates = [HasBF16, HasVLX] in {
12600 let Uses = []<Register>, mayRaiseFPException = 0 in {
12601 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v4f32x_info,
12602 null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
12604 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v8f32x_info,
12605 X86cvtneps2bf16, X86cvtneps2bf16,
12606 sched.YMM, "{1to8}", "{y}">, EVEX_V256;
12608 } // Predicates = [HasBF16, HasVLX]
12609 } // ExeDomain = SSEPackedSingle
12611 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12612 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
12614 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12615 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
12616 f128mem:$src), 0, "intel">;
12617 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12618 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
12620 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12621 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
12622 f256mem:$src), 0, "intel">;
12625 defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
12626 SchedWriteCvtPD2PS>, T8, XS,
12627 EVEX_CD8<32, CD8VF>;
// Selection patterns: the null_frag'd 128-bit VCVTNEPS2BF16 forms, the masked
// (X86mcvtneps2bf16) variants, intrinsic lowerings, bf16 broadcasts, and
// fpround-to-bf16.
12629 let Predicates = [HasBF16, HasVLX] in {
12630 // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
12631 // patterns have been disabled with null_frag.
12632 def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
12633 (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12634 def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8bf16 VR128X:$src0),
12636 (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
12637 def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8bf16x_info.ImmAllZerosV,
12639 (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
12641 def : Pat<(v8bf16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
12642 (VCVTNEPS2BF16Z128rm addr:$src)>;
12643 def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8bf16 VR128X:$src0),
12645 (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12646 def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8bf16x_info.ImmAllZerosV,
12648 (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
12650 def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32
12651 (X86VBroadcastld32 addr:$src)))),
12652 (VCVTNEPS2BF16Z128rmb addr:$src)>;
12653 def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12654 (v8bf16 VR128X:$src0), VK4WM:$mask),
12655 (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12656 def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12657 v8bf16x_info.ImmAllZerosV, VK4WM:$mask),
12658 (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
12660 def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (v4f32 VR128X:$src))),
12661 (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12662 def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (loadv4f32 addr:$src))),
12663 (VCVTNEPS2BF16Z128rm addr:$src)>;
12665 def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (v8f32 VR256X:$src))),
12666 (VCVTNEPS2BF16Z256rr VR256X:$src)>;
12667 def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src))),
12668 (VCVTNEPS2BF16Z256rm addr:$src)>;
// bf16 broadcasts reuse the 16-bit integer broadcast instructions.
12670 def : Pat<(v8bf16 (X86VBroadcastld16 addr:$src)),
12671 (VPBROADCASTWZ128rm addr:$src)>;
12672 def : Pat<(v16bf16 (X86VBroadcastld16 addr:$src)),
12673 (VPBROADCASTWZ256rm addr:$src)>;
12675 def : Pat<(v8bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12676 (VPBROADCASTWZ128rr VR128X:$src)>;
12677 def : Pat<(v16bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12678 (VPBROADCASTWZ256rr VR128X:$src)>;
12680 def : Pat<(v8bf16 (X86vfpround (v8f32 VR256X:$src))),
12681 (VCVTNEPS2BF16Z256rr VR256X:$src)>;
12682 def : Pat<(v8bf16 (X86vfpround (loadv8f32 addr:$src))),
12683 (VCVTNEPS2BF16Z256rm addr:$src)>;
12685 // TODO: No scalar broadcast yet, since scalar bf16 is not a legal type.
12688 let Predicates = [HasBF16] in {
12689 def : Pat<(v32bf16 (X86VBroadcastld16 addr:$src)),
12690 (VPBROADCASTWZrm addr:$src)>;
12692 def : Pat<(v32bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12693 (VPBROADCASTWZrr VR128X:$src)>;
12695 def : Pat<(v16bf16 (X86vfpround (v16f32 VR512:$src))),
12696 (VCVTNEPS2BF16Zrr VR512:$src)>;
12697 def : Pat<(v16bf16 (X86vfpround (loadv16f32 addr:$src))),
12698 (VCVTNEPS2BF16Zrm addr:$src)>;
12699 // TODO: No scalar broadcast yet, since scalar bf16 is not a legal type.
12702 let Constraints = "$src1 = $dst" in {
12703 multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
12704 X86FoldableSchedWrite sched,
12705 X86VectorVTInfo _, X86VectorVTInfo src_v> {
12706 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12707 (ins src_v.RC:$src2, src_v.RC:$src3),
12708 OpcodeStr, "$src3, $src2", "$src2, $src3",
12709 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
12710 EVEX, VVVV, Sched<[sched]>;
12712 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12713 (ins src_v.RC:$src2, src_v.MemOp:$src3),
12714 OpcodeStr, "$src3, $src2", "$src2, $src3",
12715 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12716 (src_v.LdFrag addr:$src3)))>, EVEX, VVVV,
12717 Sched<[sched.Folded, sched.ReadAfterFold]>;
12719 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12720 (ins src_v.RC:$src2, f32mem:$src3),
12722 !strconcat("${src3}", _.BroadcastStr,", $src2"),
12723 !strconcat("$src2, ${src3}", _.BroadcastStr),
12724 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12725 (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
12726 EVEX_B, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
12729 } // Constraints = "$src1 = $dst"
// Instantiates avx512_dpbf16ps_rm at 512-bit (under prd) and at 128/256-bit
// (additionally under HasVLX).
12731 multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
12732 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
12733 AVX512VLVectorVTInfo src_v, Predicate prd> {
12734 let Predicates = [prd] in {
12735 defm Z : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
12736 src_v.info512>, EVEX_V512;
12738 let Predicates = [HasVLX, prd] in {
12739 defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
12740 src_v.info256>, EVEX_V256;
12741 defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
12742 src_v.info128>, EVEX_V128;
// VDPBF16PS (opcode 0x52, T8 XS): f32 destination/accumulator, bf16 sources,
// gated on HasBF16.
12746 let ExeDomain = SSEPackedSingle in
12747 defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
12748 avx512vl_f32_info, avx512vl_bf16_info,
12749 HasBF16>, T8, XS, EVEX_CD8<32, CD8VF>;
12751 //===----------------------------------------------------------------------===//
12753 //===----------------------------------------------------------------------===//
// VMOVW: 16-bit word moves between GPRs/memory and XMM registers, plus the
// ISel patterns that reuse them for f16/i16 scalar moves and zero-extending
// word loads into wider vectors.
12755 let Predicates = [HasFP16] in {
12756 // Move word (r/m16) to Packed word
12757 def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
12758 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, Sched<[WriteVecMoveFromGpr]>;
12759 def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src),
12760 "vmovw\t{$src, $dst|$dst, $src}",
12762 (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>,
12763 T_MAP5, PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>;
// Widen a GR16 to GR32 via INSERT_SUBREG so VMOVW2SHrr (which takes GR32)
// can be used for 16-bit values.
12765 def : Pat<(f16 (bitconvert GR16:$src)),
12766 (f16 (COPY_TO_REGCLASS
12768 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
12770 def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))),
12771 (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
12772 def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))),
12773 (VMOVW2SHrr GR32:$src)>;
12774 // FIXME: We should really find a way to improve these patterns.
12775 def : Pat<(v8i32 (X86vzmovl
12776 (insert_subvector undef,
12777 (v4i32 (scalar_to_vector
12778 (and GR32:$src, 0xffff))),
12780 (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
12781 def : Pat<(v16i32 (X86vzmovl
12782 (insert_subvector undef,
12783 (v4i32 (scalar_to_vector
12784 (and GR32:$src, 0xffff))),
12786 (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
12788 def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))),
12789 (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
12791 // AVX 128-bit movw instruction writes zeros in the high 128-bit part.
12792 def : Pat<(v8i16 (X86vzload16 addr:$src)),
12793 (VMOVWrm addr:$src)>;
12794 def : Pat<(v16i16 (X86vzload16 addr:$src)),
12795 (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
12797 // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
12798 def : Pat<(v32i16 (X86vzload16 addr:$src)),
12799 (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
// Fold extending/zero-extending i16 loads directly into VMOVWrm.
12801 def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))),
12802 (VMOVWrm addr:$src)>;
12803 def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (i32 (zextloadi16 addr:$src))))),
12804 (VMOVWrm addr:$src)>;
12805 def : Pat<(v8i32 (X86vzmovl
12806 (insert_subvector undef,
12807 (v4i32 (scalar_to_vector
12808 (i32 (zextloadi16 addr:$src)))),
12810 (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
12811 def : Pat<(v16i32 (X86vzmovl
12812 (insert_subvector undef,
12813 (v4i32 (scalar_to_vector
12814 (i32 (zextloadi16 addr:$src)))),
12816 (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
12818 // Move word from xmm register to r/m16
12819 def VMOVSH2Wrr : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
12820 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, Sched<[WriteVecMoveToGpr]>;
12821 def VMOVWmr : AVX512<0x7E, MRMDestMem, (outs),
12822 (ins i16mem:$dst, VR128X:$src),
12823 "vmovw\t{$src, $dst|$dst, $src}",
12824 [(store (i16 (extractelt (v8i16 VR128X:$src),
12825 (iPTR 0))), addr:$dst)]>,
12826 T_MAP5, PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>;
12828 def : Pat<(i16 (bitconvert FR16X:$src)),
12829 (i16 (EXTRACT_SUBREG
12830 (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)),
12832 def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
12833 (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>;
12835 // Allow "vmovw" to use GR64
12836 let hasSideEffects = 0 in {
12837 def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
12838 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
12839 def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
12840 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>;
12844 // Convert 16-bit float to i16/u16
// 512-bit form gets a rounding-control variant (avx512_vcvt_fp_rc);
// 128/256-bit forms additionally require HasVLX.
12845 multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12846 SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12847 AVX512VLVectorVTInfo _Dst,
12848 AVX512VLVectorVTInfo _Src,
12849 X86SchedWriteWidths sched> {
12850 let Predicates = [HasFP16] in {
12851 defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
12852 OpNode, MaskOpNode, sched.ZMM>,
12853 avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512,
12854 OpNodeRnd, sched.ZMM>, EVEX_V512;
12856 let Predicates = [HasFP16, HasVLX] in {
12857 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
12858 OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
12859 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
12860 OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
12864 // Convert 16-bit float to i16/u16 truncate
// Same structure as avx512_cvtph2w, but the 512-bit form gets a
// suppress-all-exceptions (SAE) variant instead of rounding control.
12865 multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12866 SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12867 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src,
12868 X86SchedWriteWidths sched> {
12869 let Predicates = [HasFP16] in {
12870 defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
12871 OpNode, MaskOpNode, sched.ZMM>,
12872 avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512,
12873 OpNodeRnd, sched.ZMM>, EVEX_V512;
12875 let Predicates = [HasFP16, HasVLX] in {
12876 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
12877 OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
12878 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
12879 OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
// FP16 <-> word conversions: signed/unsigned, rounding and truncating forms.
12883 defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt,
12884 X86cvtp2UIntRnd, avx512vl_i16_info,
12885 avx512vl_f16_info, SchedWriteCvtPD2DQ>,
12886 T_MAP5, EVEX_CD8<16, CD8VF>;
12887 defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp,
12888 X86VUintToFpRnd, avx512vl_f16_info,
12889 avx512vl_i16_info, SchedWriteCvtPD2DQ>,
12890 T_MAP5, XD, EVEX_CD8<16, CD8VF>;
12891 defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si,
12892 X86cvttp2si, X86cvttp2siSAE,
12893 avx512vl_i16_info, avx512vl_f16_info,
12894 SchedWriteCvtPD2DQ>, T_MAP5, PD, EVEX_CD8<16, CD8VF>;
12895 defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw",
12896 X86cvttp2ui, X86cvttp2uiSAE,
12897 avx512vl_i16_info, avx512vl_f16_info,
12898 SchedWriteCvtPD2DQ>, T_MAP5, EVEX_CD8<16, CD8VF>;
12899 defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int,
12900 X86cvtp2IntRnd, avx512vl_i16_info,
12901 avx512vl_f16_info, SchedWriteCvtPD2DQ>,
12902 T_MAP5, PD, EVEX_CD8<16, CD8VF>;
12903 defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp,
12904 X86VSintToFpRnd, avx512vl_f16_info,
12905 avx512vl_i16_info, SchedWriteCvtPD2DQ>,
12906 T_MAP5, XS, EVEX_CD8<16, CD8VF>;
12908 // Convert Half to Signed/Unsigned Doubleword
// The 128-bit form only consumes the low 4 f16 elements, hence the explicit
// "{1to4}" broadcast string and f64mem operand.
12909 multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12910 SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12911 X86SchedWriteWidths sched> {
12912 let Predicates = [HasFP16] in {
12913 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
12914 MaskOpNode, sched.ZMM>,
12915 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info,
12916 OpNodeRnd, sched.ZMM>, EVEX_V512;
12918 let Predicates = [HasFP16, HasVLX] in {
12919 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
12920 MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
12921 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
12922 MaskOpNode, sched.YMM>, EVEX_V256;
12926 // Convert Half to Signed/Unsigned Doubleword with truncation
// Same structure as avx512_cvtph2dq, but with an SAE variant instead of
// rounding control for the 512-bit form.
12927 multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12928 SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12929 X86SchedWriteWidths sched> {
12930 let Predicates = [HasFP16] in {
12931 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
12932 MaskOpNode, sched.ZMM>,
12933 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info,
12934 OpNodeRnd, sched.ZMM>, EVEX_V512;
12936 let Predicates = [HasFP16, HasVLX] in {
12937 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
12938 MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
12939 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
12940 MaskOpNode, sched.YMM>, EVEX_V256;
// FP16 -> doubleword conversions: signed/unsigned, rounding and truncating.
12945 defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
12946 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
12947 EVEX_CD8<16, CD8VH>;
12948 defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt,
12949 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5,
12950 EVEX_CD8<16, CD8VH>;
12952 defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si,
12953 X86cvttp2si, X86cvttp2siSAE,
12954 SchedWriteCvtPS2DQ>, T_MAP5, XS,
12955 EVEX_CD8<16, CD8VH>;
12957 defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
12958 X86cvttp2ui, X86cvttp2uiSAE,
12959 SchedWriteCvtPS2DQ>, T_MAP5,
12960 EVEX_CD8<16, CD8VH>;
12962 // Convert Half to Signed/Unsigned Quadword
12963 multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12964 SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12965 X86SchedWriteWidths sched> {
12966 let Predicates = [HasFP16] in {
12967 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
12968 MaskOpNode, sched.ZMM>,
12969 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
12970 OpNodeRnd, sched.ZMM>, EVEX_V512;
12972 let Predicates = [HasFP16, HasVLX] in {
12973 // Explicitly specified broadcast string, since we take only 2 elements
12974 // from v8f16x_info source
12975 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
12976 MaskOpNode, sched.XMM, "{1to2}", "", f32mem>,
12978 // Explicitly specified broadcast string, since we take only 4 elements
12979 // from v8f16x_info source
12980 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
12981 MaskOpNode, sched.YMM, "{1to4}", "", f64mem>,
12986 // Convert Half to Signed/Unsigned Quadword with truncation
// Same as avx512_cvtph2qq, but SAE instead of rounding control at 512 bits.
12987 multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12988 SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12989 X86SchedWriteWidths sched> {
12990 let Predicates = [HasFP16] in {
12991 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
12992 MaskOpNode, sched.ZMM>,
12993 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
12994 OpNodeRnd, sched.ZMM>, EVEX_V512;
12996 let Predicates = [HasFP16, HasVLX] in {
12997 // Explicitly specified broadcast string, since we take only 2 elements
12998 // from v8f16x_info source
12999 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
13000 MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128;
13001 // Explicitly specified broadcast string, since we take only 4 elements
13002 // from v8f16x_info source
13003 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
13004 MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256;
// FP16 -> quadword conversions: signed/unsigned, rounding and truncating.
13008 defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
13009 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
13010 EVEX_CD8<16, CD8VQ>;
13012 defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
13013 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
13014 EVEX_CD8<16, CD8VQ>;
13016 defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
13017 X86cvttp2si, X86cvttp2siSAE,
13018 SchedWriteCvtPS2DQ>, T_MAP5, PD,
13019 EVEX_CD8<16, CD8VQ>;
13021 defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
13022 X86cvttp2ui, X86cvttp2uiSAE,
13023 SchedWriteCvtPS2DQ>, T_MAP5, PD,
13024 EVEX_CD8<16, CD8VQ>;
13026 // Convert Signed/Unsigned Quadword to Half
13027 multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13028 SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13029 X86SchedWriteWidths sched> {
13030 // we need "x"/"y"/"z" suffixes in order to distinguish between 128, 256 and
13031 // 512 memory forms of these instructions in Asm Parser. They have the same
13032 // dest type - 'v8f16x_info'. We also specify the broadcast string explicitly
13033 // due to the same reason.
13034 let Predicates = [HasFP16] in {
13035 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode,
13036 MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
13037 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
13038 OpNodeRnd, sched.ZMM>, EVEX_V512;
13040 let Predicates = [HasFP16, HasVLX] in {
// Instruction patterns are disabled (null_frag) here; masking selection is
// handled by the explicit X86VMSintToFP/X86VMUintToFP patterns below.
13041 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
13042 null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
13043 i128mem, VK2WM>, EVEX_V128;
13044 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
13045 null_frag, null_frag, sched.YMM, "{1to4}", "{y}",
13046 i256mem, VK4WM>, EVEX_V256;
// AT&T aliases for the "x" (128-bit source) forms: plain, masked,
// zero-masked, and the three broadcast variants.
13049 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
13050 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
13051 VR128X:$src), 0, "att">;
13052 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
13053 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
13054 VK2WM:$mask, VR128X:$src), 0, "att">;
13055 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
13056 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
13057 VK2WM:$mask, VR128X:$src), 0, "att">;
13058 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
13059 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
13060 i64mem:$src), 0, "att">;
13061 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
13062 "$dst {${mask}}, ${src}{1to2}}",
13063 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
13064 VK2WM:$mask, i64mem:$src), 0, "att">;
13065 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
13066 "$dst {${mask}} {z}, ${src}{1to2}}",
13067 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
13068 VK2WM:$mask, i64mem:$src), 0, "att">;
// AT&T aliases for the "y" (256-bit source) forms.
13070 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
13071 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
13072 VR256X:$src), 0, "att">;
13073 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
13074 "$dst {${mask}}, $src}",
13075 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
13076 VK4WM:$mask, VR256X:$src), 0, "att">;
13077 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
13078 "$dst {${mask}} {z}, $src}",
13079 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
13080 VK4WM:$mask, VR256X:$src), 0, "att">;
13081 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
13082 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
13083 i64mem:$src), 0, "att">;
13084 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
13085 "$dst {${mask}}, ${src}{1to4}}",
13086 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
13087 VK4WM:$mask, i64mem:$src), 0, "att">;
13088 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
13089 "$dst {${mask}} {z}, ${src}{1to4}}",
13090 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
13091 VK4WM:$mask, i64mem:$src), 0, "att">;
// AT&T aliases for the "z" (512-bit source) forms.
13093 def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
13094 (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
13095 VR512:$src), 0, "att">;
13096 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
13097 "$dst {${mask}}, $src}",
13098 (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
13099 VK8WM:$mask, VR512:$src), 0, "att">;
13100 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
13101 "$dst {${mask}} {z}, $src}",
13102 (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
13103 VK8WM:$mask, VR512:$src), 0, "att">;
13104 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
13105 (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
13106 i64mem:$src), 0, "att">;
13107 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
13108 "$dst {${mask}}, ${src}{1to8}}",
13109 (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
13110 VK8WM:$mask, i64mem:$src), 0, "att">;
13111 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
13112 "$dst {${mask}} {z}, ${src}{1to8}}",
13113 (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
13114 VK8WM:$mask, i64mem:$src), 0, "att">;
// Quadword -> FP16 conversions (signed and unsigned).
13117 defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
13118 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5,
13119 EVEX_CD8<64, CD8VF>;
13121 defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
13122 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5, XD,
13123 EVEX_CD8<64, CD8VF>;
13125 // Convert half to signed/unsigned int 32/64
// "{l}"/"{q}" select the 32-bit vs 64-bit GPR assembly suffix; the "T"
// variants truncate (avx512_cvt_s_all with SAE), the others honor rounding.
13126 defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
13127 X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>,
13128 T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13129 defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
13130 X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>,
13131 T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
13132 defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
13133 X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>,
13134 T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13135 defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
13136 X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>,
13137 T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
13139 defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
13140 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13141 "{l}", HasFP16>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13142 defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
13143 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13144 "{q}", HasFP16>, REX_W, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13145 defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
13146 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13147 "{l}", HasFP16>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13148 defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
13149 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13150 "{q}", HasFP16>, T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
// Signed/unsigned 32/64-bit integer -> scalar FP16 conversions, plus ISel
// patterns that fold loads and elide the VMOVSH emitted by clang's
// intrinsic lowering.
13152 let Predicates = [HasFP16] in {
13153 defm VCVTSI2SHZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
13154 v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
13155 T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
13156 defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
13157 v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
13158 T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VT1>;
13159 defm VCVTUSI2SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
13160 v8f16x_info, i32mem, loadi32,
13161 "cvtusi2sh","l">, T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
13162 defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
13163 v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
13164 T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VT1>;
13165 def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13166 (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
13168 def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13169 (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
// Fold integer loads directly into the conversion.
13172 def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
13173 (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13174 def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
13175 (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13177 def : Pat<(f16 (any_sint_to_fp GR32:$src)),
13178 (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13179 def : Pat<(f16 (any_sint_to_fp GR64:$src)),
13180 (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13182 def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
13183 (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13184 def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
13185 (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13187 def : Pat<(f16 (any_uint_to_fp GR32:$src)),
13188 (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13189 def : Pat<(f16 (any_uint_to_fp GR64:$src)),
13190 (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13192 // Patterns used for matching vcvtsi2sh intrinsic sequences from clang
13193 // which produce unnecessary vmovsh instructions
13194 def : Pat<(v8f16 (X86Movsh
13195 (v8f16 VR128X:$dst),
13196 (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
13197 (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13199 def : Pat<(v8f16 (X86Movsh
13200 (v8f16 VR128X:$dst),
13201 (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
13202 (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13204 def : Pat<(v8f16 (X86Movsh
13205 (v8f16 VR128X:$dst),
13206 (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
13207 (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13209 def : Pat<(v8f16 (X86Movsh
13210 (v8f16 VR128X:$dst),
13211 (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
13212 (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13214 def : Pat<(v8f16 (X86Movsh
13215 (v8f16 VR128X:$dst),
13216 (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
13217 (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13219 def : Pat<(v8f16 (X86Movsh
13220 (v8f16 VR128X:$dst),
13221 (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
13222 (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13224 def : Pat<(v8f16 (X86Movsh
13225 (v8f16 VR128X:$dst),
13226 (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
13227 (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13229 def : Pat<(v8f16 (X86Movsh
13230 (v8f16 VR128X:$dst),
13231 (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
13232 (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13233 } // Predicates = [HasFP16]
// Selection patterns for the 128/256-bit VCVT(U)QQ2PH forms, whose
// instruction patterns were declared with null_frag above.
13235 let Predicates = [HasFP16, HasVLX] in {
13236 // Special patterns to allow use of X86VMSintToFP for masking. Instruction
13237 // patterns have been disabled with null_frag.
13238 def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
13239 (VCVTQQ2PHZ256rr VR256X:$src)>;
13240 def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13242 (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13243 def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13245 (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13247 def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
13248 (VCVTQQ2PHZ256rm addr:$src)>;
13249 def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13251 (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13252 def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13254 (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13256 def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13257 (VCVTQQ2PHZ256rmb addr:$src)>;
13258 def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13259 (v8f16 VR128X:$src0), VK4WM:$mask),
13260 (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13261 def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13262 v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13263 (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13265 def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
13266 (VCVTQQ2PHZ128rr VR128X:$src)>;
13267 def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13269 (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13270 def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13272 (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13274 def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
13275 (VCVTQQ2PHZ128rm addr:$src)>;
13276 def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13278 (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13279 def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13281 (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13283 def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13284 (VCVTQQ2PHZ128rmb addr:$src)>;
13285 def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13286 (v8f16 VR128X:$src0), VK2WM:$mask),
13287 (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13288 def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13289 v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13290 (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
13292 // Special patterns to allow use of X86VMUintToFP for masking. Instruction
13293 // patterns have been disabled with null_frag.
13294 def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
13295 (VCVTUQQ2PHZ256rr VR256X:$src)>;
13296 def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13298 (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13299 def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13301 (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13303 def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
13304 (VCVTUQQ2PHZ256rm addr:$src)>;
13305 def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13307 (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13308 def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13310 (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13312 def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13313 (VCVTUQQ2PHZ256rmb addr:$src)>;
13314 def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13315 (v8f16 VR128X:$src0), VK4WM:$mask),
13316 (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13317 def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13318 v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13319 (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13321 def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
13322 (VCVTUQQ2PHZ128rr VR128X:$src)>;
13323 def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13325 (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13326 def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13328 (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13330 def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
13331 (VCVTUQQ2PHZ128rm addr:$src)>;
13332 def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13334 (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13335 def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13337 (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13339 def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13340 (VCVTUQQ2PHZ128rmb addr:$src)>;
13341 def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13342 (v8f16 VR128X:$src0), VK2WM:$mask),
13343 (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13344 def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13345 v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13346 (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
// Complex FMA register/memory/broadcast forms. $dst is earlyclobber and tied
// to the accumulator $src1.
13349 let Constraints = "@earlyclobber $dst, $src1 = $dst" in {
13350 multiclass avx512_cfmaop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> {
13351 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13352 (ins _.RC:$src2, _.RC:$src3),
13353 OpcodeStr, "$src3, $src2", "$src2, $src3",
13354 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX, VVVV;
13356 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13357 (ins _.RC:$src2, _.MemOp:$src3),
13358 OpcodeStr, "$src3, $src2", "$src2, $src3",
13359 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX, VVVV;
13361 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13362 (ins _.RC:$src2, _.ScalarMemOp:$src3),
13363 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr),
13364 (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX, VVVV;
13366 } // Constraints = "@earlyclobber $dst, $src1 = $dst"
// Rounding-control (rb) form of the complex FMA ops; takes an explicit
// AVX512RC rounding operand.
13368 multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
13369 X86VectorVTInfo _> {
13370 let Constraints = "@earlyclobber $dst, $src1 = $dst" in
13371 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13372 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
13373 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
13374 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>,
13375 EVEX, VVVV, EVEX_B, EVEX_RC;
// Instantiates the complex FMA forms at 512 bits (with rounding variant)
// and, under HasVLX, at 256/128 bits.
13379 multiclass avx512_cfmaop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, bit IsCommutable> {
13380 let Predicates = [HasFP16] in {
13381 defm Z : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>,
13382 avx512_cfmaop_round<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
13383 EVEX_V512, Sched<[WriteFMAZ]>;
13385 let Predicates = [HasVLX, HasFP16] in {
13386 defm Z256 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v8f32x_info, IsCommutable>, EVEX_V256, Sched<[WriteFMAY]>;
13387 defm Z128 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v4f32x_info, IsCommutable>, EVEX_V128, Sched<[WriteFMAX]>;
// Complex multiply: built on avx512_fp_packed with an earlyclobber $dst
// clobber constraint; 512-bit form also gets a rounding-control variant.
13391 multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13392 SDNode MaskOpNode, SDNode OpNodeRnd, bit IsCommutable> {
13393 let Predicates = [HasFP16] in {
13394 defm Z : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
13395 WriteFMAZ, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>,
13396 avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, WriteFMAZ, v16f32_info,
13397 "", "@earlyclobber $dst">, EVEX_V512;
13399 let Predicates = [HasVLX, HasFP16] in {
13400 defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
13401 WriteFMAY, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V256;
13402 defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
13403 WriteFMAX, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V128;
// Packed complex FP16 multiply(-add) instructions. The plain forms are
// commutable (1); the conjugating "C" forms are not (0).
13408 let Uses = [MXCSR] in {
13409 defm VFMADDCPH : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>,
13410 T_MAP6, XS, EVEX_CD8<32, CD8VF>;
13411 defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>,
13412 T_MAP6, XD, EVEX_CD8<32, CD8VF>;
13414 defm VFMULCPH : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc,
13415 x86vfmulcRnd, 1>, T_MAP6, XS, EVEX_CD8<32, CD8VF>;
13416 defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc,
13417 x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6, XD, EVEX_CD8<32, CD8VF>;
// Scalar complex FMA forms (r / m / rb with rounding control); operates on
// v4f32 vectors in VR128X with $dst earlyclobber and tied to $src1.
13421 multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
13422 bit IsCommutable> {
13423 let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
13424 defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13425 (ins VR128X:$src2, VR128X:$src3), OpcodeStr,
13426 "$src3, $src2", "$src2, $src3",
13427 (v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
13428 Sched<[WriteFMAX]>;
13429 defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst),
13430 (ins VR128X:$src2, ssmem:$src3), OpcodeStr,
13431 "$src3, $src2", "$src2, $src3",
13432 (v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
13433 Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
13434 defm rb : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13435 (ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
13436 "$rc, $src3, $src2", "$src2, $src3, $rc",
13437 (v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,
13438 EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
// Common multiclass for scalar complex-FP16 multiply (VF[C]MULCSH).
// Instantiates three forms via AVX512_maskable:
//   rr  - register-register; IsCommutable is forwarded for all three
//         commutability slots (plain / masked / zero-masked forms);
//   rm  - memory operand folded via sse_load_f32 (ssmem:$src2);
//   rrb - register form with an embedded rounding-control operand
//         (AVX512RC:$rc), encoded with EVEX_B + EVEX_RC.
// rm and rrb pass 0, 0, 0 — neither is treated as commutable.
// X86selects is the masking select node, and "@earlyclobber $dst" keeps the
// destination register from overlapping the sources.
// NOTE(review): f32x_info / v4f32 is used for an FP16 complex scalar op —
// presumably one complex FP16 value per 32-bit lane; confirm against the ISA.
multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                     SDNode OpNodeRnd, bit IsCommutable> {
  let Predicates = [HasFP16] in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
                              (ins VR128X:$src1, VR128X:$src2), OpcodeStr,
                              "$src2, $src1", "$src1, $src2",
                              (v4f32 (OpNode VR128X:$src1, VR128X:$src2)),
                              IsCommutable, IsCommutable, IsCommutable,
                              X86selects, "@earlyclobber $dst">, Sched<[WriteFMAX]>;
    defm rm : AVX512_maskable<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
                              (ins VR128X:$src1, ssmem:$src2), OpcodeStr,
                              "$src2, $src1", "$src1, $src2",
                              (v4f32 (OpNode VR128X:$src1, (sse_load_f32 addr:$src2))),
                              0, 0, 0, X86selects, "@earlyclobber $dst">,
             Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
    defm rrb : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
                               (ins VR128X:$src1, VR128X:$src2, AVX512RC:$rc), OpcodeStr,
                               "$rc, $src2, $src1", "$src1, $src2, $rc",
                               (OpNodeRnd (v4f32 VR128X:$src1), (v4f32 VR128X:$src2), (i32 timm:$rc)),
                               0, 0, 0, X86selects, "@earlyclobber $dst">,
              EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
// Scalar complex-FP16 FMA and multiply instruction definitions.
// MXCSR is listed in Uses since these FP operations depend on its state.
// As with the packed forms above, the non-conjugate variants (XS prefix)
// are commutable (trailing 1) and the conjugate variants (XD prefix) are
// not (trailing 0). FMA forms share opcode 0x57 and multiply forms 0xD7 in
// map T_MAP6; CD8VT1 selects the single-element (tuple-1) disp8 scaling.
// NOTE(review): VEX_LIG appears only on the multiply forms, not the FMA
// forms — presumably because the FMA encodings fix EVEX.L; confirm against
// the encoding spec.
let Uses = [MXCSR] in {
  defm VFMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>,
                    T_MAP6, XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX, VVVV;
  defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>,
                     T_MAP6, XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX, VVVV;
  defm VFMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>,
                   T_MAP6, XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX, VVVV;
  defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>,
                    T_MAP6, XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX, VVVV;