//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// Group template arguments that can be derived from the vector type (EltNum x
// EltVT).  These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types, in which case NumElts is 1.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  RegisterClass RC = rc;
  ValueType EltVT = eltvt;
  int NumElts = numelts;

  // Corresponding mask register class.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Corresponding mask register pair class.
  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));

  // Corresponding write-mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // The mask VT.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // VTName is a string name for the vector VT. For vector types it will be
  // v # NumElts # EltVT, so for a vector of 8 elements of i32 it will be v8i32.
  // It is a little more complex for scalar types, where NumElts = 1:
  // in this case we build v4f32 or v2f64.
  string VTName = "v" # !if (!eq (NumElts, 1),
                        !if (!eq (EltVT.Size, 32), 4,
                        !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;

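  // Illustrative note (not from the original source): for v16i32_info below,
  // NumElts is 16, so VTName resolves to "v16i32"; for the scalar f32x_info
  // (NumElts = 1, EltVT.Size = 32) it resolves to "v4f32", and for f64x_info
  // to "v2f64", i.e. the smallest 128-bit vector type with that element.
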
  // The vector VT.
  ValueType VT = !cast<ValueType>(VTName);

  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits, e.g. 32 for v16i32.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of RC in bits, e.g. 512 for VR512.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
  // FP scalar memory operand for intrinsics - ssmem/sdmem.
  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));

  // Load patterns
  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);

  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);

  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
  PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);

  ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
                                          !cast<ComplexPattern>("sse_load_f32"),
                                    !if (!eq (EltTypeName, "f64"),
                                          !cast<ComplexPattern>("sse_load_f64"),
                                    ?));

  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

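  // Illustrative note (not from the original source): for a 16-element type
  // this yields "{1to16}", the embedded-broadcast suffix printed after a
  // memory operand, e.g. (Intel syntax, roughly)
  //   vaddps zmm0, zmm1, dword ptr [rax]{1to16}
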
  // 8-bit compressed displacement tuple/subvector format.  This is only
  // defined for NumElts <= 8.
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);

  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                     SSEPackedInt));

  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);

  dag ImmAllZerosV = (VT immAllZerosV);

  string ZSuffix = !if (!eq (Size, 128), "Z128",
                   !if (!eq (Size, 256), "Z256", "Z"));
}

def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;

// "x" in v32i8x_info means RC = VR256X
def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;

def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;

// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows us to use the same masking logic.
def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;

class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}

def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
                                             v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
                                             v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
                                             v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
                                             v2i64x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
                                             v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
                                             v2f64x_info>;

class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
                       ValueType _vt> {
  RegisterClass KRC = _krc;
  RegisterClass KRCWM = _krcwm;
  ValueType KVT = _vt;
}

def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;

// This multiclass generates the masking variants from the non-masking
// variant.  It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> {
  let isCommutable = IsCommutable in
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>,
              EVEX_K {
      // In case of the 3src subclass this is overridden with a let.
      string Constraints = MaskingConstraint;
    }

  // Zero masking does not add any restrictions to the operand-commuting
  // transformation, so it is OK to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                       ZeroMaskingPattern>,
              EVEX_KZ;
}

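// Illustrative note (not from the original source): a single use of
// AVX512_maskable_custom therefore produces three records - NAME (unmasked),
// NAME#k (merge-masking, EVEX_K) and NAME#kz (zero-masking, EVEX_KZ) - whose
// assembly differs only in the destination decoration, roughly:
//   vaddps %zmm2, %zmm1, %zmm0
//   vaddps %zmm2, %zmm1, %zmm0 {%k1}
//   vaddps %zmm2, %zmm1, %zmm0 {%k1} {z}
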
// Common base class of AVX512_maskable and AVX512_maskable_3src.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDNode Select = vselect,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable, IsKZCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag MaskRHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           SDNode Select = vselect> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                              (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                          [(set _.RC:$dst,
                              (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                          "$src0 = $dst", IsCommutable, IsKCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDNode Select = vselect> :
   AVX512_maskable_common<O, F, _, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                          Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                          IsKZCommutable>;

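// Illustrative sketch (not from the original source): a typical binary-op use
// of AVX512_maskable might look roughly like the following (the real VADDPS
// definitions come from other multiclasses in this file; the defm name below
// is only a hypothetical example). The merge-masked pattern then becomes
// (vselect $mask, (fadd $src1, $src2), $src0) with the "$src0 = $dst" tie:
//   defm VADDPS_sketch : AVX512_maskable<0x58, MRMSrcReg, v16f32_info,
//                          (outs VR512:$dst), (ins VR512:$src1, VR512:$src2),
//                          "vaddps", "$src2, $src1", "$src1, $src2",
//                          (v16f32 (fadd VR512:$src1, VR512:$src2)), 1>;
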
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS> :
   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                   RHS, 0, 0, 0, X86selects>;

// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDNode Select = vselect,
                                bit MaskOnly = 0> :
   AVX512_maskable_common<O, F, _, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          !if(MaskOnly, (null_frag), RHS),
                          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                          Select, "", IsCommutable, IsKCommutable>;

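// Illustrative note (not from the original source): this shape fits
// instructions such as FMA, where the destination is also a source. Because
// $src1 is tied to $dst, merge-masking preserves elements from $src1 rather
// than from an extra $src0 operand, which is why MaskingConstraint is empty.
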
// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
   AVX512_maskable_common<O, F, OutVT, Outs,
                          !con((ins InVT.RC:$src1), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                          (vselect InVT.KRCWM:$mask, RHS,
                           (bitconvert InVT.RC:$src1)),
                           vselect, "", IsCommutable>;

multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS,
                                     bit IsCommutable = 0,
                                     bit IsKCommutable = 0,
                                     bit MaskOnly = 0> :
   AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                        IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                        X86selects, MaskOnly>;

multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "$src0 = $dst">;

multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "">;

// Instructions with a mask that put their result in a mask register,
// like "compare" and "vptest".
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  bit IsCommutable = 0> {
    let isCommutable = IsCommutable in {
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>, EVEX_K;
    }
}

multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  bit IsCommutable = 0> :
  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.KRC:$dst, RHS)],
                         [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;

multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag RHS_su, bit IsCommutable = 0> :
   AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (and _.KRCWM:$mask, RHS_su), IsCommutable>;

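// Illustrative note (not from the original source): for the masked compare
// variant the compare result is ANDed with the incoming write-mask, i.e. the
// masking pattern is (and $mask, RHS_su). This matches the hardware behaviour
// of e.g.
//   vpcmpeqd k1 {k2}, zmm0, zmm1
// where lanes not selected by k2 are zeroed in the result mask.
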
// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}

let Predicates = [HasAVX512] in {
def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
}

// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all-ones and all-zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                      (v16i32 immAllOnesV),
                                                      (v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                           (v8i64 immAllOnesV),
                                           (v8i64 immAllZerosV)))]>;
}

let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}

let Predicates = [HasAVX512] in {
def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
}

// Alias instructions that map fld0 to xorps for SSE or vxorps for AVX.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fp64imm0)]>;
  def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                            [(set VR128X:$dst, fp128imm0)]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//===----------------------------------------------------------------------===//

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                               (From.VT (From.LdFrag addr:$src2)),
                               (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                               (From.VT (From.LdFrag addr:$src2)),
                               (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;

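// Illustrative note (not from the original source): the mnemonic is built from
// the *source* (From) type, so inserting a v4f32 (VR128X) subvector produces
// "vinsertf32x4". Both a register form (rr) and a memory form (rm) are
// generated, each with the k/kz masking variants from AVX512_maskable_split.
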
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                       X86VectorVTInfo To, PatFrag vinsert_insert,
                       SDNodeXForm INSERT_get_vinsert_imm, list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vinsert_insert:$ins
                     (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                     To.RC:$src1, From.RC:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;

    def : Pat<(vinsert_insert:$ins
                  (To.VT To.RC:$src1),
                  (From.VT (From.LdFrag addr:$src2)),
                  (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                  To.RC:$src1, addr:$src2,
                  (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}

multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   null_frag, vinsert128_insert, sched>,
                                   VEX_W1X, EVEX_V256;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 VEX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   null_frag, vinsert256_insert, sched>,
                                   EVEX_V512;
  }
}

// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;

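// Illustrative note (not from the original source): the defms above yield,
// among others, VINSERTF32x4Zrr / VINSERTI64x4Zrr and their masked variants,
// assembling roughly as (Intel syntax)
//   vinsertf32x4 zmm1 {k1}, zmm2, xmm3, 2
//   vinserti64x4 zmm1, zmm2, ymm3, 1
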
// Codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen patterns with the alternative types: insert VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen patterns with the alternative types: insert VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: insert VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT
                                             (bitconvert
                                              (From.LdFrag addr:$src2))),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT (From.LdFrag addr:$src2)),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}

defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

// vinsertps - insert f32 to XMM
let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          timm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                             addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, To.KRCWM:$mask,
                                        From.RC:$src1, u8imm:$idx),
                     "vextract" # To.EltTypeName # "x" # To.NumElts #
                          "\t{$idx, $src1, $dst {${mask}}|"
                          "$dst {${mask}}, $src1, $idx}", []>,
                    EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;

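// Illustrative note (not from the original source): here the mnemonic is built
// from the *destination* (To) type, so extracting a v4f32 subvector from a
// 512-bit source produces "vextractf32x4". The rr form gets the k/kz masked
// variants, while stores get a separate mrk record with an explicit write-mask
// operand.
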
// Codegen pattern for the alternative types
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                X86VectorVTInfo To, PatFrag vextract_extract,
                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
     def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
               (To.VT (!cast<Instruction>(InstrStr#"rr")
                          From.RC:$src1,
                          (EXTRACT_get_vextract_imm To.RC:$ext)))>;
     def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                              (iPTR imm))), addr:$dst),
               (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
                (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}

multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   vextract128_extract, SchedRR, SchedMR>,
                                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                                   X86VectorVTInfo< 8, EltVT64, VR512>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   vextract256_extract, SchedRR, SchedMR>,
                                       VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}

// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;

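// Illustrative note (not from the original source): these defms yield e.g.
// VEXTRACTF32x4Zrr and VEXTRACTI64x4Zrr, assembling roughly as (Intel syntax)
//   vextractf32x4 xmm1 {k1}{z}, zmm2, 3
//   vextracti64x4 ymm1, zmm2, 1
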
// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC256
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types extract VEC256 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [NoVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

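// Illustrative note (not from the original source): an index of (iPTR 2) on a
// v8i64 selects elements [3:2], i.e. bits [255:128] of the zmm register.
// Taking the ymm subregister first and then extracting its upper half with
// immediate 1 allows a 256-bit extract (the VEX-encoded VEXTRACTI128 here, or
// VEXTRACTI32x4Z256 below, which the EVEX->VEX pass can shrink) instead of a
// 512-bit EVEX-only extract.
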
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              To.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
                              v8f32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
                              v4f64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
1070                               v8i32x_info, vextract256_extract,
1071                               EXTRACT_get_vextract256_imm, [HasDQI]>;
1072 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
1073                               v4i64x_info, vextract256_extract,
1074                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1075 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
1076                               v4i64x_info, vextract256_extract,
1077                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1078 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
1079                               v4i64x_info, vextract256_extract,
1080                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
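// Illustration only: the masked extract intrinsics are the usual C-level
// source of the vselect + extract_subvector combinations handled above; the
// extra bitcast shows up when the select's element type differs from the
// extracted type. A sketch (helper name arbitrary), assuming <immintrin.h>:
//   #include <immintrin.h>
//   __m128 masked_lane(__m128 src, __mmask8 k, __m512 v) {
//     return _mm512_mask_extractf32x4_ps(src, k, v, 1);
//   }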
1082 // vextractps - extract 32 bits from XMM
1083 def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
1084       (ins VR128X:$src1, u8imm:$src2),
1085       "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1086       [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
1087       EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
1089 def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
1090       (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1091       "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1092       [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1093                           addr:$dst)]>,
1094       EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
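// Illustration only: vextractps is normally reached from C through the SSE4.1
// intrinsic. A sketch (helper name arbitrary), assuming <immintrin.h>:
//   #include <immintrin.h>
//   int third_elt_bits(__m128 v) {
//     return _mm_extract_ps(v, 2);  // bit pattern of element 2 as an int
//   }
// Compilers typically prefer the VEX encoding and use this EVEX form when it
// is required, for example when the source lives in xmm16-xmm31.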
1096 //===---------------------------------------------------------------------===//
1097 // AVX-512 BROADCAST
1098 //---
1099 // Broadcast with a scalar argument.
1100 multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
1101                             string Name,
1102                             X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
1103   def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1104             (!cast<Instruction>(Name#DestInfo.ZSuffix#r)
1105              (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1106   def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
1107                                   (X86VBroadcast SrcInfo.FRC:$src),
1108                                   DestInfo.RC:$src0)),
1109             (!cast<Instruction>(Name#DestInfo.ZSuffix#rk)
1110              DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1111              (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1112   def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
1113                                   (X86VBroadcast SrcInfo.FRC:$src),
1114                                   DestInfo.ImmAllZerosV)),
1115             (!cast<Instruction>(Name#DestInfo.ZSuffix#rkz)
1116              DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1119 // Split version that allows the mask and the broadcast node to have different
1120 // types. This helps support the 32x2 broadcasts.
1121 multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1122                                      string Name,
1123                                      SchedWrite SchedRR, SchedWrite SchedRM,
1124                                      X86VectorVTInfo MaskInfo,
1125                                      X86VectorVTInfo DestInfo,
1126                                      X86VectorVTInfo SrcInfo,
1127                                      bit IsConvertibleToThreeAddress,
1128                                      SDPatternOperator UnmaskedOp = X86VBroadcast,
1129                                      SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
1130   let hasSideEffects = 0 in
1131   def r : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
1132                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1133                    [(set MaskInfo.RC:$dst,
1134                      (MaskInfo.VT
1135                       (bitconvert
1136                        (DestInfo.VT
1137                         (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
1138                    DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
1139   def rkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1140                      (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
1141                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1142                       "${dst} {${mask}} {z}, $src}"),
1143                       [(set MaskInfo.RC:$dst,
1144                         (vselect MaskInfo.KRCWM:$mask,
1145                          (MaskInfo.VT
1146                           (bitconvert
1147                            (DestInfo.VT
1148                             (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1149                          MaskInfo.ImmAllZerosV))],
1150                       DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
1151   let Constraints = "$src0 = $dst" in
1152   def rk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1153                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1154                          SrcInfo.RC:$src),
1155                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1156                     "${dst} {${mask}}, $src}"),
1157                     [(set MaskInfo.RC:$dst,
1158                       (vselect MaskInfo.KRCWM:$mask,
1159                        (MaskInfo.VT
1160                         (bitconvert
1161                          (DestInfo.VT
1162                           (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1163                        MaskInfo.RC:$src0))],
1164                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;
1166   let hasSideEffects = 0, mayLoad = 1 in
1167   def m : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1168                    (ins SrcInfo.ScalarMemOp:$src),
1169                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1170                    [(set MaskInfo.RC:$dst,
1171                      (MaskInfo.VT
1172                       (bitconvert
1173                        (DestInfo.VT
1174                         (UnmaskedBcastOp addr:$src)))))],
1175                    DestInfo.ExeDomain>, T8PD, EVEX,
1176                    EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1178   def mkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1179                      (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
1180                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1181                       "${dst} {${mask}} {z}, $src}"),
1182                       [(set MaskInfo.RC:$dst,
1183                         (vselect MaskInfo.KRCWM:$mask,
1184                          (MaskInfo.VT
1185                           (bitconvert
1186                            (DestInfo.VT
1187                             (SrcInfo.BroadcastLdFrag addr:$src)))),
1188                          MaskInfo.ImmAllZerosV))],
1189                       DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
1190                       EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1192   let Constraints = "$src0 = $dst",
1193       isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
1194   def mk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1195                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1196                          SrcInfo.ScalarMemOp:$src),
1197                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1198                     "${dst} {${mask}}, $src}"),
1199                     [(set MaskInfo.RC:$dst,
1200                       (vselect MaskInfo.KRCWM:$mask,
1201                        (MaskInfo.VT
1202                         (bitconvert
1203                          (DestInfo.VT
1204                           (SrcInfo.BroadcastLdFrag addr:$src)))),
1205                        MaskInfo.RC:$src0))],
1206                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
1207                      EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1210 // Helper class to force the mask and the broadcast result to the same type.
1211 multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
1212                                SchedWrite SchedRR, SchedWrite SchedRM,
1213                                X86VectorVTInfo DestInfo,
1214                                X86VectorVTInfo SrcInfo,
1215                                bit IsConvertibleToThreeAddress> :
1216   avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
1217                             DestInfo, DestInfo, SrcInfo,
1218                             IsConvertibleToThreeAddress>;
1220 multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1221                                                        AVX512VLVectorVTInfo _> {
1222   let Predicates = [HasAVX512] in {
1223     defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1224                                   WriteFShuffle256Ld, _.info512, _.info128, 1>,
1225               avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
1226                                       _.info128>,
1227               EVEX_V512;
1228   }
1230   let Predicates = [HasVLX] in {
1231     defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1232                                      WriteFShuffle256Ld, _.info256, _.info128, 1>,
1233                  avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
1234                                          _.info128>,
1235                  EVEX_V256;
1236   }
1239 multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1240                                                        AVX512VLVectorVTInfo _> {
1241   let Predicates = [HasAVX512] in {
1242     defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1243                                   WriteFShuffle256Ld, _.info512, _.info128, 1>,
1244               avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
1245                                       _.info128>,
1246               EVEX_V512;
1247   }
1249   let Predicates = [HasVLX] in {
1250     defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1251                                      WriteFShuffle256Ld, _.info256, _.info128, 1>,
1252                  avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
1253                                          _.info128>,
1254                  EVEX_V256;
1255     defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1256                                      WriteFShuffle256Ld, _.info128, _.info128, 1>,
1257                  avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
1258                                          _.info128>,
1259                  EVEX_V128;
1260   }
1262 defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1263                                        avx512vl_f32_info>;
1264 defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1265                                        avx512vl_f64_info>, VEX_W1X;
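// Illustration only: scalar FP splats in C, which typically map onto the
// vbroadcastss/vbroadcastsd forms defined above. A sketch (helper names
// arbitrary), assuming <immintrin.h>:
//   #include <immintrin.h>
//   __m512  splat_ss(float x)         { return _mm512_set1_ps(x); }
//   __m512d splat_sd(const double *p) { return _mm512_set1_pd(*p); }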
1267 multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1268                                     X86VectorVTInfo _, SDPatternOperator OpNode,
1269                                     RegisterClass SrcRC> {
1270   let ExeDomain = _.ExeDomain in
1271   defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1272                          (ins SrcRC:$src),
1273                          "vpbroadcast"##_.Suffix, "$src", "$src",
1274                          (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX,
1275                          Sched<[SchedRR]>;
1278 multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1279                                     X86VectorVTInfo _, SDPatternOperator OpNode,
1280                                     RegisterClass SrcRC, SubRegIndex Subreg> {
1281   let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1282   defm r : AVX512_maskable_custom<opc, MRMSrcReg,
1283                         (outs _.RC:$dst), (ins GR32:$src),
1284                         !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1285                         !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1286                         "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
1287                         "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
1289   def : Pat <(_.VT (OpNode SrcRC:$src)),
1290              (!cast<Instruction>(Name#r)
1291               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1293   def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1294              (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
1295               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1297   def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1298              (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
1299               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1302 multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1303                       AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1304                       RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1305   let Predicates = [prd] in
1306     defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1307               OpNode, SrcRC, Subreg>, EVEX_V512;
1308   let Predicates = [prd, HasVLX] in {
1309     defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1310               _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1311     defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1312               _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
1313   }
1316 multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1317                                        SDPatternOperator OpNode,
1318                                        RegisterClass SrcRC, Predicate prd> {
1319   let Predicates = [prd] in
1320     defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1321                                       SrcRC>, EVEX_V512;
1322   let Predicates = [prd, HasVLX] in {
1323     defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1324                                          SrcRC>, EVEX_V256;
1325     defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
1326                                          SrcRC>, EVEX_V128;
1327   }
1330 defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1331                        avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1332 defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1333                        avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1334                        HasBWI>;
1335 defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1336                                                  X86VBroadcast, GR32, HasAVX512>;
1337 defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1338                                                  X86VBroadcast, GR64, HasAVX512>, VEX_W;
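// Illustration only: integer splats from a GPR, including a merge-masked
// form, as typically written in C. A sketch (helper names arbitrary),
// assuming <immintrin.h>:
//   #include <immintrin.h>
//   __m512i splat_d(int x) { return _mm512_set1_epi32(x); }
//   __m512i splat_d_masked(__m512i src, __mmask16 k, int x) {
//     return _mm512_mask_set1_epi32(src, k, x);
//   }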
1340 multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1341                                         AVX512VLVectorVTInfo _, Predicate prd,
1342                                         bit IsConvertibleToThreeAddress> {
1343   let Predicates = [prd] in {
1344     defm Z :   avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
1345                                    WriteShuffle256Ld, _.info512, _.info128,
1346                                    IsConvertibleToThreeAddress>,
1347                                   EVEX_V512;
1348   }
1349   let Predicates = [prd, HasVLX] in {
1350     defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
1351                                     WriteShuffle256Ld, _.info256, _.info128,
1352                                     IsConvertibleToThreeAddress>,
1353                                  EVEX_V256;
1354     defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
1355                                     WriteShuffleXLd, _.info128, _.info128,
1356                                     IsConvertibleToThreeAddress>,
1357                                  EVEX_V128;
1358   }
1361 defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1362                                            avx512vl_i8_info, HasBWI, 0>;
1363 defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1364                                            avx512vl_i16_info, HasBWI, 0>;
1365 defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1366                                            avx512vl_i32_info, HasAVX512, 1>;
1367 defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1368                                            avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;
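// Illustration only: splatting the low element of an XMM register, which is
// what the register forms above correspond to. A sketch (helper names
// arbitrary), assuming <immintrin.h>:
//   #include <immintrin.h>
//   __m512i splat_low_d(__m128i v) { return _mm512_broadcastd_epi32(v); }
//   __m512i splat_low_q(__m128i v) { return _mm512_broadcastq_epi64(v); }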
1370 multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1371                           X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1372   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1373                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1374                            (_Dst.VT (X86SubVBroadcast
1375                              (_Src.VT (_Src.LdFrag addr:$src))))>,
1376                            Sched<[SchedWriteShuffle.YMM.Folded]>,
1377                            AVX5128IBase, EVEX;
1380 // This should be used for the AVX512DQ broadcast instructions. It disables
1381 // the unmasked patterns so that we only use the DQ instructions when masking
1382 // is requested.
1383 multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1384                           X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1385   let hasSideEffects = 0, mayLoad = 1 in
1386   defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1387                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1388                            (null_frag),
1389                            (_Dst.VT (X86SubVBroadcast
1390                              (_Src.VT (_Src.LdFrag addr:$src))))>,
1391                            Sched<[SchedWriteShuffle.YMM.Folded]>,
1392                            AVX5128IBase, EVEX;
1395 let Predicates = [HasAVX512] in {
1396   // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
1397   def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
1398             (VPBROADCASTQZm addr:$src)>;
1400   // FIXME this is to handle aligned extloads from i8.
1401   def : Pat<(v16i32 (X86VBroadcast (loadi32 addr:$src))),
1402             (VPBROADCASTDZm addr:$src)>;
1405 let Predicates = [HasVLX] in {
1406   // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
1407   def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
1408             (VPBROADCASTQZ128m addr:$src)>;
1409   def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
1410             (VPBROADCASTQZ256m addr:$src)>;
1412   // FIXME this is to handle aligned extloads from i8.
1413   def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
1414             (VPBROADCASTDZ128m addr:$src)>;
1415   def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
1416             (VPBROADCASTDZ256m addr:$src)>;
1418 let Predicates = [HasVLX, HasBWI] in {
1419   // loadi16 is tricky to fold because isTypeDesirableForOp justifiably rejects i16.
1420   // This means we'll encounter truncated i32 loads; match that here.
1421   def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1422             (VPBROADCASTWZ128m addr:$src)>;
1423   def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1424             (VPBROADCASTWZ256m addr:$src)>;
1425   def : Pat<(v8i16 (X86VBroadcast
1426               (i16 (trunc (i32 (extloadi16 addr:$src)))))),
1427             (VPBROADCASTWZ128m addr:$src)>;
1428   def : Pat<(v8i16 (X86VBroadcast
1429               (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1430             (VPBROADCASTWZ128m addr:$src)>;
1431   def : Pat<(v16i16 (X86VBroadcast
1432               (i16 (trunc (i32 (extloadi16 addr:$src)))))),
1433             (VPBROADCASTWZ256m addr:$src)>;
1434   def : Pat<(v16i16 (X86VBroadcast
1435               (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1436             (VPBROADCASTWZ256m addr:$src)>;
1438   // FIXME this is to handle aligned extloads from i8.
1439   def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))),
1440             (VPBROADCASTWZ128m addr:$src)>;
1441   def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))),
1442             (VPBROADCASTWZ256m addr:$src)>;
1444 let Predicates = [HasBWI] in {
1445   // loadi16 is tricky to fold because isTypeDesirableForOp justifiably rejects i16.
1446   // This means we'll encounter truncated i32 loads; match that here.
1447   def : Pat<(v32i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1448             (VPBROADCASTWZm addr:$src)>;
1449   def : Pat<(v32i16 (X86VBroadcast
1450               (i16 (trunc (i32 (extloadi16 addr:$src)))))),
1451             (VPBROADCASTWZm addr:$src)>;
1452   def : Pat<(v32i16 (X86VBroadcast
1453               (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1454             (VPBROADCASTWZm addr:$src)>;
1456   // FIXME this is to handle aligned extloads from i8.
1457   def : Pat<(v32i16 (X86VBroadcast (loadi16 addr:$src))),
1458             (VPBROADCASTWZm addr:$src)>;
1461 //===----------------------------------------------------------------------===//
1462 // AVX-512 BROADCAST SUBVECTORS
1465 defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1466                        v16i32_info, v4i32x_info>,
1467                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
1468 defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1469                        v16f32_info, v4f32x_info>,
1470                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
1471 defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1472                        v8i64_info, v4i64x_info>, VEX_W,
1473                        EVEX_V512, EVEX_CD8<64, CD8VT4>;
1474 defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1475                        v8f64_info, v4f64x_info>, VEX_W,
1476                        EVEX_V512, EVEX_CD8<64, CD8VT4>;
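// Illustration only: the memory forms above correspond to C code that
// broadcasts a loaded 128-bit or 256-bit subvector. A sketch (helper names
// arbitrary), assuming <immintrin.h>:
//   #include <immintrin.h>
//   __m512 rep_f32x4(const float *p) {
//     return _mm512_broadcast_f32x4(_mm_loadu_ps(p));
//   }
//   __m512i rep_i64x4(const __m256i *p) {
//     return _mm512_broadcast_i64x4(_mm256_loadu_si256(p));
//   }
// Whether the load is folded into the broadcast is up to the compiler.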
1478 let Predicates = [HasAVX512] in {
1479 def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
1480           (VBROADCASTF64X4rm addr:$src)>;
1481 def : Pat<(v16i32 (X86SubVBroadcast (loadv8i32 addr:$src))),
1482           (VBROADCASTI64X4rm addr:$src)>;
1483 def : Pat<(v32i16 (X86SubVBroadcast (loadv16i16 addr:$src))),
1484           (VBROADCASTI64X4rm addr:$src)>;
1485 def : Pat<(v64i8 (X86SubVBroadcast (loadv32i8 addr:$src))),
1486           (VBROADCASTI64X4rm addr:$src)>;
1488 // Provide a fallback in case the load node used in the patterns above has
1489 // additional users, which prevents the patterns from being selected.
1490 def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
1491           (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1492                            (v4f64 VR256X:$src), 1)>;
1493 def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
1494           (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1495                            (v8f32 VR256X:$src), 1)>;
1496 def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
1497           (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1498                            (v4i64 VR256X:$src), 1)>;
1499 def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
1500           (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1501                            (v8i32 VR256X:$src), 1)>;
1502 def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
1503           (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1504                            (v16i16 VR256X:$src), 1)>;
1505 def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
1506           (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1507                            (v32i8 VR256X:$src), 1)>;
1509 def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1510           (VBROADCASTF32X4rm addr:$src)>;
1511 def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1512           (VBROADCASTI32X4rm addr:$src)>;
1513 def : Pat<(v32i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
1514           (VBROADCASTI32X4rm addr:$src)>;
1515 def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
1516           (VBROADCASTI32X4rm addr:$src)>;
1518 // Patterns for selects of bitcasted operations.
1519 def : Pat<(vselect VK16WM:$mask,
1520                    (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1521                    (v16f32 immAllZerosV)),
1522           (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1523 def : Pat<(vselect VK16WM:$mask,
1524                    (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1525                    VR512:$src0),
1526           (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1527 def : Pat<(vselect VK16WM:$mask,
1528                    (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1529                    (v16i32 immAllZerosV)),
1530           (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1531 def : Pat<(vselect VK16WM:$mask,
1532                    (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1533                    VR512:$src0),
1534           (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1536 def : Pat<(vselect VK8WM:$mask,
1537                    (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
1538                    (v8f64 immAllZerosV)),
1539           (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1540 def : Pat<(vselect VK8WM:$mask,
1541                    (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
1542                    VR512:$src0),
1543           (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1544 def : Pat<(vselect VK8WM:$mask,
1545                    (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
1546                    (v8i64 immAllZerosV)),
1547           (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1548 def : Pat<(vselect VK8WM:$mask,
1549                    (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
1550                    VR512:$src0),
1551           (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1554 let Predicates = [HasVLX] in {
1555 defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1556                            v8i32x_info, v4i32x_info>,
1557                            EVEX_V256, EVEX_CD8<32, CD8VT4>;
1558 defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1559                            v8f32x_info, v4f32x_info>,
1560                            EVEX_V256, EVEX_CD8<32, CD8VT4>;
1562 def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1563           (VBROADCASTF32X4Z256rm addr:$src)>;
1564 def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1565           (VBROADCASTI32X4Z256rm addr:$src)>;
1566 def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
1567           (VBROADCASTI32X4Z256rm addr:$src)>;
1568 def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
1569           (VBROADCASTI32X4Z256rm addr:$src)>;
1571 // Patterns for selects of bitcasted operations.
1572 def : Pat<(vselect VK8WM:$mask,
1573                    (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1574                    (v8f32 immAllZerosV)),
1575           (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1576 def : Pat<(vselect VK8WM:$mask,
1577                    (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1578                    VR256X:$src0),
1579           (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1580 def : Pat<(vselect VK8WM:$mask,
1581                    (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1582                    (v8i32 immAllZerosV)),
1583           (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1584 def : Pat<(vselect VK8WM:$mask,
1585                    (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1586                    VR256X:$src0),
1587           (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1590 // Provide a fallback in case the load node used in the patterns above has
1591 // additional users, which prevents the patterns from being selected.
1592 def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
1593           (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1594                               (v2f64 VR128X:$src), 1)>;
1595 def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
1596           (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1597                               (v4f32 VR128X:$src), 1)>;
1598 def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
1599           (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1600                               (v2i64 VR128X:$src), 1)>;
1601 def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
1602           (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1603                               (v4i32 VR128X:$src), 1)>;
1604 def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
1605           (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1606                               (v8i16 VR128X:$src), 1)>;
1607 def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
1608           (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1609                               (v16i8 VR128X:$src), 1)>;
1612 let Predicates = [HasVLX, HasDQI] in {
1613 defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1614                            v4i64x_info, v2i64x_info>, VEX_W1X,
1615                            EVEX_V256, EVEX_CD8<64, CD8VT2>;
1616 defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1617                            v4f64x_info, v2f64x_info>, VEX_W1X,
1618                            EVEX_V256, EVEX_CD8<64, CD8VT2>;
1620 // Patterns for selects of bitcasted operations.
1621 def : Pat<(vselect VK4WM:$mask,
1622                    (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1623                    (v4f64 immAllZerosV)),
1624           (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1625 def : Pat<(vselect VK4WM:$mask,
1626                    (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1627                    VR256X:$src0),
1628           (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1629 def : Pat<(vselect VK4WM:$mask,
1630                    (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1631                    (v4i64 immAllZerosV)),
1632           (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1633 def : Pat<(vselect VK4WM:$mask,
1634                    (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1635                    VR256X:$src0),
1636           (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1639 let Predicates = [HasDQI] in {
1640 defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1641                        v8i64_info, v2i64x_info>, VEX_W,
1642                        EVEX_V512, EVEX_CD8<64, CD8VT2>;
1643 defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1644                        v16i32_info, v8i32x_info>,
1645                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
1646 defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1647                        v8f64_info, v2f64x_info>, VEX_W,
1648                        EVEX_V512, EVEX_CD8<64, CD8VT2>;
1649 defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1650                        v16f32_info, v8f32x_info>,
1651                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
1653 // Patterns for selects of bitcasted operations.
1654 def : Pat<(vselect VK16WM:$mask,
1655                    (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
1656                    (v16f32 immAllZerosV)),
1657           (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
1658 def : Pat<(vselect VK16WM:$mask,
1659                    (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
1660                    VR512:$src0),
1661           (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1662 def : Pat<(vselect VK16WM:$mask,
1663                    (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
1664                    (v16i32 immAllZerosV)),
1665           (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
1666 def : Pat<(vselect VK16WM:$mask,
1667                    (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
1668                    VR512:$src0),
1669           (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1671 def : Pat<(vselect VK8WM:$mask,
1672                    (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1673                    (v8f64 immAllZerosV)),
1674           (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
1675 def : Pat<(vselect VK8WM:$mask,
1676                    (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1677                    VR512:$src0),
1678           (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1679 def : Pat<(vselect VK8WM:$mask,
1680                    (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1681                    (v8i64 immAllZerosV)),
1682           (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
1683 def : Pat<(vselect VK8WM:$mask,
1684                    (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1685                    VR512:$src0),
1686           (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1689 multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1690                          AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
1691   let Predicates = [HasDQI] in
1692     defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
1693                                           WriteShuffle256Ld, _Dst.info512,
1694                                           _Src.info512, _Src.info128, 0, null_frag, null_frag>,
1695                                           EVEX_V512;
1696   let Predicates = [HasDQI, HasVLX] in
1697     defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
1698                                           WriteShuffle256Ld, _Dst.info256,
1699                                           _Src.info256, _Src.info128, 0, null_frag, null_frag>,
1700                                           EVEX_V256;
1703 multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1704                          AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
1705   avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1707   let Predicates = [HasDQI, HasVLX] in
1708     defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
1709                                           WriteShuffleXLd, _Dst.info128,
1710                                           _Src.info128, _Src.info128, 0, null_frag, null_frag>,
1711                                           EVEX_V128;
1714 defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1715                                           avx512vl_i32_info, avx512vl_i64_info>;
1716 defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1717                                           avx512vl_f32_info, avx512vl_f64_info>;
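// Illustration only: the 32x2 broadcasts are exposed in C as AVX512DQ
// intrinsics. A sketch (helper names arbitrary), assuming <immintrin.h>:
//   #include <immintrin.h>
//   __m512  rep_f32x2(__m128 v)  { return _mm512_broadcast_f32x2(v); }
//   __m512i rep_i32x2(__m128i v) { return _mm512_broadcast_i32x2(v); }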
1719 //===----------------------------------------------------------------------===//
1720 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
1721 //---
1722 multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1723                                   X86VectorVTInfo _, RegisterClass KRC> {
1724   def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1725                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1726                   [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1727                   EVEX, Sched<[WriteShuffle]>;
1730 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1731                                  AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1732   let Predicates = [HasCDI] in
1733     defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1734   let Predicates = [HasCDI, HasVLX] in {
1735     defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1736     defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1737   }
1740 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1741                                                avx512vl_i32_info, VK16>;
1742 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1743                                                avx512vl_i64_info, VK8>, VEX_W;
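// Illustration only: broadcasting a mask register into vector elements, as
// written with the AVX512CD intrinsics. A sketch (helper names arbitrary),
// assuming <immintrin.h>:
//   #include <immintrin.h>
//   __m512i mask_to_dwords(__mmask16 k) { return _mm512_broadcastmw_epi32(k); }
//   __m512i mask_to_qwords(__mmask8 k)  { return _mm512_broadcastmb_epi64(k); }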
1745 //===----------------------------------------------------------------------===//
1746 // -- VPERMI2 - 3-source-operand form --
1747 multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1748                          X86FoldableSchedWrite sched,
1749                          X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1750 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1751     hasSideEffects = 0 in {
1752   defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1753           (ins _.RC:$src2, _.RC:$src3),
1754           OpcodeStr, "$src3, $src2", "$src2, $src3",
1755           (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1756           EVEX_4V, AVX5128IBase, Sched<[sched]>;
1758   let mayLoad = 1 in
1759   defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1760             (ins _.RC:$src2, _.MemOp:$src3),
1761             OpcodeStr, "$src3, $src2", "$src2, $src3",
1762             (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1763                    (_.VT (_.LdFrag addr:$src3)))), 1>,
1764             EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1765   }
1768 multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1769                             X86FoldableSchedWrite sched,
1770                             X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1771   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1772       hasSideEffects = 0, mayLoad = 1 in
1773   defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1774               (ins _.RC:$src2, _.ScalarMemOp:$src3),
1775               OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1776               !strconcat("$src2, ${src3}", _.BroadcastStr ),
1777               (_.VT (X86VPermt2 _.RC:$src2,
1778                IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1779               AVX5128IBase, EVEX_4V, EVEX_B,
1780               Sched<[sched.Folded, sched.ReadAfterFold]>;
1783 multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1784                                X86FoldableSchedWrite sched,
1785                                AVX512VLVectorVTInfo VTInfo,
1786                                AVX512VLVectorVTInfo ShuffleMask> {
1787   defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1788                            ShuffleMask.info512>,
1789             avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1790                              ShuffleMask.info512>, EVEX_V512;
1791   let Predicates = [HasVLX] in {
1792   defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1793                                ShuffleMask.info128>,
1794                  avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1795                                   ShuffleMask.info128>, EVEX_V128;
1796   defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1797                                ShuffleMask.info256>,
1798                  avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1799                                   ShuffleMask.info256>, EVEX_V256;
1800   }
1803 multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1804                                   X86FoldableSchedWrite sched,
1805                                   AVX512VLVectorVTInfo VTInfo,
1806                                   AVX512VLVectorVTInfo Idx,
1807                                   Predicate Prd> {
1808   let Predicates = [Prd] in
1809   defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1810                            Idx.info512>, EVEX_V512;
1811   let Predicates = [Prd, HasVLX] in {
1812   defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1813                                Idx.info128>, EVEX_V128;
1814   defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1815                                Idx.info256>,  EVEX_V256;
1816   }
1819 defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1820                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1821 defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1822                   avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1823 defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1824                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1825                   VEX_W, EVEX_CD8<16, CD8VF>;
1826 defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1827                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1828                   EVEX_CD8<8, CD8VF>;
1829 defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1830                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1831 defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1832                   avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
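// Illustration only: the two-source shuffles are reached from C through the
// permutex2var intrinsics; the "mask2" form merges into the index operand,
// which corresponds to the VPERMI2 flavour. A sketch (helper names
// arbitrary), assuming <immintrin.h>:
//   #include <immintrin.h>
//   __m512 shuf2(__m512 a, __m512i idx, __m512 b) {
//     return _mm512_permutex2var_ps(a, idx, b);
//   }
//   __m512 shuf2_keep_idx(__m512 a, __m512i idx, __mmask16 k, __m512 b) {
//     return _mm512_mask2_permutex2var_ps(a, idx, k, b);
//   }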
1834 // Extra patterns to handle the extra bitcasts that occur when the passthru and
1835 // index operands have different types in the FP versions.
1836 multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1837                                   X86VectorVTInfo IdxVT,
1838                                   X86VectorVTInfo CastVT> {
1839   def : Pat<(_.VT (vselect _.KRCWM:$mask,
1840                              (X86VPermt2 (_.VT _.RC:$src2),
1841                                          (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
1842                              (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1843             (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1844                                                 _.RC:$src2, _.RC:$src3)>;
1845   def : Pat<(_.VT (vselect _.KRCWM:$mask,
1846                              (X86VPermt2 _.RC:$src2,
1847                                          (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
1848                                          (_.LdFrag addr:$src3)),
1849                              (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1850             (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1851                                                 _.RC:$src2, addr:$src3)>;
1852   def : Pat<(_.VT (vselect _.KRCWM:$mask,
1853                              (X86VPermt2 _.RC:$src2,
1854                                          (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
1855                                          (_.BroadcastLdFrag addr:$src3)),
1856                              (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1857             (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1858                                                  _.RC:$src2, addr:$src3)>;
1861 // TODO: Should we add more casts? The vXi64 case is common due to ABI.
1862 defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
1863 defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
1864 defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
1866 // VPERMT2
1867 multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1868                          X86FoldableSchedWrite sched,
1869                          X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1870 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1871   defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1872           (ins IdxVT.RC:$src2, _.RC:$src3),
1873           OpcodeStr, "$src3, $src2", "$src2, $src3",
1874           (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1875           EVEX_4V, AVX5128IBase, Sched<[sched]>;
1877   defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1878             (ins IdxVT.RC:$src2, _.MemOp:$src3),
1879             OpcodeStr, "$src3, $src2", "$src2, $src3",
1880             (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1881                    (_.LdFrag addr:$src3))), 1>,
1882             EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1883   }
1885 multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1886                             X86FoldableSchedWrite sched,
1887                             X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1888   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1889   defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1890               (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1891               OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1892               !strconcat("$src2, ${src3}", _.BroadcastStr ),
1893               (_.VT (X86VPermt2 _.RC:$src1,
1894                IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1895               AVX5128IBase, EVEX_4V, EVEX_B,
1896               Sched<[sched.Folded, sched.ReadAfterFold]>;
1899 multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1900                                X86FoldableSchedWrite sched,
1901                                AVX512VLVectorVTInfo VTInfo,
1902                                AVX512VLVectorVTInfo ShuffleMask> {
1903   defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1904                               ShuffleMask.info512>,
1905             avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1906                               ShuffleMask.info512>, EVEX_V512;
1907   let Predicates = [HasVLX] in {
1908   defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1909                               ShuffleMask.info128>,
1910                  avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1911                               ShuffleMask.info128>, EVEX_V128;
1912   defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1913                               ShuffleMask.info256>,
1914                  avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1915                               ShuffleMask.info256>, EVEX_V256;
1916   }
1919 multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1920                                   X86FoldableSchedWrite sched,
1921                                   AVX512VLVectorVTInfo VTInfo,
1922                                   AVX512VLVectorVTInfo Idx, Predicate Prd> {
1923   let Predicates = [Prd] in
1924   defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1925                            Idx.info512>, EVEX_V512;
1926   let Predicates = [Prd, HasVLX] in {
1927   defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1928                                Idx.info128>, EVEX_V128;
1929   defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1930                                Idx.info256>, EVEX_V256;
1931   }
1934 defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1935                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1936 defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1937                   avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1938 defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1939                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1940                   VEX_W, EVEX_CD8<16, CD8VF>;
1941 defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1942                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1943                   EVEX_CD8<8, CD8VF>;
1944 defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1945                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1946 defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1947                   avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
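// Illustration only: merge-masking into the first data operand corresponds to
// the VPERMT2 flavour. A sketch (helper name arbitrary), assuming
// <immintrin.h>:
//   #include <immintrin.h>
//   __m512i shuf2_keep_a(__m512i a, __mmask16 k, __m512i idx, __m512i b) {
//     return _mm512_mask_permutex2var_epi32(a, k, idx, b);
//   }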
1949 //===----------------------------------------------------------------------===//
1950 // AVX-512 - BLEND using mask
1953 multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1954                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1955   let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1956   def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1957              (ins _.RC:$src1, _.RC:$src2),
1958              !strconcat(OpcodeStr,
1959              "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1960              EVEX_4V, Sched<[sched]>;
1961   def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1962              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1963              !strconcat(OpcodeStr,
1964              "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1965              []>, EVEX_4V, EVEX_K, Sched<[sched]>;
1966   def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1967              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1968              !strconcat(OpcodeStr,
1969              "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1970              []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
1971   let mayLoad = 1 in {
1972   def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1973              (ins _.RC:$src1, _.MemOp:$src2),
1974              !strconcat(OpcodeStr,
1975              "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1976              []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
1977              Sched<[sched.Folded, sched.ReadAfterFold]>;
1978   def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1979              (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1980              !strconcat(OpcodeStr,
1981              "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1982              []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1983              Sched<[sched.Folded, sched.ReadAfterFold]>;
1984   def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1985              (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1986              !strconcat(OpcodeStr,
1987              "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1988              []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1989              Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
1990   }
1991   }
1993 multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
1994                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1995   let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
1996   def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1997       (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1998        !strconcat(OpcodeStr,
1999             "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2000             "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2001       EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2002       Sched<[sched.Folded, sched.ReadAfterFold]>;
2004   def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2005       (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
2006        !strconcat(OpcodeStr,
2007             "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
2008             "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2009       EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2010       Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
2012   def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2013       (ins _.RC:$src1, _.ScalarMemOp:$src2),
2014        !strconcat(OpcodeStr,
2015             "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
2016             "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2017       EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2018       Sched<[sched.Folded, sched.ReadAfterFold]>;
2019   }
2020 }
2022 multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2023                         AVX512VLVectorVTInfo VTInfo> {
2024   defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2025            WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2026                                  EVEX_V512;
2028   let Predicates = [HasVLX] in {
2029     defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2030                 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2031                                       EVEX_V256;
2032     defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2033                 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2034                                       EVEX_V128;
2035   }
2036 }
2038 multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2039                         AVX512VLVectorVTInfo VTInfo> {
2040   let Predicates = [HasBWI] in
2041     defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2042                                EVEX_V512;
2044   let Predicates = [HasBWI, HasVLX] in {
2045     defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2046                                   EVEX_V256;
2047     defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2048                                   EVEX_V128;
2049   }
2050 }
2052 defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
2053                               avx512vl_f32_info>;
2054 defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
2055                               avx512vl_f64_info>, VEX_W;
2056 defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
2057                               avx512vl_i32_info>;
2058 defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
2059                               avx512vl_i64_info>, VEX_W;
2060 defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
2061                               avx512vl_i8_info>;
2062 defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
2063                               avx512vl_i16_info>, VEX_W;
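// With merge-masking the blend simply selects between the two sources under
// the write mask, e.g.:
//   vblendmps zmm0 {k1}, zmm1, zmm2    ; zmm0[i] = k1[i] ? zmm2[i] : zmm1[i]
// The rrkz/rmkz/rmbkz forms above zero elements whose mask bit is clear
// instead of taking them from $src1.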
2065 //===----------------------------------------------------------------------===//
2066 // Compare Instructions
2067 //===----------------------------------------------------------------------===//
2069 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
2071 multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
2072                              PatFrag OpNode_su, PatFrag OpNodeSAE_su,
2073                              X86FoldableSchedWrite sched> {
2074   defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2075                       (outs _.KRC:$dst),
2076                       (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2077                       "vcmp"#_.Suffix,
2078                       "$cc, $src2, $src1", "$src1, $src2, $cc",
2079                       (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2080                       (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2081                                  timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>;
2082   let mayLoad = 1 in
2083   defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2084                     (outs _.KRC:$dst),
2085                     (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
2086                     "vcmp"#_.Suffix,
2087                     "$cc, $src2, $src1", "$src1, $src2, $cc",
2088                     (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
2089                         timm:$cc),
2090                     (OpNode_su (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
2091                         timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2092                     Sched<[sched.Folded, sched.ReadAfterFold]>;
2094   defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2095                      (outs _.KRC:$dst),
2096                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2097                      "vcmp"#_.Suffix,
2098                      "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
2099                      (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2100                                 timm:$cc),
2101                      (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2102                                    timm:$cc)>,
2103                      EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
2105   let isCodeGenOnly = 1 in {
2106     let isCommutable = 1 in
2107     def rr : AVX512Ii8<0xC2, MRMSrcReg,
2108                 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
2109                 !strconcat("vcmp", _.Suffix,
2110                            "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2111                 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2112                                           _.FRC:$src2,
2113                                           timm:$cc))]>,
2114                 EVEX_4V, VEX_LIG, Sched<[sched]>;
2115     def rm : AVX512Ii8<0xC2, MRMSrcMem,
2116               (outs _.KRC:$dst),
2117               (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2118               !strconcat("vcmp", _.Suffix,
2119                          "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2120               [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2121                                         (_.ScalarLdFrag addr:$src2),
2122                                         timm:$cc))]>,
2123               EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2124               Sched<[sched.Folded, sched.ReadAfterFold]>;
2125   }
2126 }
2128 def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2129                           (X86cmpms node:$src1, node:$src2, node:$cc), [{
2130   return N->hasOneUse();
2131 }]>;
2132 def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2133                           (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
2134   return N->hasOneUse();
2135 }]>;
2137 let Predicates = [HasAVX512] in {
2138   let ExeDomain = SSEPackedSingle in
2139   defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
2140                                    X86cmpms_su, X86cmpmsSAE_su,
2141                                    SchedWriteFCmp.Scl>, AVX512XSIi8Base;
2142   let ExeDomain = SSEPackedDouble in
2143   defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
2144                                    X86cmpms_su, X86cmpmsSAE_su,
2145                                    SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
2146 }
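// For example, the intrinsic (rr_Int) form compares the low elements into a
// single mask bit, ANDed with the write mask if one is given:
//   vcmpltss k1 {k2}, xmm1, xmm2       ; k1[0] = k2[0] & (xmm1[0] < xmm2[0])
// The rrb_Int variants perform the same compare with exceptions suppressed
// ({sae}).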
2148 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
2149                               X86FoldableSchedWrite sched,
2150                               X86VectorVTInfo _, bit IsCommutable> {
2151   let isCommutable = IsCommutable, hasSideEffects = 0 in
2152   def rr : AVX512BI<opc, MRMSrcReg,
2153              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2154              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2155              []>, EVEX_4V, Sched<[sched]>;
2156   let mayLoad = 1, hasSideEffects = 0 in
2157   def rm : AVX512BI<opc, MRMSrcMem,
2158              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2159              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2160              []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2161   let isCommutable = IsCommutable, hasSideEffects = 0 in
2162   def rrk : AVX512BI<opc, MRMSrcReg,
2163               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2164               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2165                           "$dst {${mask}}, $src1, $src2}"),
2166               []>, EVEX_4V, EVEX_K, Sched<[sched]>;
2167   let mayLoad = 1, hasSideEffects = 0 in
2168   def rmk : AVX512BI<opc, MRMSrcMem,
2169               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2170               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2171                           "$dst {${mask}}, $src1, $src2}"),
2172               []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2173 }
2175 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
2176                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
2177                                   bit IsCommutable> :
2178            avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
2179   let mayLoad = 1, hasSideEffects = 0 in {
2180   def rmb : AVX512BI<opc, MRMSrcMem,
2181               (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2182               !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2183                                     "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2184               []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2185   def rmbk : AVX512BI<opc, MRMSrcMem,
2186                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2187                                        _.ScalarMemOp:$src2),
2188                !strconcat(OpcodeStr,
2189                           "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2190                           "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2191                []>, EVEX_4V, EVEX_K, EVEX_B,
2192                Sched<[sched.Folded, sched.ReadAfterFold]>;
2193   }
2194 }
2196 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
2197                                  X86SchedWriteWidths sched,
2198                                  AVX512VLVectorVTInfo VTInfo, Predicate prd,
2199                                  bit IsCommutable = 0> {
2200   let Predicates = [prd] in
2201   defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
2202                               VTInfo.info512, IsCommutable>, EVEX_V512;
2204   let Predicates = [prd, HasVLX] in {
2205     defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
2206                                    VTInfo.info256, IsCommutable>, EVEX_V256;
2207     defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
2208                                    VTInfo.info128, IsCommutable>, EVEX_V128;
2209   }
2210 }
2212 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2213                                      X86SchedWriteWidths sched,
2214                                      AVX512VLVectorVTInfo VTInfo,
2215                                      Predicate prd, bit IsCommutable = 0> {
2216   let Predicates = [prd] in
2217   defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
2218                                   VTInfo.info512, IsCommutable>, EVEX_V512;
2220   let Predicates = [prd, HasVLX] in {
2221     defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
2222                                        VTInfo.info256, IsCommutable>, EVEX_V256;
2223     defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
2224                                        VTInfo.info128, IsCommutable>, EVEX_V128;
2225   }
2226 }
2228 // This setcc fragment is treated as commutable to help match loads in both
2229 // operands for PCMPEQ.
2230 def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
2231 def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
2232                          (setcc node:$src1, node:$src2, SETGT)>;
2234 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2235 // increase the pattern complexity the way an immediate would.
2236 let AddedComplexity = 2 in {
2237 // FIXME: Is there a better scheduler class for VPCMP?
2238 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
2239                       SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2240                 EVEX_CD8<8, CD8VF>, VEX_WIG;
2242 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
2243                       SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2244                 EVEX_CD8<16, CD8VF>, VEX_WIG;
2246 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
2247                       SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2248                 EVEX_CD8<32, CD8VF>;
2250 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
2251                       SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2252                 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2254 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
2255                       SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2256                 EVEX_CD8<8, CD8VF>, VEX_WIG;
2258 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
2259                       SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2260                 EVEX_CD8<16, CD8VF>, VEX_WIG;
2262 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
2263                       SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2264                 EVEX_CD8<32, CD8VF>;
2266 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
2267                       SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2268                 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2269 }
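// For example:
//   vpcmpeqd k1, zmm0, zmm1            ; k1[i] = (zmm0[i] == zmm1[i])
//   vpcmpgtd k1 {k2}, zmm0, [rdi]      ; k1[i] = k2[i] & (zmm0[i] > mem[i])
// The rmb forms of the D/Q variants also accept a broadcast memory operand
// ({1to16}/{1to8} at 512 bits).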
2271 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2272                           PatFrag Frag_su, PatFrag CommFrag, PatFrag CommFrag_su,
2273                           X86FoldableSchedWrite sched,
2274                           X86VectorVTInfo _, string Name> {
2275   let isCommutable = 1 in
2276   def rri : AVX512AIi8<opc, MRMSrcReg,
2277              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2278              !strconcat("vpcmp", Suffix,
2279                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2280              [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2281                                                 (_.VT _.RC:$src2),
2282                                                 cond)))]>,
2283              EVEX_4V, Sched<[sched]>;
2284   def rmi : AVX512AIi8<opc, MRMSrcMem,
2285              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2286              !strconcat("vpcmp", Suffix,
2287                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2288              [(set _.KRC:$dst, (_.KVT
2289                                 (Frag:$cc
2290                                  (_.VT _.RC:$src1),
2291                                  (_.VT (_.LdFrag addr:$src2)),
2292                                  cond)))]>,
2293              EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2294   let isCommutable = 1 in
2295   def rrik : AVX512AIi8<opc, MRMSrcReg,
2296               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2297                                       u8imm:$cc),
2298               !strconcat("vpcmp", Suffix,
2299                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
2300                          "$dst {${mask}}, $src1, $src2, $cc}"),
2301               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2302                                      (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
2303                                                          (_.VT _.RC:$src2),
2304                                                          cond))))]>,
2305               EVEX_4V, EVEX_K, Sched<[sched]>;
2306   def rmik : AVX512AIi8<opc, MRMSrcMem,
2307               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2308                                     u8imm:$cc),
2309               !strconcat("vpcmp", Suffix,
2310                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
2311                          "$dst {${mask}}, $src1, $src2, $cc}"),
2312               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2313                                      (_.KVT
2314                                       (Frag_su:$cc
2315                                        (_.VT _.RC:$src1),
2316                                        (_.VT (_.LdFrag addr:$src2)),
2317                                        cond))))]>,
2318               EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2320   def : Pat<(_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
2321                                  (_.VT _.RC:$src1), cond)),
2322             (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2323              _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
2325   def : Pat<(and _.KRCWM:$mask,
2326                  (_.KVT (CommFrag_su:$cc (_.LdFrag addr:$src2),
2327                                       (_.VT _.RC:$src1), cond))),
2328             (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2329              _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2330              (CommFrag.OperandTransform $cc))>;
2331 }
2333 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2334                               PatFrag Frag_su, PatFrag CommFrag,
2335                               PatFrag CommFrag_su, X86FoldableSchedWrite sched,
2336                               X86VectorVTInfo _, string Name> :
2337            avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2338                           sched, _, Name> {
2339   def rmib : AVX512AIi8<opc, MRMSrcMem,
2340              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2341                                      u8imm:$cc),
2342              !strconcat("vpcmp", Suffix,
2343                         "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2344                         "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2345              [(set _.KRC:$dst, (_.KVT (Frag:$cc
2346                                        (_.VT _.RC:$src1),
2347                                        (_.BroadcastLdFrag addr:$src2),
2348                                        cond)))]>,
2349              EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2350   def rmibk : AVX512AIi8<opc, MRMSrcMem,
2351               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2352                                        _.ScalarMemOp:$src2, u8imm:$cc),
2353               !strconcat("vpcmp", Suffix,
2354                   "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2355                   "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2356               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2357                                      (_.KVT (Frag_su:$cc
2358                                              (_.VT _.RC:$src1),
2359                                              (_.BroadcastLdFrag addr:$src2),
2360                                              cond))))]>,
2361               EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2363   def : Pat<(_.KVT (CommFrag:$cc (_.BroadcastLdFrag addr:$src2),
2364                     (_.VT _.RC:$src1), cond)),
2365             (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2366              _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
2368   def : Pat<(and _.KRCWM:$mask,
2369                  (_.KVT (CommFrag_su:$cc (_.BroadcastLdFrag addr:$src2),
2370                                       (_.VT _.RC:$src1), cond))),
2371             (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2372              _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2373              (CommFrag_su.OperandTransform $cc))>;
2374 }
2376 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2377                              PatFrag Frag_su, PatFrag CommFrag,
2378                              PatFrag CommFrag_su, X86SchedWriteWidths sched,
2379                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2380   let Predicates = [prd] in
2381   defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2382                           sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2384   let Predicates = [prd, HasVLX] in {
2385     defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2386                                sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2387     defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2388                                sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2389   }
2390 }
2392 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2393                                  PatFrag Frag_su, PatFrag CommFrag,
2394                                  PatFrag CommFrag_su, X86SchedWriteWidths sched,
2395                                  AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2396   let Predicates = [prd] in
2397   defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2398                               sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2400   let Predicates = [prd, HasVLX] in {
2401     defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2402                                    sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2403     defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2404                                    sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2405   }
2406 }
2408 def X86pcmpm_imm : SDNodeXForm<setcc, [{
2409   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2410   uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2411   return getI8Imm(SSECC, SDLoc(N));
2412 }]>;
2414 // Swapped operand version of the above.
2415 def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
2416   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2417   uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2418   SSECC = X86::getSwappedVPCMPImm(SSECC);
2419   return getI8Imm(SSECC, SDLoc(N));
2420 }]>;
2422 def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2423                        (setcc node:$src1, node:$src2, node:$cc), [{
2424   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2425   return !ISD::isUnsignedIntSetCC(CC);
2426 }], X86pcmpm_imm>;
2428 def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2429                           (setcc node:$src1, node:$src2, node:$cc), [{
2430   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2431   return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2432 }], X86pcmpm_imm>;
2434 // Same as above, but commutes immediate. Use for load folding.
2435 def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2436                                (setcc node:$src1, node:$src2, node:$cc), [{
2437   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2438   return !ISD::isUnsignedIntSetCC(CC);
2439 }], X86pcmpm_imm_commute>;
2441 def X86pcmpm_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2442                                   (setcc node:$src1, node:$src2, node:$cc), [{
2443   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2444   return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2445 }], X86pcmpm_imm_commute>;
2447 def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2448                         (setcc node:$src1, node:$src2, node:$cc), [{
2449   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2450   return ISD::isUnsignedIntSetCC(CC);
2451 }], X86pcmpm_imm>;
2453 def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2454                            (setcc node:$src1, node:$src2, node:$cc), [{
2455   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2456   return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2457 }], X86pcmpm_imm>;
2459 // Same as above, but commutes immediate. Use for load folding.
2460 def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2461                                 (setcc node:$src1, node:$src2, node:$cc), [{
2462   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2463   return ISD::isUnsignedIntSetCC(CC);
2464 }], X86pcmpm_imm_commute>;
2466 def X86pcmpum_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2467                                    (setcc node:$src1, node:$src2, node:$cc), [{
2468   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2469   return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2470 }], X86pcmpm_imm_commute>;
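// The fragments above pick VPCMP for signed and VPCMPU for unsigned setcc
// conditions and translate the condition to the VPCMP immediate
// (0=eq, 1=lt, 2=le, 4=neq, 5=nlt, 6=nle). The *_commute variants also swap
// the predicate (e.g. lt <-> nle, le <-> nlt) so that a load feeding the
// first operand can still be folded into the memory forms.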
2472 // FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
2473 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
2474                                 X86pcmpm_commute, X86pcmpm_commute_su,
2475                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2476                                 EVEX_CD8<8, CD8VF>;
2477 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
2478                                  X86pcmpum_commute, X86pcmpum_commute_su,
2479                                  SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2480                                  EVEX_CD8<8, CD8VF>;
2482 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
2483                                 X86pcmpm_commute, X86pcmpm_commute_su,
2484                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2485                                 VEX_W, EVEX_CD8<16, CD8VF>;
2486 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
2487                                  X86pcmpum_commute, X86pcmpum_commute_su,
2488                                  SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2489                                  VEX_W, EVEX_CD8<16, CD8VF>;
2491 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
2492                                     X86pcmpm_commute, X86pcmpm_commute_su,
2493                                     SchedWriteVecALU, avx512vl_i32_info,
2494                                     HasAVX512>, EVEX_CD8<32, CD8VF>;
2495 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
2496                                      X86pcmpum_commute, X86pcmpum_commute_su,
2497                                      SchedWriteVecALU, avx512vl_i32_info,
2498                                      HasAVX512>, EVEX_CD8<32, CD8VF>;
2500 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
2501                                     X86pcmpm_commute, X86pcmpm_commute_su,
2502                                     SchedWriteVecALU, avx512vl_i64_info,
2503                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2504 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
2505                                      X86pcmpum_commute, X86pcmpum_commute_su,
2506                                      SchedWriteVecALU, avx512vl_i64_info,
2507                                      HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
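// For example, a compare against a broadcast scalar selects the rmib form:
//   vpcmpd k1, zmm0, dword ptr [rax]{1to16}, 2   ; k1[i] = (zmm0[i] <= mem)
// and the commuted patterns handle the case where the broadcast load appears
// as the first setcc operand.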
2509 def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2510                          (X86cmpm node:$src1, node:$src2, node:$cc), [{
2511   return N->hasOneUse();
2512 }]>;
2513 def X86cmpmSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2514                             (X86cmpmSAE node:$src1, node:$src2, node:$cc), [{
2515   return N->hasOneUse();
2516 }]>;
2518 def X86cmpm_imm_commute : SDNodeXForm<timm, [{
2519   uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
2520   return getI8Imm(Imm, SDLoc(N));
2521 }]>;
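// As with the integer compares, this lets a load in the first operand be
// folded: the operands are swapped and the VCMP predicate is replaced by its
// swapped counterpart (e.g. "lt" becomes "gt"; symmetric predicates such as
// "eq" and "neq" are unchanged).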
2523 multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2524                               string Name> {
2525   defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2526                    (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
2527                    "vcmp"#_.Suffix,
2528                    "$cc, $src2, $src1", "$src1, $src2, $cc",
2529                    (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2530                    (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2531                    1>, Sched<[sched]>;
2533   defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2534                 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2535                 "vcmp"#_.Suffix,
2536                 "$cc, $src2, $src1", "$src1, $src2, $cc",
2537                 (X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2538                          timm:$cc),
2539                 (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2540                             timm:$cc)>,
2541                 Sched<[sched.Folded, sched.ReadAfterFold]>;
2543   defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2544                 (outs _.KRC:$dst),
2545                 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2546                 "vcmp"#_.Suffix,
2547                 "$cc, ${src2}"#_.BroadcastStr#", $src1",
2548                 "$src1, ${src2}"#_.BroadcastStr#", $cc",
2549                 (X86cmpm (_.VT _.RC:$src1),
2550                         (_.VT (_.BroadcastLdFrag addr:$src2)),
2551                         timm:$cc),
2552                 (X86cmpm_su (_.VT _.RC:$src1),
2553                             (_.VT (_.BroadcastLdFrag addr:$src2)),
2554                             timm:$cc)>,
2555                 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2557   // Patterns for selecting compares when the load is in the other operand.
2558   def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2559                      timm:$cc),
2560             (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2561                                                       (X86cmpm_imm_commute timm:$cc))>;
2563   def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
2564                                             (_.VT _.RC:$src1),
2565                                             timm:$cc)),
2566             (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2567                                                        _.RC:$src1, addr:$src2,
2568                                                        (X86cmpm_imm_commute timm:$cc))>;
2570   def : Pat<(X86cmpm (_.BroadcastLdFrag addr:$src2),
2571                      (_.VT _.RC:$src1), timm:$cc),
2572             (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2573                                                        (X86cmpm_imm_commute timm:$cc))>;
2575   def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
2576                                             (_.VT _.RC:$src1),
2577                                             timm:$cc)),
2578             (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2579                                                         _.RC:$src1, addr:$src2,
2580                                                         (X86cmpm_imm_commute timm:$cc))>;
2581 }
2583 multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2584   // comparison code form (VCMP[EQ/LT/LE/...])
2585   defm  rrib  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2586                      (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2587                      "vcmp"#_.Suffix,
2588                      "$cc, {sae}, $src2, $src1",
2589                      "$src1, $src2, {sae}, $cc",
2590                      (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2591                      (X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2592                                     timm:$cc)>,
2593                      EVEX_B, Sched<[sched]>;
2594 }
2596 multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
2597   let Predicates = [HasAVX512] in {
2598     defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2599                 avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2601   }
2602   let Predicates = [HasAVX512,HasVLX] in {
2603    defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2604    defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
2605   }
2606 }
2608 defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2609                           AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
2610 defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2611                           AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2613 // Patterns to select fp compares with a load as the first operand.
2614 let Predicates = [HasAVX512] in {
2615   def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
2616                             timm:$cc)),
2617             (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2619   def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
2620                             timm:$cc)),
2621             (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2622 }
2624 // ----------------------------------------------------------------
2625 // FPClass
2627 def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
2628                               (X86Vfpclasss node:$src1, node:$src2), [{
2629   return N->hasOneUse();
2630 }]>;
2632 def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
2633                              (X86Vfpclass node:$src1, node:$src2), [{
2634   return N->hasOneUse();
2635 }]>;
2637 // Handle the scalar fpclass instruction:  mask = op(reg_scalar, imm)
2638 //                                                op(mem_scalar, imm)
2639 multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
2640                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2641                                  Predicate prd> {
2642   let Predicates = [prd], ExeDomain = _.ExeDomain in {
2643       def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2644                       (ins _.RC:$src1, i32u8imm:$src2),
2645                       OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2646                       [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
2647                               (i32 timm:$src2)))]>,
2648                       Sched<[sched]>;
2649       def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2650                       (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2651                       OpcodeStr##_.Suffix#
2652                       "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2653                       [(set _.KRC:$dst,(and _.KRCWM:$mask,
2654                                       (X86Vfpclasss_su (_.VT _.RC:$src1),
2655                                       (i32 timm:$src2))))]>,
2656                       EVEX_K, Sched<[sched]>;
2657     def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2658                     (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2659                     OpcodeStr##_.Suffix##
2660                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2661                     [(set _.KRC:$dst,
2662                           (X86Vfpclasss _.ScalarIntMemCPat:$src1,
2663                                        (i32 timm:$src2)))]>,
2664                     Sched<[sched.Folded, sched.ReadAfterFold]>;
2665     def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2666                     (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2667                     OpcodeStr##_.Suffix##
2668                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2669                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
2670                         (X86Vfpclasss_su _.ScalarIntMemCPat:$src1,
2671                             (i32 timm:$src2))))]>,
2672                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2673   }
2674 }
2676 // Handle the vector fpclass instruction:  mask = fpclass(reg_vec, imm)
2677 //                                                fpclass(mem_vec, imm)
2678 //                                                fpclass(broadcast(eltVt), imm)
2679 multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
2680                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2681                                  string mem>{
2682   let ExeDomain = _.ExeDomain in {
2683   def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2684                       (ins _.RC:$src1, i32u8imm:$src2),
2685                       OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2686                       [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
2687                                        (i32 timm:$src2)))]>,
2688                       Sched<[sched]>;
2689   def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2690                       (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2691                       OpcodeStr##_.Suffix#
2692                       "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2693                       [(set _.KRC:$dst,(and _.KRCWM:$mask,
2694                                        (X86Vfpclass_su (_.VT _.RC:$src1),
2695                                        (i32 timm:$src2))))]>,
2696                       EVEX_K, Sched<[sched]>;
2697   def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2698                     (ins _.MemOp:$src1, i32u8imm:$src2),
2699                     OpcodeStr##_.Suffix#"{"#mem#"}"#
2700                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2701                     [(set _.KRC:$dst,(X86Vfpclass
2702                                      (_.VT (_.LdFrag addr:$src1)),
2703                                      (i32 timm:$src2)))]>,
2704                     Sched<[sched.Folded, sched.ReadAfterFold]>;
2705   def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2706                     (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2707                     OpcodeStr##_.Suffix#"{"#mem#"}"#
2708                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2709                     [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
2710                                   (_.VT (_.LdFrag addr:$src1)),
2711                                   (i32 timm:$src2))))]>,
2712                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2713   def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2714                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2715                     OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
2716                                       _.BroadcastStr##", $dst|$dst, ${src1}"
2717                                                   ##_.BroadcastStr##", $src2}",
2718                     [(set _.KRC:$dst,(X86Vfpclass
2719                                      (_.VT (_.BroadcastLdFrag addr:$src1)),
2720                                      (i32 timm:$src2)))]>,
2721                     EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2722   def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2723                     (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2724                     OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
2725                           _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
2726                                                    _.BroadcastStr##", $src2}",
2727                     [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
2728                                      (_.VT (_.BroadcastLdFrag addr:$src1)),
2729                                      (i32 timm:$src2))))]>,
2730                     EVEX_B, EVEX_K,  Sched<[sched.Folded, sched.ReadAfterFold]>;
2731   }
2733   // Also accept the x, y, z suffix (used to disambiguate the memory form) on
2734   // the register and broadcast forms.
2735   def : InstAlias<OpcodeStr#_.Suffix#mem#
2736                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2737                   (!cast<Instruction>(NAME#"rr")
2738                    _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2739   def : InstAlias<OpcodeStr#_.Suffix#mem#
2740                   "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2741                   (!cast<Instruction>(NAME#"rrk")
2742                    _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2743   def : InstAlias<OpcodeStr#_.Suffix#mem#
2744                   "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
2745                   _.BroadcastStr#", $src2}",
2746                   (!cast<Instruction>(NAME#"rmb")
2747                    _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2748   def : InstAlias<OpcodeStr#_.Suffix#mem#
2749                   "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
2750                   "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
2751                   (!cast<Instruction>(NAME#"rmbk")
2752                    _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2753 }
2755 multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2756                                      bits<8> opc, X86SchedWriteWidths sched,
2757                                      Predicate prd>{
2758   let Predicates = [prd] in {
2759     defm Z    : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
2760                                       _.info512, "z">, EVEX_V512;
2761   }
2762   let Predicates = [prd, HasVLX] in {
2763     defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
2764                                       _.info128, "x">, EVEX_V128;
2765     defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
2766                                       _.info256, "y">, EVEX_V256;
2767   }
2768 }
2770 multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2771                                  bits<8> opcScalar, X86SchedWriteWidths sched,
2772                                  Predicate prd> {
2773   defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
2774                                       sched, prd>,
2775                                       EVEX_CD8<32, CD8VF>;
2776   defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
2777                                       sched, prd>,
2778                                       EVEX_CD8<64, CD8VF> , VEX_W;
2779   defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2780                                    sched.Scl, f32x_info, prd>, VEX_LIG,
2781                                    EVEX_CD8<32, CD8VT1>;
2782   defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2783                                    sched.Scl, f64x_info, prd>, VEX_LIG,
2784                                    EVEX_CD8<64, CD8VT1>, VEX_W;
2785 }
2787 defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp,
2788                                       HasDQI>, AVX512AIi8Base, EVEX;
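// VFPCLASS tests each source element (the low element for the scalar SS/SD
// forms) against the FP categories selected by imm8 (QNaN/SNaN, +/-0, +/-Inf,
// denormal, negative) and sets the corresponding mask bit when the element
// matches any selected category.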
2790 //-----------------------------------------------------------------
2791 // Mask register copy, including
2792 // - copy between mask registers
2793 // - load/store mask registers
2794 // - copy from GPR to mask register and vice versa
2796 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2797                          string OpcodeStr, RegisterClass KRC,
2798                          ValueType vvt, X86MemOperand x86memop> {
2799   let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
2800   def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2801              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2802              Sched<[WriteMove]>;
2803   def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2804              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2805              [(set KRC:$dst, (vvt (load addr:$src)))]>,
2806              Sched<[WriteLoad]>;
2807   def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2808              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2809              [(store KRC:$src, addr:$dst)]>,
2810              Sched<[WriteStore]>;
2811 }
2813 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2814                              string OpcodeStr,
2815                              RegisterClass KRC, RegisterClass GRC> {
2816   let hasSideEffects = 0 in {
2817     def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2818                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2819                Sched<[WriteMove]>;
2820     def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2821                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2822                Sched<[WriteMove]>;
2823   }
2824 }
2826 let Predicates = [HasDQI] in
2827   defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2828                avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2829                VEX, PD;
2831 let Predicates = [HasAVX512] in
2832   defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2833                avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2834                VEX, PS;
2836 let Predicates = [HasBWI] in {
2837   defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2838                VEX, PD, VEX_W;
2839   defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2840                VEX, XD;
2841   defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2842                VEX, PS, VEX_W;
2843   defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2844                VEX, XD, VEX_W;
2845 }
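// The three KMOV forms, e.g. for 16-bit masks:
//   kmovw k1, k2                       ; mask <- mask
//   kmovw k1, word ptr [rdi]           ; mask <-> memory (kmovw [rdi], k1)
//   kmovw k1, eax / kmovw eax, k1      ; mask <-> GPR
// KMOVB requires DQI and KMOVD/KMOVQ require BWI, as predicated above.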
2847 // GR from/to mask register
2848 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2849           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2850 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2851           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2853 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2854           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2855 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2856           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
2858 def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2859           (KMOVWrk VK16:$src)>;
2860 def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2861           (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
2862 def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2863           (COPY_TO_REGCLASS VK16:$src, GR32)>;
2864 def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2865           (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
2867 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2868           (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2869 def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2870           (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
2871 def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2872           (COPY_TO_REGCLASS VK8:$src, GR32)>;
2873 def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2874           (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
2876 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2877           (COPY_TO_REGCLASS GR32:$src, VK32)>;
2878 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2879           (COPY_TO_REGCLASS VK32:$src, GR32)>;
2880 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2881           (COPY_TO_REGCLASS GR64:$src, VK64)>;
2882 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2883           (COPY_TO_REGCLASS VK64:$src, GR64)>;
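// In short: bitconverts between small integers and mask vectors are lowered
// as register-class copies through GR32 (using sub_8bit/sub_16bit where
// needed), zero-extended uses go through KMOVBrk/KMOVWrk, and 32/64-bit
// masks copy directly to/from GR32/GR64.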
2885 // Load/store kreg
2886 let Predicates = [HasDQI] in {
2887   def : Pat<(store VK1:$src, addr:$dst),
2888             (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
2890   def : Pat<(v1i1 (load addr:$src)),
2891             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2892   def : Pat<(v2i1 (load addr:$src)),
2893             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2894   def : Pat<(v4i1 (load addr:$src)),
2895             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2896 }
2898 let Predicates = [HasAVX512] in {
2899   def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2900             (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2901   def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2902             (KMOVWkm addr:$src)>;
2903 }
2905 def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2906                          SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2907                                               SDTCVecEltisVT<1, i1>,
2908                                               SDTCisPtrTy<2>]>>;
2910 let Predicates = [HasAVX512] in {
2911   multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2912     def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2913               (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2915     def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2916               (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2918     def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2919               (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2921     def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2922               (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
2923   }
2925   defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
2926   defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
2927   defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
2928   defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
2929   defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
2930   defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
2931   defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;
2933   def : Pat<(insert_subvector (v16i1 immAllZerosV),
2934                               (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2935             (COPY_TO_REGCLASS
2936              (KMOVWkr (AND32ri8
2937                        (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2938                        (i32 1))), VK16)>;
2939 }
2941 // Mask unary operation
2942 // - KNOT
2943 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2944                             RegisterClass KRC, SDPatternOperator OpNode,
2945                             X86FoldableSchedWrite sched, Predicate prd> {
2946   let Predicates = [prd] in
2947     def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2948                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2949                [(set KRC:$dst, (OpNode KRC:$src))]>,
2950                Sched<[sched]>;
2951 }
2953 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2954                                 SDPatternOperator OpNode,
2955                                 X86FoldableSchedWrite sched> {
2956   defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2957                             sched, HasDQI>, VEX, PD;
2958   defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2959                             sched, HasAVX512>, VEX, PS;
2960   defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2961                             sched, HasBWI>, VEX, PD, VEX_W;
2962   defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2963                             sched, HasBWI>, VEX, PS, VEX_W;
2964 }
2966 // TODO - do we need an X86SchedWriteWidths::KMASK type?
2967 defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
2969 // KNL does not support KMOVB, so an 8-bit mask is promoted to a 16-bit mask.
2970 let Predicates = [HasAVX512, NoDQI] in
2971 def : Pat<(vnot VK8:$src),
2972           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
2974 def : Pat<(vnot VK4:$src),
2975           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2976 def : Pat<(vnot VK2:$src),
2977           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
2979 // Mask binary operation
2980 // - KAND, KANDN, KOR, KXNOR, KXOR
2981 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2982                            RegisterClass KRC, SDPatternOperator OpNode,
2983                            X86FoldableSchedWrite sched, Predicate prd,
2984                            bit IsCommutable> {
2985   let Predicates = [prd], isCommutable = IsCommutable in
2986     def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2987                !strconcat(OpcodeStr,
2988                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2989                [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
2990                Sched<[sched]>;
2991 }
2993 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2994                                  SDPatternOperator OpNode,
2995                                  X86FoldableSchedWrite sched, bit IsCommutable,
2996                                  Predicate prdW = HasAVX512> {
2997   defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2998                              sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
2999   defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3000                              sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
3001   defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3002                              sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
3003   defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3004                              sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
3005 }
3007 def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
3008 def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
3009 // These nodes use 'vnot' instead of 'not' to support vectors.
3010 def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
3011 def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
3013 // TODO - do we need an X86SchedWriteWidths::KMASK type?
3014 defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
3015 defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
3016 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
3017 defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
3018 defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
3019 defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
3021 multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
3022                             Instruction Inst> {
3023   // With AVX512F, an 8-bit mask is promoted to a 16-bit mask; with the DQI
3024   // extension the type is legal and the KxxxB instructions are used directly.
3025   let Predicates = [NoDQI] in
3026   def : Pat<(VOpNode VK8:$src1, VK8:$src2),
3027             (COPY_TO_REGCLASS
3028               (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
3029                     (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
3031   // All types smaller than 8 bits require conversion anyway
3032   def : Pat<(OpNode VK1:$src1, VK1:$src2),
3033         (COPY_TO_REGCLASS (Inst
3034                            (COPY_TO_REGCLASS VK1:$src1, VK16),
3035                            (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
3036   def : Pat<(VOpNode VK2:$src1, VK2:$src2),
3037         (COPY_TO_REGCLASS (Inst
3038                            (COPY_TO_REGCLASS VK2:$src1, VK16),
3039                            (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
3040   def : Pat<(VOpNode VK4:$src1, VK4:$src2),
3041         (COPY_TO_REGCLASS (Inst
3042                            (COPY_TO_REGCLASS VK4:$src1, VK16),
3043                            (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
3044 }
3046 defm : avx512_binop_pat<and,   and,  KANDWrr>;
3047 defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
3048 defm : avx512_binop_pat<or,    or,   KORWrr>;
3049 defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
3050 defm : avx512_binop_pat<xor,   xor,  KXORWrr>;
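// For example, with AVX512F but no DQI, an AND of two v8i1 masks is selected
// as KANDWrr on VK16 copies of the operands and the result is copied back to
// VK8, since KANDB is only available with DQI.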
3052 // Mask unpacking
3053 multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
3054                              X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
3055                              Predicate prd> {
3056   let Predicates = [prd] in {
3057     let hasSideEffects = 0 in
3058     def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
3059                (ins Src.KRC:$src1, Src.KRC:$src2),
3060                "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
3061                VEX_4V, VEX_L, Sched<[sched]>;
3063     def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
3064               (!cast<Instruction>(NAME##rr) Src.KRC:$src2, Src.KRC:$src1)>;
3065   }
}
3068 defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, PD;
3069 defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
3070 defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;
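// Usage note (illustrative): kunpck builds a double-width mask from two
// sources, with the instruction's first source ending up in the high half.
// That is why the concat_vectors pattern above passes ($src2, $src1) to the
// instruction: the low half of the concatenation is pattern operand $src1.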
3072 // Mask bit testing
3073 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3074                               SDNode OpNode, X86FoldableSchedWrite sched,
3075                               Predicate prd> {
3076   let Predicates = [prd], Defs = [EFLAGS] in
3077     def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
3078                !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
3079                [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
3080                Sched<[sched]>;
}
3083 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
3084                                 X86FoldableSchedWrite sched,
3085                                 Predicate prdW = HasAVX512> {
3086   defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
3087                                                                 VEX, PD;
3088   defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
3089                                                                 VEX, PS;
3090   defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
3091                                                                 VEX, PS, VEX_W;
3092   defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
3093                                                                 VEX, PD, VEX_W;
}
3096 // TODO - do we need an X86SchedWriteWidths::KMASK type?
3097 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
3098 defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
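// Illustrative use (a sketch): kortest ORs the two masks and only sets EFLAGS,
// so an "is the mask all zero?" check can branch without moving the mask to a
// GPR, e.g. roughly
//   kortestw %k0, %k0
//   je .Lall_zero            ; ZF is set when k0 | k0 == 0
// ktest (DQI) performs the analogous AND/ANDN-based test.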
3100 // Mask shift
3101 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3102                                SDNode OpNode, X86FoldableSchedWrite sched> {
3103   let Predicates = [HasAVX512] in
3104     def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
3105                  !strconcat(OpcodeStr,
3106                             "\t{$imm, $src, $dst|$dst, $src, $imm}"),
3107                             [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
3108                  Sched<[sched]>;
}
3111 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
3112                                  SDNode OpNode, X86FoldableSchedWrite sched> {
3113   defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3114                                sched>, VEX, TAPD, VEX_W;
3115   let Predicates = [HasDQI] in
3116   defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3117                                sched>, VEX, TAPD;
3118   let Predicates = [HasBWI] in {
3119   defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3120                                sched>, VEX, TAPD, VEX_W;
3121   defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3122                                sched>, VEX, TAPD;
3123   }
}
3126 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
3127 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
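// Illustrative example: the immediate shifts move mask bits toward the LSB or
// MSB, which is how the upper half of a mask is usually extracted, e.g.
//   kshiftrw $8, %k0, %k1    ; k1 = upper 8 bits of the 16-bit mask in k0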
3129 // Patterns for comparing 128/256-bit integer vectors using a 512-bit instruction.
3130 multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3131                                                  string InstStr,
3132                                                  X86VectorVTInfo Narrow,
3133                                                  X86VectorVTInfo Wide> {
3134 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3135                                 (Narrow.VT Narrow.RC:$src2), cond)),
3136           (COPY_TO_REGCLASS
3137            (!cast<Instruction>(InstStr#"Zrri")
3138             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3139             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3140             (Frag.OperandTransform $cc)), Narrow.KRC)>;
3142 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3143                            (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3144                                                     (Narrow.VT Narrow.RC:$src2),
3145                                                     cond)))),
3146           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3147            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3148            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3149            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3150            (Frag_su.OperandTransform $cc)), Narrow.KRC)>;
}
3153 multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3154                                                      PatFrag CommFrag, PatFrag CommFrag_su,
3155                                                      string InstStr,
3156                                                      X86VectorVTInfo Narrow,
3157                                                      X86VectorVTInfo Wide> {
3158 // Broadcast load.
3159 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3160                                 (Narrow.BroadcastLdFrag addr:$src2), cond)),
3161           (COPY_TO_REGCLASS
3162            (!cast<Instruction>(InstStr#"Zrmib")
3163             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3164             addr:$src2, (Frag.OperandTransform $cc)), Narrow.KRC)>;
3166 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3167                            (Narrow.KVT
3168                             (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3169                                          (Narrow.BroadcastLdFrag addr:$src2),
3170                                          cond)))),
3171           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3172            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3173            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3174            addr:$src2, (Frag_su.OperandTransform $cc)), Narrow.KRC)>;
3176 // Commuted with broadcast load.
3177 def : Pat<(Narrow.KVT (CommFrag:$cc (Narrow.BroadcastLdFrag addr:$src2),
3178                                     (Narrow.VT Narrow.RC:$src1),
3179                                     cond)),
3180           (COPY_TO_REGCLASS
3181            (!cast<Instruction>(InstStr#"Zrmib")
3182             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3183             addr:$src2, (CommFrag.OperandTransform $cc)), Narrow.KRC)>;
3185 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3186                            (Narrow.KVT
3187                             (CommFrag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
3188                                              (Narrow.VT Narrow.RC:$src1), 
3189                                              cond)))),
3190           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3191            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3192            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3193            addr:$src2, (CommFrag_su.OperandTransform $cc)), Narrow.KRC)>;
}
3196 // Same as above, but for FP types, which use X86cmpm directly rather than PatFrags.
3197 multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
3198                                                 X86VectorVTInfo Narrow,
3199                                                 X86VectorVTInfo Wide> {
3200 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3201                                (Narrow.VT Narrow.RC:$src2), timm:$cc)),
3202           (COPY_TO_REGCLASS
3203            (!cast<Instruction>(InstStr#"Zrri")
3204             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3205             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3206             timm:$cc), Narrow.KRC)>;
3208 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3209                            (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3210                                        (Narrow.VT Narrow.RC:$src2), timm:$cc))),
3211           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3212            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3213            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3214            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3215            timm:$cc), Narrow.KRC)>;
3217 // Broadcast load.
3218 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3219                                (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
3220           (COPY_TO_REGCLASS
3221            (!cast<Instruction>(InstStr#"Zrmbi")
3222             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3223             addr:$src2, timm:$cc), Narrow.KRC)>;
3225 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3226                            (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3227                                        (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
3228           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3229            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3230            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3231            addr:$src2, timm:$cc), Narrow.KRC)>;
3233 // Commuted with broadcast load.
3234 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3235                                (Narrow.VT Narrow.RC:$src1), timm:$cc)),
3236           (COPY_TO_REGCLASS
3237            (!cast<Instruction>(InstStr#"Zrmbi")
3238             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3239             addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3241 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3242                            (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3243                                        (Narrow.VT Narrow.RC:$src1), timm:$cc))),
3244           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3245            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3246            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3247            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
}
3250 let Predicates = [HasAVX512, NoVLX] in {
3251   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3252   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3254   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3255   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3257   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3258   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3260   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3261   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3263   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v8i32x_info, v16i32_info>;
3264   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3266   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v4i32x_info, v16i32_info>;
3267   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3269   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3270   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3272   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3273   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3275   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
3276   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
3277   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
3278   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
}
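// Illustrative expansion of the NoVLX lowering above (a sketch, not verbatim
// codegen): a 256-bit integer compare on an AVX512F-only target is performed
// on the containing ZMM registers and the narrow mask is taken from the low
// bits of the 512-bit compare, e.g. roughly
//   vpcmpd $1, %zmm1, %zmm0, %k0   ; predicate 1 = signed less-than
// with the upper lanes of the widened inputs left undefined and ignored.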
3281 let Predicates = [HasBWI, NoVLX] in {
3282   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3283   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3285   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3286   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3288   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3289   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3291   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3292   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
}
3295 // Mask setting all 0s or 1s
3296 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
3297   let Predicates = [HasAVX512] in
3298     let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3299         SchedRW = [WriteZero] in
3300       def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3301                      [(set KRC:$dst, (VT Val))]>;
}
3304 multiclass avx512_mask_setop_w<PatFrag Val> {
3305   defm W : avx512_mask_setop<VK16, v16i1, Val>;
3306   defm D : avx512_mask_setop<VK32,  v32i1, Val>;
3307   defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}
3310 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3311 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3313 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
3314 let Predicates = [HasAVX512] in {
3315   def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3316   def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3317   def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3318   def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3319   def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
3320   def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
3321   def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
3322   def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
}
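// Note (an assumption about where the expansion happens, e.g. the X86 post-RA
// pseudo expansion): KSET0*/KSET1* are pseudos that are expected to become a
// kxor/kxnor of a mask register with itself, so materialising an all-zero or
// all-one mask never needs a trip through a GPR; the patterns above simply
// reuse the 16-bit form for the narrower mask types.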
3325 // Patterns for kmask insert_subvector/extract_subvector to/from index=0
3326 multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3327                                              RegisterClass RC, ValueType VT> {
3328   def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3329             (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3331   def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3332             (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}
3334 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
3335 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
3336 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
3337 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
3338 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
3339 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;
3341 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
3342 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
3343 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
3344 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
3345 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;
3347 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
3348 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
3349 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
3350 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;
3352 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
3353 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
3354 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;
3356 defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3357 defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3359 defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
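// Illustrative consequence of the patterns above (a sketch): inserting or
// extracting a narrower mask at index 0 is free at the instruction level,
// e.g.
//   (v2i1 (extract_subvector (v16i1 VK16:$k), (iPTR 0)))
//     --> (COPY_TO_REGCLASS VK16:$k, VK2)   ; register-class change only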
3361 //===----------------------------------------------------------------------===//
3362 // AVX-512 - Aligned and unaligned load and store
3365 multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3366                        X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3367                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3368                        bit NoRMPattern = 0,
3369                        SDPatternOperator SelectOprr = vselect> {
3370   let hasSideEffects = 0 in {
3371   let isMoveReg = 1 in
3372   def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3373                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3374                     _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
3375                     EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
3376   def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3377                       (ins _.KRCWM:$mask,  _.RC:$src),
3378                       !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3379                        "${dst} {${mask}} {z}, $src}"),
3380                        [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3381                                            (_.VT _.RC:$src),
3382                                            _.ImmAllZerosV)))], _.ExeDomain>,
3383                        EVEX, EVEX_KZ, Sched<[Sched.RR]>;
3385   let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3386   def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3387                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3388                     !if(NoRMPattern, [],
3389                         [(set _.RC:$dst,
3390                           (_.VT (ld_frag addr:$src)))]),
3391                     _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
3392                     EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
3394   let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3395     def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3396                       (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3397                       !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3398                       "${dst} {${mask}}, $src1}"),
3399                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3400                                           (_.VT _.RC:$src1),
3401                                           (_.VT _.RC:$src0))))], _.ExeDomain>,
3402                        EVEX, EVEX_K, Sched<[Sched.RR]>;
3403     def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3404                      (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3405                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3406                       "${dst} {${mask}}, $src1}"),
3407                      [(set _.RC:$dst, (_.VT
3408                          (vselect _.KRCWM:$mask,
3409                           (_.VT (ld_frag addr:$src1)),
3410                            (_.VT _.RC:$src0))))], _.ExeDomain>,
3411                      EVEX, EVEX_K, Sched<[Sched.RM]>;
3412   }
3413   def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3414                   (ins _.KRCWM:$mask, _.MemOp:$src),
3415                   OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3416                                 "${dst} {${mask}} {z}, $src}",
3417                   [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
3418                     (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3419                   _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
3420   }
3421   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3422             (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3424   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3425             (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3427   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3428             (!cast<Instruction>(Name#_.ZSuffix##rmk) _.RC:$src0,
3429              _.KRCWM:$mask, addr:$ptr)>;
}
3432 multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3433                                  AVX512VLVectorVTInfo _, Predicate prd,
3434                                  X86SchedWriteMoveLSWidths Sched,
3435                                  string EVEX2VEXOvrd, bit NoRMPattern = 0> {
3436   let Predicates = [prd] in
3437   defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3438                        _.info512.AlignedLdFrag, masked_load_aligned,
3439                        Sched.ZMM, "", NoRMPattern>, EVEX_V512;
3441   let Predicates = [prd, HasVLX] in {
3442   defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3443                           _.info256.AlignedLdFrag, masked_load_aligned,
3444                           Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
3445   defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3446                           _.info128.AlignedLdFrag, masked_load_aligned,
3447                           Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
3448   }
}
3451 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3452                           AVX512VLVectorVTInfo _, Predicate prd,
3453                           X86SchedWriteMoveLSWidths Sched,
3454                           string EVEX2VEXOvrd, bit NoRMPattern = 0,
3455                           SDPatternOperator SelectOprr = vselect> {
3456   let Predicates = [prd] in
3457   defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3458                        masked_load, Sched.ZMM, "",
3459                        NoRMPattern, SelectOprr>, EVEX_V512;
3461   let Predicates = [prd, HasVLX] in {
3462   defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3463                          masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
3464                          NoRMPattern, SelectOprr>, EVEX_V256;
3465   defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3466                          masked_load, Sched.XMM, EVEX2VEXOvrd,
3467                          NoRMPattern, SelectOprr>, EVEX_V128;
3468   }
}
3471 multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3472                         X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3473                         X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3474                         bit NoMRPattern = 0> {
3475   let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3476   let isMoveReg = 1 in
3477   def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3478                          OpcodeStr # "\t{$src, $dst|$dst, $src}",
3479                          [], _.ExeDomain>, EVEX,
3480                          FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
3481                          EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
3482   def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3483                          (ins _.KRCWM:$mask, _.RC:$src),
3484                          OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3485                          "${dst} {${mask}}, $src}",
3486                          [], _.ExeDomain>,  EVEX, EVEX_K,
3487                          FoldGenData<BaseName#_.ZSuffix#rrk>,
3488                          Sched<[Sched.RR]>;
3489   def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3490                           (ins _.KRCWM:$mask, _.RC:$src),
3491                           OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3492                           "${dst} {${mask}} {z}, $src}",
3493                           [], _.ExeDomain>, EVEX, EVEX_KZ,
3494                           FoldGenData<BaseName#_.ZSuffix#rrkz>,
3495                           Sched<[Sched.RR]>;
3496   }
3498   let hasSideEffects = 0, mayStore = 1 in
3499   def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3500                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3501                     !if(NoMRPattern, [],
3502                         [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3503                     _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
3504                     EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
3505   def mrk : AVX512PI<opc, MRMDestMem, (outs),
3506                      (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3507               OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3508                [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
3509                NotMemoryFoldable;
3511   def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3512            (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3513                                                         _.KRCWM:$mask, _.RC:$src)>;
3515   def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3516                   (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3517                    _.RC:$dst, _.RC:$src), 0>;
3518   def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3519                   (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3520                    _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3521   def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3522                   (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3523                    _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
}
3526 multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3527                             AVX512VLVectorVTInfo _, Predicate prd,
3528                             X86SchedWriteMoveLSWidths Sched,
3529                             string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3530   let Predicates = [prd] in
3531   defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3532                         masked_store, Sched.ZMM, "",
3533                         NoMRPattern>, EVEX_V512;
3534   let Predicates = [prd, HasVLX] in {
3535     defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3536                              masked_store, Sched.YMM,
3537                              EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3538     defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3539                              masked_store, Sched.XMM, EVEX2VEXOvrd,
3540                              NoMRPattern>, EVEX_V128;
3541   }
}
3544 multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3545                                   AVX512VLVectorVTInfo _, Predicate prd,
3546                                   X86SchedWriteMoveLSWidths Sched,
3547                                   string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3548   let Predicates = [prd] in
3549   defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3550                         masked_store_aligned, Sched.ZMM, "",
3551                         NoMRPattern>, EVEX_V512;
3553   let Predicates = [prd, HasVLX] in {
3554     defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3555                              masked_store_aligned, Sched.YMM,
3556                              EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3557     defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3558                              masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
3559                              NoMRPattern>, EVEX_V128;
3560   }
}
3563 defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3564                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3565                avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3566                                       HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3567                PS, EVEX_CD8<32, CD8VF>;
3569 defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3570                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3571                avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3572                                       HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3573                PD, VEX_W, EVEX_CD8<64, CD8VF>;
3575 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3576                               SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
3577                avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3578                                SchedWriteFMoveLS, "VMOVUPS">,
3579                                PS, EVEX_CD8<32, CD8VF>;
3581 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3582                               SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
3583                avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3584                                SchedWriteFMoveLS, "VMOVUPD">,
3585                PD, VEX_W, EVEX_CD8<64, CD8VF>;
3587 defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3588                                        HasAVX512, SchedWriteVecMoveLS,
3589                                        "VMOVDQA", 1>,
3590                  avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3591                                         HasAVX512, SchedWriteVecMoveLS,
3592                                         "VMOVDQA", 1>,
3593                  PD, EVEX_CD8<32, CD8VF>;
3595 defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3596                                        HasAVX512, SchedWriteVecMoveLS,
3597                                        "VMOVDQA">,
3598                  avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3599                                         HasAVX512, SchedWriteVecMoveLS,
3600                                         "VMOVDQA">,
3601                  PD, VEX_W, EVEX_CD8<64, CD8VF>;
3603 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3604                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
3605                 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3606                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3607                 XD, EVEX_CD8<8, CD8VF>;
3609 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3610                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3611                  avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3612                                  SchedWriteVecMoveLS, "VMOVDQU", 1>,
3613                  XD, VEX_W, EVEX_CD8<16, CD8VF>;
3615 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3616                                 SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
3617                  avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3618                                  SchedWriteVecMoveLS, "VMOVDQU", 1>,
3619                  XS, EVEX_CD8<32, CD8VF>;
3621 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3622                                 SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
3623                  avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3624                                  SchedWriteVecMoveLS, "VMOVDQU">,
3625                  XS, VEX_W, EVEX_CD8<64, CD8VF>;
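// Illustrative selection enabled by the load/store multiclasses above (a
// sketch under the assumption of a 64-byte-aligned pointer and a zero
// passthrough value):
//   %v = call <16 x float> @llvm.masked.load.v16f32(...)   ; zero passthrough
// is expected to match the rmkz form and emit roughly
//   vmovaps (%rdi), %zmm0 {%k1} {z}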
3627 // Special instructions to help with spilling when we don't have VLX. We need
3628 // to load or store from a ZMM register instead. These are converted in
3629 // expandPostRAPseudos.
3630 let isReMaterializable = 1, canFoldAsLoad = 1,
3631     isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3632 def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3633                             "", []>, Sched<[WriteFLoadX]>;
3634 def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3635                             "", []>, Sched<[WriteFLoadY]>;
3636 def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3637                             "", []>, Sched<[WriteFLoadX]>;
3638 def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3639                             "", []>, Sched<[WriteFLoadY]>;
}
3642 let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3643 def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3644                             "", []>, Sched<[WriteFStoreX]>;
3645 def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3646                             "", []>, Sched<[WriteFStoreY]>;
3647 def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3648                             "", []>, Sched<[WriteFStoreX]>;
3649 def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3650                             "", []>, Sched<[WriteFStoreY]>;
}
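// Illustrative note (a sketch; the rewriting itself lives in the post-RA
// pseudo expansion mentioned above): without VLX there is no EVEX-encoded
// 128/256-bit move, so these pseudos are later replaced by the corresponding
// 512-bit VMOVAPS/VMOVUPS on the ZMM super-register of the spilled XMM/YMM
// value.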
3653 def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3654                           (v8i64 VR512:$src))),
3655    (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3656                                               VK8), VR512:$src)>;
3658 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3659                            (v16i32 VR512:$src))),
3660                   (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3662 // These patterns exist to prevent the above patterns from introducing a second
3663 // mask inversion when one already exists.
3664 def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
3665                           (v8i64 immAllZerosV),
3666                           (v8i64 VR512:$src))),
3667                  (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3668 def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
3669                            (v16i32 immAllZerosV),
3670                            (v16i32 VR512:$src))),
3671                   (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
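// Illustrative effect (not verbatim codegen): without the two patterns above,
// a select whose mask is already inverted would go through the earlier
// KNOT-inserting patterns and end up double-inverting; matching the explicit
// xor-with-all-ones form keeps it to a single zero-masked move, e.g. roughly
//   vmovdqa32 %zmm1, %zmm0 {%k1} {z}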
3673 multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3674                               X86VectorVTInfo Wide> {
3675  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3676                                Narrow.RC:$src1, Narrow.RC:$src0)),
3677            (EXTRACT_SUBREG
3678             (Wide.VT
3679              (!cast<Instruction>(InstrStr#"rrk")
3680               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3681               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3682               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3683             Narrow.SubRegIdx)>;
3685  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3686                                Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3687            (EXTRACT_SUBREG
3688             (Wide.VT
3689              (!cast<Instruction>(InstrStr#"rrkz")
3690               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3691               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3692             Narrow.SubRegIdx)>;
}
3695 // Patterns for handling narrow-mask selects of 128/256-bit vectors when VLX
3696 // isn't available. Use a 512-bit operation and extract the original width.
3697 let Predicates = [HasAVX512, NoVLX] in {
3698   defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3699   defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3700   defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3701   defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3703   defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3704   defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3705   defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3706   defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}
3709 let Predicates = [HasBWI, NoVLX] in {
3710   defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3711   defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3713   defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3714   defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
}
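// Illustrative expansion of mask_move_lowering (a sketch): a masked select of
// two YMM values on an AVX512F-only target is widened to the 512-bit masked
// move and the original width is extracted again, e.g. roughly
//   vmovaps %zmm2, %zmm1 {%k1}     ; blend performed on the widened registers
// followed by using the low YMM part of the result.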
3717 let Predicates = [HasAVX512] in {
3718   // 512-bit load.
3719   def : Pat<(alignedloadv16i32 addr:$src),
3720             (VMOVDQA64Zrm addr:$src)>;
3721   def : Pat<(alignedloadv32i16 addr:$src),
3722             (VMOVDQA64Zrm addr:$src)>;
3723   def : Pat<(alignedloadv64i8 addr:$src),
3724             (VMOVDQA64Zrm addr:$src)>;
3725   def : Pat<(loadv16i32 addr:$src),
3726             (VMOVDQU64Zrm addr:$src)>;
3727   def : Pat<(loadv32i16 addr:$src),
3728             (VMOVDQU64Zrm addr:$src)>;
3729   def : Pat<(loadv64i8 addr:$src),
3730             (VMOVDQU64Zrm addr:$src)>;
3732   // 512-bit store.
3733   def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3734             (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3735   def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3736             (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3737   def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3738             (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3739   def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3740             (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3741   def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3742             (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3743   def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3744             (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
}
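// Note: all 512-bit integer vector types funnel into the 64-bit-element
// flavour here, so e.g. an aligned v64i8 load still selects vmovdqa64; the
// element size only becomes significant once per-element masking is involved.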
3747 let Predicates = [HasVLX] in {
3748   // 128-bit load.
3749   def : Pat<(alignedloadv4i32 addr:$src),
3750             (VMOVDQA64Z128rm addr:$src)>;
3751   def : Pat<(alignedloadv8i16 addr:$src),
3752             (VMOVDQA64Z128rm addr:$src)>;
3753   def : Pat<(alignedloadv16i8 addr:$src),
3754             (VMOVDQA64Z128rm addr:$src)>;
3755   def : Pat<(loadv4i32 addr:$src),
3756             (VMOVDQU64Z128rm addr:$src)>;
3757   def : Pat<(loadv8i16 addr:$src),
3758             (VMOVDQU64Z128rm addr:$src)>;
3759   def : Pat<(loadv16i8 addr:$src),
3760             (VMOVDQU64Z128rm addr:$src)>;
3762   // 128-bit store.
3763   def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3764             (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3765   def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3766             (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3767   def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3768             (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3769   def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3770             (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3771   def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3772             (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3773   def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3774             (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3776   // 256-bit load.
3777   def : Pat<(alignedloadv8i32 addr:$src),
3778             (VMOVDQA64Z256rm addr:$src)>;
3779   def : Pat<(alignedloadv16i16 addr:$src),
3780             (VMOVDQA64Z256rm addr:$src)>;
3781   def : Pat<(alignedloadv32i8 addr:$src),
3782             (VMOVDQA64Z256rm addr:$src)>;
3783   def : Pat<(loadv8i32 addr:$src),
3784             (VMOVDQU64Z256rm addr:$src)>;
3785   def : Pat<(loadv16i16 addr:$src),
3786             (VMOVDQU64Z256rm addr:$src)>;
3787   def : Pat<(loadv32i8 addr:$src),
3788             (VMOVDQU64Z256rm addr:$src)>;
3790   // 256-bit store.
3791   def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3792             (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3793   def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3794             (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3795   def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3796             (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3797   def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3798             (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3799   def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3800             (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3801   def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3802             (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}
3805 // Move Int Doubleword to Packed Double Int
3807 let ExeDomain = SSEPackedInt in {
3808 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3809                       "vmovd\t{$src, $dst|$dst, $src}",
3810                       [(set VR128X:$dst,
3811                         (v4i32 (scalar_to_vector GR32:$src)))]>,
3812                         EVEX, Sched<[WriteVecMoveFromGpr]>;
3813 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3814                       "vmovd\t{$src, $dst|$dst, $src}",
3815                       [(set VR128X:$dst,
3816                         (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3817                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
3818 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3819                       "vmovq\t{$src, $dst|$dst, $src}",
3820                         [(set VR128X:$dst,
3821                           (v2i64 (scalar_to_vector GR64:$src)))]>,
3822                       EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3823 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3824 def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3825                       (ins i64mem:$src),
3826                       "vmovq\t{$src, $dst|$dst, $src}", []>,
3827                       EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
3828 let isCodeGenOnly = 1 in {
3829 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3830                        "vmovq\t{$src, $dst|$dst, $src}",
3831                        [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3832                        EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3833 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3834                          "vmovq\t{$src, $dst|$dst, $src}",
3835                          [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3836                          EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
}
3838 } // ExeDomain = SSEPackedInt
3840 // Move Int Doubleword to Single Scalar
3842 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3843 def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3844                       "vmovd\t{$src, $dst|$dst, $src}",
3845                       [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3846                       EVEX, Sched<[WriteVecMoveFromGpr]>;
3847 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3849 // Move doubleword from xmm register to r/m32
3851 let ExeDomain = SSEPackedInt in {
3852 def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3853                        "vmovd\t{$src, $dst|$dst, $src}",
3854                        [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3855                                         (iPTR 0)))]>,
3856                        EVEX, Sched<[WriteVecMoveToGpr]>;
3857 def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
3858                        (ins i32mem:$dst, VR128X:$src),
3859                        "vmovd\t{$src, $dst|$dst, $src}",
3860                        [(store (i32 (extractelt (v4i32 VR128X:$src),
3861                                      (iPTR 0))), addr:$dst)]>,
3862                        EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
3863 } // ExeDomain = SSEPackedInt
3865 // Move quadword from xmm1 register to r/m64
3867 let ExeDomain = SSEPackedInt in {
3868 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3869                       "vmovq\t{$src, $dst|$dst, $src}",
3870                       [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3871                                                    (iPTR 0)))]>,
3872                       PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
3873                       Requires<[HasAVX512]>;
3875 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
3876 def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3877                       "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
3878                       EVEX, VEX_W, Sched<[WriteVecStore]>,
3879                       Requires<[HasAVX512, In64BitMode]>;
3881 def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3882                       (ins i64mem:$dst, VR128X:$src),
3883                       "vmovq\t{$src, $dst|$dst, $src}",
3884                       [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3885                               addr:$dst)]>,
3886                       EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
3887                       Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
3889 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
3890 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3891                              (ins VR128X:$src),
3892                              "vmovq\t{$src, $dst|$dst, $src}", []>,
3893                              EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
3894 } // ExeDomain = SSEPackedInt
3896 def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
3897                 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
3899 let Predicates = [HasAVX512] in {
3900   def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
3901             (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
}
3904 // Move Scalar Single to Double Int
3906 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3907 def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3908                       (ins FR32X:$src),
3909                       "vmovd\t{$src, $dst|$dst, $src}",
3910                       [(set GR32:$dst, (bitconvert FR32X:$src))]>,
3911                       EVEX, Sched<[WriteVecMoveToGpr]>;
3912 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3914 // Move Quadword Int to Packed Quadword Int
3916 let ExeDomain = SSEPackedInt in {
3917 def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3918                       (ins i64mem:$src),
3919                       "vmovq\t{$src, $dst|$dst, $src}",
3920                       [(set VR128X:$dst,
3921                         (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3922                       EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
3923 } // ExeDomain = SSEPackedInt
3925 // Allow "vmovd" but print "vmovq".
3926 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3927                 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
3928 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3929                 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
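// Illustrative asm accepted because of the aliases above: writing
//   vmovd %rax, %xmm0
// assembles to the 64-bit GPR<->XMM move and is printed back as
//   vmovq %rax, %xmm0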
3931 //===----------------------------------------------------------------------===//
3932 // AVX-512  MOVSS, MOVSD
3933 //===----------------------------------------------------------------------===//
3935 multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
3936                               X86VectorVTInfo _> {
3937   let Predicates = [HasAVX512, OptForSize] in
3938   def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3939              (ins _.RC:$src1, _.RC:$src2),
3940              !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3941              [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3942              _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
3943   def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3944               (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3945               !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3946               "$dst {${mask}} {z}, $src1, $src2}"),
3947               [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3948                                       (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3949                                       _.ImmAllZerosV)))],
3950               _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
3951   let Constraints = "$src0 = $dst"  in
3952   def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3953              (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3954              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3955              "$dst {${mask}}, $src1, $src2}"),
3956              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3957                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3958                                      (_.VT _.RC:$src0))))],
3959              _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
3960   let canFoldAsLoad = 1, isReMaterializable = 1 in {
3961   def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
3962              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3963              [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
3964              _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3965   // _alt version uses FR32/FR64 register class.
3966   let isCodeGenOnly = 1 in
3967   def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3968                  !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3969                  [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3970                  _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3971   }
3972   let mayLoad = 1, hasSideEffects = 0 in {
3973     let Constraints = "$src0 = $dst" in
3974     def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3975                (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3976                !strconcat(asm, "\t{$src, $dst {${mask}}|",
3977                "$dst {${mask}}, $src}"),
3978                [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
3979     def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3980                (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3981                !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3982                "$dst {${mask}} {z}, $src}"),
3983                [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
3984   }
3985   def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
3986              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3987              [(store _.FRC:$src, addr:$dst)],  _.ExeDomain>,
3988              EVEX, Sched<[WriteFStore]>;
3989   let mayStore = 1, hasSideEffects = 0 in
3990   def mrk: AVX512PI<0x11, MRMDestMem, (outs),
3991               (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
3992               !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
3993               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
3994               NotMemoryFoldable;
}
3997 defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
3998                                   VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
4000 defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
4001                                   VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
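// Illustrative use of the masked forms above (a sketch): a scalar select that
// already lives in vector registers, roughly
//   %r = select i1 %c, float %a, float %b
// can be emitted as a single merge-masked move such as
//   vmovss %xmm2, %xmm1, %xmm0 {%k1}
// with the 1-bit mask %k1 holding %c (see avx512_move_scalar_lowering below).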
4004 multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
4005                                        PatLeaf ZeroFP, X86VectorVTInfo _> {
4007 def : Pat<(_.VT (OpNode _.RC:$src0,
4008                         (_.VT (scalar_to_vector
4009                                   (_.EltVT (X86selects VK1WM:$mask,
4010                                                        (_.EltVT _.FRC:$src1),
4011                                                        (_.EltVT _.FRC:$src2))))))),
4012           (!cast<Instruction>(InstrStr#rrk)
4013                         (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
4014                         VK1WM:$mask,
4015                         (_.VT _.RC:$src0),
4016                         (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
4018 def : Pat<(_.VT (OpNode _.RC:$src0,
4019                         (_.VT (scalar_to_vector
4020                                   (_.EltVT (X86selects VK1WM:$mask,
4021                                                        (_.EltVT _.FRC:$src1),
4022                                                        (_.EltVT ZeroFP))))))),
4023           (!cast<Instruction>(InstrStr#rrkz)
4024                         VK1WM:$mask,
4025                         (_.VT _.RC:$src0),
4026                         (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
}
4029 multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4030                                         dag Mask, RegisterClass MaskRC> {
4032 def : Pat<(masked_store
4033              (_.info512.VT (insert_subvector undef,
4034                                (_.info128.VT _.info128.RC:$src),
4035                                (iPTR 0))), addr:$dst, Mask),
4036           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4037                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4038                       _.info128.RC:$src)>;
}
4042 multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
4043                                                AVX512VLVectorVTInfo _,
4044                                                dag Mask, RegisterClass MaskRC,
4045                                                SubRegIndex subreg> {
4047 def : Pat<(masked_store
4048              (_.info512.VT (insert_subvector undef,
4049                                (_.info128.VT _.info128.RC:$src),
4050                                (iPTR 0))), addr:$dst, Mask),
4051           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4052                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4053                       _.info128.RC:$src)>;
}
4057 // This matches the more recent codegen from clang that avoids emitting a
4058 // 512-bit masked store directly. Codegen will widen a 128-bit masked store
4059 // to 512 bits on AVX512F-only targets.
4060 multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4061                                                AVX512VLVectorVTInfo _,
4062                                                dag Mask512, dag Mask128,
4063                                                RegisterClass MaskRC,
4064                                                SubRegIndex subreg> {
4066 // AVX512F pattern.
4067 def : Pat<(masked_store
4068              (_.info512.VT (insert_subvector undef,
4069                                (_.info128.VT _.info128.RC:$src),
4070                                (iPTR 0))), addr:$dst, Mask512),
4071           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4072                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4073                       _.info128.RC:$src)>;
4075 // AVX512VL pattern.
4076 def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4077           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4078                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4079                       _.info128.RC:$src)>;
4082 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4083                                        dag Mask, RegisterClass MaskRC> {
4085 def : Pat<(_.info128.VT (extract_subvector
4086                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
4087                                         _.info512.ImmAllZerosV)),
4088                            (iPTR 0))),
4089           (!cast<Instruction>(InstrStr#rmkz)
4090                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4091                       addr:$srcAddr)>;
4093 def : Pat<(_.info128.VT (extract_subvector
4094                 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4095                       (_.info512.VT (insert_subvector undef,
4096                             (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4097                             (iPTR 0))))),
4098                 (iPTR 0))),
4099           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4100                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4101                       addr:$srcAddr)>;
4105 multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4106                                               AVX512VLVectorVTInfo _,
4107                                               dag Mask, RegisterClass MaskRC,
4108                                               SubRegIndex subreg> {
4110 def : Pat<(_.info128.VT (extract_subvector
4111                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
4112                                         _.info512.ImmAllZerosV)),
4113                            (iPTR 0))),
4114           (!cast<Instruction>(InstrStr#rmkz)
4115                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4116                       addr:$srcAddr)>;
4118 def : Pat<(_.info128.VT (extract_subvector
4119                 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4120                       (_.info512.VT (insert_subvector undef,
4121                             (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4122                             (iPTR 0))))),
4123                 (iPTR 0))),
4124           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4125                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4126                       addr:$srcAddr)>;
4130 // This matches the more recent codegen from clang, which avoids emitting a
4131 // 512-bit masked load directly. Codegen will widen a 128-bit masked load to
4132 // 512 bits on AVX512F-only targets.
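// For example (an illustrative sketch), a scalar masked load such as
// _mm_maskz_load_ss(k, p) is emitted as a 128-bit masked load with a zero
// passthru; on AVX512F-only targets type legalization widens it to a 512-bit
// masked load, matched by the Mask512 patterns below, while the Mask128
// patterns handle the unwidened AVX512VL form.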
4133 multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4134                                               AVX512VLVectorVTInfo _,
4135                                               dag Mask512, dag Mask128,
4136                                               RegisterClass MaskRC,
4137                                               SubRegIndex subreg> {
4138 // AVX512F patterns.
4139 def : Pat<(_.info128.VT (extract_subvector
4140                          (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4141                                         _.info512.ImmAllZerosV)),
4142                            (iPTR 0))),
4143           (!cast<Instruction>(InstrStr#rmkz)
4144                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4145                       addr:$srcAddr)>;
4147 def : Pat<(_.info128.VT (extract_subvector
4148                 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4149                       (_.info512.VT (insert_subvector undef,
4150                             (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4151                             (iPTR 0))))),
4152                 (iPTR 0))),
4153           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4154                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4155                       addr:$srcAddr)>;
4157 // AVX512VL patterns.
4158 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4159                          _.info128.ImmAllZerosV)),
4160           (!cast<Instruction>(InstrStr#rmkz)
4161                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4162                       addr:$srcAddr)>;
4164 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4165                          (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4166           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4167                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4168                       addr:$srcAddr)>;
4171 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4172 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4174 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4175                    (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4176 defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4177                    (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4178 defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4179                    (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4181 defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4182                    (v16i1 (insert_subvector
4183                            (v16i1 immAllZerosV),
4184                            (v4i1 (extract_subvector
4185                                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4186                                   (iPTR 0))),
4187                            (iPTR 0))),
4188                    (v4i1 (extract_subvector
4189                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4190                           (iPTR 0))), GR8, sub_8bit>;
4191 defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4192                    (v8i1
4193                     (extract_subvector
4194                      (v16i1
4195                       (insert_subvector
4196                        (v16i1 immAllZerosV),
4197                        (v2i1 (extract_subvector
4198                               (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4199                               (iPTR 0))),
4200                        (iPTR 0))),
4201                      (iPTR 0))),
4202                    (v2i1 (extract_subvector
4203                           (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4204                           (iPTR 0))), GR8, sub_8bit>;
4206 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4207                    (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4208 defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4209                    (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4210 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4211                    (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4213 defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4214                    (v16i1 (insert_subvector
4215                            (v16i1 immAllZerosV),
4216                            (v4i1 (extract_subvector
4217                                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4218                                   (iPTR 0))),
4219                            (iPTR 0))),
4220                    (v4i1 (extract_subvector
4221                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4222                           (iPTR 0))), GR8, sub_8bit>;
4223 defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4224                    (v8i1
4225                     (extract_subvector
4226                      (v16i1
4227                       (insert_subvector
4228                        (v16i1 immAllZerosV),
4229                        (v2i1 (extract_subvector
4230                               (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4231                               (iPTR 0))),
4232                        (iPTR 0))),
4233                      (iPTR 0))),
4234                    (v2i1 (extract_subvector
4235                           (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4236                           (iPTR 0))), GR8, sub_8bit>;
4238 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4239           (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4240            (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4241            VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4242            (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4244 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4245           (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4246            (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4248 def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
4249           (COPY_TO_REGCLASS
4250            (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
4251                                                        VK1WM:$mask, addr:$src)),
4252            FR32X)>;
4253 def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
4254           (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
4256 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4257           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4258            (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4259            VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4260            (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4262 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
4263           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4264            (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4266 def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
4267           (COPY_TO_REGCLASS
4268            (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
4269                                                        VK1WM:$mask, addr:$src)),
4270            FR64X)>;
4271 def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
4272           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
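// Reversed-encoding (MRMDestReg) forms of the scalar moves. They are not
// selected from patterns; they exist so that both encodings can be assembled
// and disassembled, and the ".s" aliases below let the user request them
// explicitly. FoldGenData ties each one back to its normal-form counterpart.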
4274 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
4275   def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4276                            (ins VR128X:$src1, VR128X:$src2),
4277                            "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4278                            []>, XS, EVEX_4V, VEX_LIG,
4279                            FoldGenData<"VMOVSSZrr">,
4280                            Sched<[SchedWriteFShuffle.XMM]>;
4282   let Constraints = "$src0 = $dst" in
4283   def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4284                              (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4285                                                    VR128X:$src1, VR128X:$src2),
4286                              "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4287                                         "$dst {${mask}}, $src1, $src2}",
4288                              []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
4289                              FoldGenData<"VMOVSSZrrk">,
4290                              Sched<[SchedWriteFShuffle.XMM]>;
4292   def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4293                          (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4294                          "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4295                                     "$dst {${mask}} {z}, $src1, $src2}",
4296                          []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
4297                          FoldGenData<"VMOVSSZrrkz">,
4298                          Sched<[SchedWriteFShuffle.XMM]>;
4300   def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4301                            (ins VR128X:$src1, VR128X:$src2),
4302                            "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4303                            []>, XD, EVEX_4V, VEX_LIG, VEX_W,
4304                            FoldGenData<"VMOVSDZrr">,
4305                            Sched<[SchedWriteFShuffle.XMM]>;
4307   let Constraints = "$src0 = $dst" in
4308   def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4309                              (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4310                                                    VR128X:$src1, VR128X:$src2),
4311                              "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4312                                         "$dst {${mask}}, $src1, $src2}",
4313                              []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
4314                              VEX_W, FoldGenData<"VMOVSDZrrk">,
4315                              Sched<[SchedWriteFShuffle.XMM]>;
4317   def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4318                               (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4319                                                           VR128X:$src2),
4320                               "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4321                                          "$dst {${mask}} {z}, $src1, $src2}",
4322                               []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
4323                               VEX_W, FoldGenData<"VMOVSDZrrkz">,
4324                               Sched<[SchedWriteFShuffle.XMM]>;
4327 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4328                 (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4329 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4330                              "$dst {${mask}}, $src1, $src2}",
4331                 (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4332                                 VR128X:$src1, VR128X:$src2), 0>;
4333 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4334                              "$dst {${mask}} {z}, $src1, $src2}",
4335                 (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4336                                  VR128X:$src1, VR128X:$src2), 0>;
4337 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4338                 (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4339 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4340                              "$dst {${mask}}, $src1, $src2}",
4341                 (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4342                                 VR128X:$src1, VR128X:$src2), 0>;
4343 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4344                              "$dst {${mask}} {z}, $src1, $src2}",
4345                 (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4346                                  VR128X:$src1, VR128X:$src2), 0>;
4348 let Predicates = [HasAVX512, OptForSize] in {
4349   def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4350             (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4351   def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4352             (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4354   // Move low f32 and clear high bits.
4355   def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4356             (SUBREG_TO_REG (i32 0),
4357              (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4358               (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4359   def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4360             (SUBREG_TO_REG (i32 0),
4361              (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4362               (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4364   def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4365             (SUBREG_TO_REG (i32 0),
4366              (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4367               (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4368   def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4369             (SUBREG_TO_REG (i32 0),
4370              (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4371               (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4374 // Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4375 // VMOVSS/SD. Unfortunately, this loses the ability to use XMM16-31.
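// The patterns below zero a 128-bit register and blend the low element in
// (immediate 1 for VBLENDPS, 3 for VPBLENDW), roughly
//   vblendps $1, %xmm_src, %xmm_zero, %xmm_dst
// instead of a vmovss, at the cost of being restricted to XMM0-15.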
4376 let Predicates = [HasAVX512, OptForSpeed] in {
4377   def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4378             (SUBREG_TO_REG (i32 0),
4379              (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4380                           (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4381                           (i8 1))), sub_xmm)>;
4382   def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4383             (SUBREG_TO_REG (i32 0),
4384              (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4385                           (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4386                           (i8 3))), sub_xmm)>;
4389 let Predicates = [HasAVX512] in {
4390   def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4391             (VMOVSSZrm addr:$src)>;
4392   def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4393             (VMOVSDZrm addr:$src)>;
4395   // Represent the same patterns as above, but in the form they appear for
4396   // 256-bit types.
4397   def : Pat<(v8f32 (X86vzload32 addr:$src)),
4398             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4399   def : Pat<(v4f64 (X86vzload64 addr:$src)),
4400             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4402   // Represent the same patterns as above, but in the form they appear for
4403   // 512-bit types.
4404   def : Pat<(v16f32 (X86vzload32 addr:$src)),
4405             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4406   def : Pat<(v8f64 (X86vzload64 addr:$src)),
4407             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4410 let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4411 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4412                                 (ins VR128X:$src),
4413                                 "vmovq\t{$src, $dst|$dst, $src}",
4414                                 [(set VR128X:$dst, (v2i64 (X86vzmovl
4415                                                    (v2i64 VR128X:$src))))]>,
4416                                 EVEX, VEX_W;
4419 let Predicates = [HasAVX512] in {
4420   def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4421             (VMOVDI2PDIZrr GR32:$src)>;
4423   def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4424             (VMOV64toPQIZrr GR64:$src)>;
4426   // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
4427   def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
4428             (VMOVDI2PDIZrm addr:$src)>;
4429   def : Pat<(v4i32 (X86vzload32 addr:$src)),
4430             (VMOVDI2PDIZrm addr:$src)>;
4431   def : Pat<(v8i32 (X86vzload32 addr:$src)),
4432             (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4433   def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4434             (VMOVZPQILo2PQIZrr VR128X:$src)>;
4435   def : Pat<(v2i64 (X86vzload64 addr:$src)),
4436             (VMOVQI2PQIZrm addr:$src)>;
4437   def : Pat<(v4i64 (X86vzload64 addr:$src)),
4438             (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4440   // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4441   def : Pat<(v16i32 (X86vzload32 addr:$src)),
4442             (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4443   def : Pat<(v8i64 (X86vzload64 addr:$src)),
4444             (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4446   def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4447             (SUBREG_TO_REG (i32 0),
4448              (v2f64 (VMOVZPQILo2PQIZrr
4449                      (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4450              sub_xmm)>;
4451   def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4452             (SUBREG_TO_REG (i32 0),
4453              (v2i64 (VMOVZPQILo2PQIZrr
4454                      (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4455              sub_xmm)>;
4457   def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4458             (SUBREG_TO_REG (i32 0),
4459              (v2f64 (VMOVZPQILo2PQIZrr
4460                      (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4461              sub_xmm)>;
4462   def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4463             (SUBREG_TO_REG (i32 0),
4464              (v2i64 (VMOVZPQILo2PQIZrr
4465                      (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4466              sub_xmm)>;
4469 //===----------------------------------------------------------------------===//
4470 // AVX-512 - Non-temporals
4471 //===----------------------------------------------------------------------===//
4473 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4474                       (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4475                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4476                       EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4478 let Predicates = [HasVLX] in {
4479   def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4480                        (ins i256mem:$src),
4481                        "vmovntdqa\t{$src, $dst|$dst, $src}",
4482                        [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4483                        EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4485   def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4486                       (ins i128mem:$src),
4487                       "vmovntdqa\t{$src, $dst|$dst, $src}",
4488                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4489                       EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
4492 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4493                         X86SchedWriteMoveLS Sched,
4494                         PatFrag st_frag = alignednontemporalstore> {
4495   let SchedRW = [Sched.MR], AddedComplexity = 400 in
4496   def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4497                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4498                     [(st_frag (_.VT _.RC:$src), addr:$dst)],
4499                     _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
4502 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4503                            AVX512VLVectorVTInfo VTInfo,
4504                            X86SchedWriteMoveLSWidths Sched> {
4505   let Predicates = [HasAVX512] in
4506     defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4508   let Predicates = [HasAVX512, HasVLX] in {
4509     defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4510     defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
4511   }
4514 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4515                                 SchedWriteVecMoveLSNT>, PD;
4516 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4517                                 SchedWriteFMoveLSNT>, PD, VEX_W;
4518 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4519                                 SchedWriteFMoveLSNT>, PS;
4521 let Predicates = [HasAVX512], AddedComplexity = 400 in {
4522   def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4523             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4524   def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4525             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4526   def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4527             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4529   def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4530             (VMOVNTDQAZrm addr:$src)>;
4531   def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4532             (VMOVNTDQAZrm addr:$src)>;
4533   def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4534             (VMOVNTDQAZrm addr:$src)>;
4535   def : Pat<(v16i32 (alignednontemporalload addr:$src)),
4536             (VMOVNTDQAZrm addr:$src)>;
4537   def : Pat<(v32i16 (alignednontemporalload addr:$src)),
4538             (VMOVNTDQAZrm addr:$src)>;
4539   def : Pat<(v64i8 (alignednontemporalload addr:$src)),
4540             (VMOVNTDQAZrm addr:$src)>;
4543 let Predicates = [HasVLX], AddedComplexity = 400 in {
4544   def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4545             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4546   def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4547             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4548   def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4549             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4551   def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4552             (VMOVNTDQAZ256rm addr:$src)>;
4553   def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4554             (VMOVNTDQAZ256rm addr:$src)>;
4555   def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4556             (VMOVNTDQAZ256rm addr:$src)>;
4557   def : Pat<(v8i32 (alignednontemporalload addr:$src)),
4558             (VMOVNTDQAZ256rm addr:$src)>;
4559   def : Pat<(v16i16 (alignednontemporalload addr:$src)),
4560             (VMOVNTDQAZ256rm addr:$src)>;
4561   def : Pat<(v32i8 (alignednontemporalload addr:$src)),
4562             (VMOVNTDQAZ256rm addr:$src)>;
4564   def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4565             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4566   def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4567             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4568   def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4569             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4571   def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4572             (VMOVNTDQAZ128rm addr:$src)>;
4573   def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4574             (VMOVNTDQAZ128rm addr:$src)>;
4575   def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4576             (VMOVNTDQAZ128rm addr:$src)>;
4577   def : Pat<(v4i32 (alignednontemporalload addr:$src)),
4578             (VMOVNTDQAZ128rm addr:$src)>;
4579   def : Pat<(v8i16 (alignednontemporalload addr:$src)),
4580             (VMOVNTDQAZ128rm addr:$src)>;
4581   def : Pat<(v16i8 (alignednontemporalload addr:$src)),
4582             (VMOVNTDQAZ128rm addr:$src)>;
4585 //===----------------------------------------------------------------------===//
4586 // AVX-512 - Integer arithmetic
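// The multiclasses below build the integer arithmetic instructions in layers:
// avx512_binop_rm defines the register/register and register/memory forms,
// avx512_binop_rmb adds the broadcast-memory form, and the *_vl wrappers
// instantiate the 128/256/512-bit variants under the appropriate predicates.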
4588 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4589                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
4590                            bit IsCommutable = 0> {
4591   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4592                     (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4593                     "$src2, $src1", "$src1, $src2",
4594                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4595                     IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
4596                     Sched<[sched]>;
4598   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4599                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4600                   "$src2, $src1", "$src1, $src2",
4601                   (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
4602                   AVX512BIBase, EVEX_4V,
4603                   Sched<[sched.Folded, sched.ReadAfterFold]>;
4606 multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4607                             X86VectorVTInfo _, X86FoldableSchedWrite sched,
4608                             bit IsCommutable = 0> :
4609            avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4610   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4611                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4612                   "${src2}"##_.BroadcastStr##", $src1",
4613                   "$src1, ${src2}"##_.BroadcastStr,
4614                   (_.VT (OpNode _.RC:$src1,
4615                                 (_.BroadcastLdFrag addr:$src2)))>,
4616                   AVX512BIBase, EVEX_4V, EVEX_B,
4617                   Sched<[sched.Folded, sched.ReadAfterFold]>;
4620 multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4621                               AVX512VLVectorVTInfo VTInfo,
4622                               X86SchedWriteWidths sched, Predicate prd,
4623                               bit IsCommutable = 0> {
4624   let Predicates = [prd] in
4625     defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4626                              IsCommutable>, EVEX_V512;
4628   let Predicates = [prd, HasVLX] in {
4629     defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4630                                 sched.YMM, IsCommutable>, EVEX_V256;
4631     defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4632                                 sched.XMM, IsCommutable>, EVEX_V128;
4633   }
4636 multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4637                                AVX512VLVectorVTInfo VTInfo,
4638                                X86SchedWriteWidths sched, Predicate prd,
4639                                bit IsCommutable = 0> {
4640   let Predicates = [prd] in
4641     defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4642                              IsCommutable>, EVEX_V512;
4644   let Predicates = [prd, HasVLX] in {
4645     defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4646                                  sched.YMM, IsCommutable>, EVEX_V256;
4647     defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4648                                  sched.XMM, IsCommutable>, EVEX_V128;
4649   }
4652 multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4653                                 X86SchedWriteWidths sched, Predicate prd,
4654                                 bit IsCommutable = 0> {
4655   defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4656                                   sched, prd, IsCommutable>,
4657                                   VEX_W, EVEX_CD8<64, CD8VF>;
4660 multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4661                                 X86SchedWriteWidths sched, Predicate prd,
4662                                 bit IsCommutable = 0> {
4663   defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4664                                   sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
4667 multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4668                                 X86SchedWriteWidths sched, Predicate prd,
4669                                 bit IsCommutable = 0> {
4670   defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4671                                  sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4672                                  VEX_WIG;
4675 multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4676                                 X86SchedWriteWidths sched, Predicate prd,
4677                                 bit IsCommutable = 0> {
4678   defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4679                                  sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
4680                                  VEX_WIG;
4683 multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4684                                  SDNode OpNode, X86SchedWriteWidths sched,
4685                                  Predicate prd, bit IsCommutable = 0> {
4686   defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
4687                                    IsCommutable>;
4689   defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
4690                                    IsCommutable>;
4693 multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4694                                  SDNode OpNode, X86SchedWriteWidths sched,
4695                                  Predicate prd, bit IsCommutable = 0> {
4696   defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
4697                                    IsCommutable>;
4699   defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
4700                                    IsCommutable>;
4703 multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4704                                   bits<8> opc_d, bits<8> opc_q,
4705                                   string OpcodeStr, SDNode OpNode,
4706                                   X86SchedWriteWidths sched,
4707                                   bit IsCommutable = 0> {
4708   defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4709                                     sched, HasAVX512, IsCommutable>,
4710               avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4711                                     sched, HasBWI, IsCommutable>;
4714 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
4715                             X86FoldableSchedWrite sched,
4716                             SDNode OpNode,X86VectorVTInfo _Src,
4717                             X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4718                             bit IsCommutable = 0> {
4719   defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4720                             (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4721                             "$src2, $src1","$src1, $src2",
4722                             (_Dst.VT (OpNode
4723                                          (_Src.VT _Src.RC:$src1),
4724                                          (_Src.VT _Src.RC:$src2))),
4725                             IsCommutable>,
4726                             AVX512BIBase, EVEX_4V, Sched<[sched]>;
4727   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4728                         (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4729                         "$src2, $src1", "$src1, $src2",
4730                         (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4731                                       (_Src.LdFrag addr:$src2)))>,
4732                         AVX512BIBase, EVEX_4V,
4733                         Sched<[sched.Folded, sched.ReadAfterFold]>;
4735   defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4736                     (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
4737                     OpcodeStr,
4738                     "${src2}"##_Brdct.BroadcastStr##", $src1",
4739                      "$src1, ${src2}"##_Brdct.BroadcastStr,
4740                     (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4741                                  (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
4742                     AVX512BIBase, EVEX_4V, EVEX_B,
4743                     Sched<[sched.Folded, sched.ReadAfterFold]>;
4746 defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
4747                                     SchedWriteVecALU, 1>;
4748 defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
4749                                     SchedWriteVecALU, 0>;
4750 defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
4751                                     SchedWriteVecALU, HasBWI, 1>;
4752 defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
4753                                     SchedWriteVecALU, HasBWI, 0>;
4754 defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
4755                                      SchedWriteVecALU, HasBWI, 1>;
4756 defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
4757                                      SchedWriteVecALU, HasBWI, 0>;
4758 defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
4759                                     SchedWritePMULLD, HasAVX512, 1>, T8PD;
4760 defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
4761                                     SchedWriteVecIMul, HasBWI, 1>;
4762 defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
4763                                     SchedWriteVecIMul, HasDQI, 1>, T8PD,
4764                                     NotEVEX2VEXConvertible;
4765 defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
4766                                     HasBWI, 1>;
4767 defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
4768                                      HasBWI, 1>;
4769 defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
4770                                       SchedWriteVecIMul, HasBWI, 1>, T8PD;
4771 defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
4772                                    SchedWriteVecALU, HasBWI, 1>;
4773 defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
4774                                     SchedWriteVecIMul, HasAVX512, 1>, T8PD;
4775 defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
4776                                      SchedWriteVecIMul, HasAVX512, 1>;
4778 multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
4779                             X86SchedWriteWidths sched,
4780                             AVX512VLVectorVTInfo _SrcVTInfo,
4781                             AVX512VLVectorVTInfo _DstVTInfo,
4782                             SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
4783   let Predicates = [prd] in
4784     defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
4785                                  _SrcVTInfo.info512, _DstVTInfo.info512,
4786                                  v8i64_info, IsCommutable>,
4787                                   EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
4788   let Predicates = [HasVLX, prd] in {
4789     defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
4790                                       _SrcVTInfo.info256, _DstVTInfo.info256,
4791                                       v4i64x_info, IsCommutable>,
4792                                       EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
4793     defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
4794                                       _SrcVTInfo.info128, _DstVTInfo.info128,
4795                                       v2i64x_info, IsCommutable>,
4796                                      EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
4797   }
4800 defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
4801                                 avx512vl_i8_info, avx512vl_i8_info,
4802                                 X86multishift, HasVBMI, 0>, T8PD;
4804 multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4805                             X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
4806                             X86FoldableSchedWrite sched> {
4807   defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4808                     (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
4809                     OpcodeStr,
4810                     "${src2}"##_Src.BroadcastStr##", $src1",
4811                      "$src1, ${src2}"##_Src.BroadcastStr,
4812                     (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4813                                  (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
4814                     EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
4815                     Sched<[sched.Folded, sched.ReadAfterFold]>;
4818 multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
4819                             SDNode OpNode,X86VectorVTInfo _Src,
4820                             X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
4821                             bit IsCommutable = 0> {
4822   defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4823                             (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4824                             "$src2, $src1","$src1, $src2",
4825                             (_Dst.VT (OpNode
4826                                          (_Src.VT _Src.RC:$src1),
4827                                          (_Src.VT _Src.RC:$src2))),
4828                             IsCommutable, IsCommutable>,
4829                             EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
4830   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4831                         (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4832                         "$src2, $src1", "$src1, $src2",
4833                         (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4834                                       (_Src.LdFrag addr:$src2)))>,
4835                          EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
4836                          Sched<[sched.Folded, sched.ReadAfterFold]>;
4839 multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
4840                                     SDNode OpNode> {
4841   let Predicates = [HasBWI] in
4842   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
4843                                  v32i16_info, SchedWriteShuffle.ZMM>,
4844                 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
4845                                  v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
4846   let Predicates = [HasBWI, HasVLX] in {
4847     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
4848                                      v16i16x_info, SchedWriteShuffle.YMM>,
4849                      avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
4850                                       v16i16x_info, SchedWriteShuffle.YMM>,
4851                                       EVEX_V256;
4852     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
4853                                      v8i16x_info, SchedWriteShuffle.XMM>,
4854                      avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
4855                                       v8i16x_info, SchedWriteShuffle.XMM>,
4856                                       EVEX_V128;
4857   }
4859 multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
4860                             SDNode OpNode> {
4861   let Predicates = [HasBWI] in
4862   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
4863                                 SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
4864   let Predicates = [HasBWI, HasVLX] in {
4865     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
4866                                      v32i8x_info, SchedWriteShuffle.YMM>,
4867                                      EVEX_V256, VEX_WIG;
4868     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
4869                                      v16i8x_info, SchedWriteShuffle.XMM>,
4870                                      EVEX_V128, VEX_WIG;
4871   }
4874 multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
4875                             SDNode OpNode, AVX512VLVectorVTInfo _Src,
4876                             AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
4877   let Predicates = [HasBWI] in
4878   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
4879                                 _Dst.info512, SchedWriteVecIMul.ZMM,
4880                                 IsCommutable>, EVEX_V512;
4881   let Predicates = [HasBWI, HasVLX] in {
4882     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
4883                                      _Dst.info256, SchedWriteVecIMul.YMM,
4884                                      IsCommutable>, EVEX_V256;
4885     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
4886                                      _Dst.info128, SchedWriteVecIMul.XMM,
4887                                      IsCommutable>, EVEX_V128;
4888   }
4891 defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
4892 defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
4893 defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
4894 defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
4896 defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
4897                      avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
4898 defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
4899                      avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
4901 defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
4902                                     SchedWriteVecALU, HasBWI, 1>, T8PD;
4903 defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
4904                                     SchedWriteVecALU, HasBWI, 1>;
4905 defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
4906                                     SchedWriteVecALU, HasAVX512, 1>, T8PD;
4907 defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
4908                                     SchedWriteVecALU, HasAVX512, 1>, T8PD,
4909                                     NotEVEX2VEXConvertible;
4911 defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
4912                                     SchedWriteVecALU, HasBWI, 1>;
4913 defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
4914                                     SchedWriteVecALU, HasBWI, 1>, T8PD;
4915 defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
4916                                     SchedWriteVecALU, HasAVX512, 1>, T8PD;
4917 defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
4918                                     SchedWriteVecALU, HasAVX512, 1>, T8PD,
4919                                     NotEVEX2VEXConvertible;
4921 defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
4922                                     SchedWriteVecALU, HasBWI, 1>, T8PD;
4923 defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
4924                                     SchedWriteVecALU, HasBWI, 1>;
4925 defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
4926                                     SchedWriteVecALU, HasAVX512, 1>, T8PD;
4927 defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
4928                                     SchedWriteVecALU, HasAVX512, 1>, T8PD,
4929                                     NotEVEX2VEXConvertible;
4931 defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
4932                                     SchedWriteVecALU, HasBWI, 1>;
4933 defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
4934                                     SchedWriteVecALU, HasBWI, 1>, T8PD;
4935 defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
4936                                     SchedWriteVecALU, HasAVX512, 1>, T8PD;
4937 defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
4938                                     SchedWriteVecALU, HasAVX512, 1>, T8PD,
4939                                     NotEVEX2VEXConvertible;
4941 // PMULLQ: Use the 512-bit version to implement the 128/256-bit forms when VLX
4941 // is not available.
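// The inputs are placed into the low part of an undefined ZMM register via
// INSERT_SUBREG of IMPLICIT_DEF, the 512-bit VPMULLQ is executed, and the low
// 256/128 bits of the result are extracted again.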
4942 let Predicates = [HasDQI, NoVLX] in {
4943   def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
4944             (EXTRACT_SUBREG
4945                 (VPMULLQZrr
4946                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4947                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4948              sub_ymm)>;
4949   def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
4950             (EXTRACT_SUBREG
4951                 (VPMULLQZrmb
4952                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4953                     addr:$src2),
4954              sub_ymm)>;
4956   def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
4957             (EXTRACT_SUBREG
4958                 (VPMULLQZrr
4959                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4960                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
4961              sub_xmm)>;
4962   def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
4963             (EXTRACT_SUBREG
4964                 (VPMULLQZrmb
4965                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4966                     addr:$src2),
4967              sub_xmm)>;
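// Apply the same widen-to-512-bits trick to 64-bit element min/max when VLX
// is not available. Instantiated for VPMAXSQ/VPMAXUQ/VPMINSQ/VPMINUQ below.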
4970 multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
4971   def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
4972             (EXTRACT_SUBREG
4973                 (!cast<Instruction>(Instr#"rr")
4974                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4975                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4976              sub_ymm)>;
4977   def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
4978             (EXTRACT_SUBREG
4979                 (!cast<Instruction>(Instr#"rmb")
4980                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4981                     addr:$src2),
4982              sub_ymm)>;
4984   def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
4985             (EXTRACT_SUBREG
4986                 (!cast<Instruction>(Instr#"rr")
4987                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4988                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
4989              sub_xmm)>;
4990   def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
4991             (EXTRACT_SUBREG
4992                 (!cast<Instruction>(Instr#"rmb")
4993                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4994                     addr:$src2),
4995              sub_xmm)>;
4998 let Predicates = [HasAVX512, NoVLX] in {
4999   defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
5000   defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
5001   defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
5002   defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
5005 //===----------------------------------------------------------------------===//
5006 // AVX-512 - Logical Instructions
5007 //===----------------------------------------------------------------------===//
5009 defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
5010                                    SchedWriteVecLogic, HasAVX512, 1>;
5011 defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
5012                                   SchedWriteVecLogic, HasAVX512, 1>;
5013 defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
5014                                    SchedWriteVecLogic, HasAVX512, 1>;
5015 defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
5016                                     SchedWriteVecLogic, HasAVX512>;
5018 let Predicates = [HasVLX] in {
5019   def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
5020             (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5021   def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
5022             (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5024   def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
5025             (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5026   def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
5027             (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5029   def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
5030             (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5031   def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
5032             (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5034   def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
5035             (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5036   def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
5037             (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5039   def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
5040             (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5041   def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
5042             (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5044   def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
5045             (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5046   def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
5047             (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5049   def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
5050             (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5051   def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
5052             (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5054   def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
5055             (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5056   def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
5057             (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5059   def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
5060             (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5061   def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
5062             (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5064   def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
5065             (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5066   def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
5067             (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5069   def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
5070             (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5071   def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
5072             (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5074   def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
5075             (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5076   def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
5077             (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5079   def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
5080             (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5081   def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
5082             (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5084   def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
5085             (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5086   def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
5087             (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5089   def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
5090             (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5091   def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
5092             (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5094   def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
5095             (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5096   def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
5097             (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5098 }
5100 let Predicates = [HasAVX512] in {
5101   def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5102             (VPANDQZrr VR512:$src1, VR512:$src2)>;
5103   def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5104             (VPANDQZrr VR512:$src1, VR512:$src2)>;
5106   def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5107             (VPORQZrr VR512:$src1, VR512:$src2)>;
5108   def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5109             (VPORQZrr VR512:$src1, VR512:$src2)>;
5111   def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5112             (VPXORQZrr VR512:$src1, VR512:$src2)>;
5113   def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5114             (VPXORQZrr VR512:$src1, VR512:$src2)>;
5116   def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5117             (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5118   def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5119             (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5121   def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
5122             (VPANDQZrm VR512:$src1, addr:$src2)>;
5123   def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
5124             (VPANDQZrm VR512:$src1, addr:$src2)>;
5126   def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
5127             (VPORQZrm VR512:$src1, addr:$src2)>;
5128   def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
5129             (VPORQZrm VR512:$src1, addr:$src2)>;
5131   def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
5132             (VPXORQZrm VR512:$src1, addr:$src2)>;
5133   def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
5134             (VPXORQZrm VR512:$src1, addr:$src2)>;
5136   def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
5137             (VPANDNQZrm VR512:$src1, addr:$src2)>;
5138   def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
5139             (VPANDNQZrm VR512:$src1, addr:$src2)>;
5140 }
5142 // Patterns to catch a vselect whose type differs from that of the logic op.
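// For example (illustrative sketch only), a masked move whose payload is a
// bitcast of an i32 logic op on 512-bit vectors is folded into the masked
// quadword form of the instruction:
//   (v8i64 (vselect VK8WM:$mask,
//                   (bitconvert (v16i32 (and VR512:$src1, VR512:$src2))),
//                   VR512:$src0))
//     --> (VPANDQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)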
5143 multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
5144                                     X86VectorVTInfo _,
5145                                     X86VectorVTInfo IntInfo> {
5146   // Masked register-register logical operations.
5147   def : Pat<(_.VT (vselect _.KRCWM:$mask,
5148                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5149                    _.RC:$src0)),
5150             (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5151              _.RC:$src1, _.RC:$src2)>;
5153   def : Pat<(_.VT (vselect _.KRCWM:$mask,
5154                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5155                    _.ImmAllZerosV)),
5156             (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5157              _.RC:$src2)>;
5159   // Masked register-memory logical operations.
5160   def : Pat<(_.VT (vselect _.KRCWM:$mask,
5161                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5162                                             (load addr:$src2)))),
5163                    _.RC:$src0)),
5164             (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5165              _.RC:$src1, addr:$src2)>;
5166   def : Pat<(_.VT (vselect _.KRCWM:$mask,
5167                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5168                                             (load addr:$src2)))),
5169                    _.ImmAllZerosV)),
5170             (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
5171              addr:$src2)>;
5172 }
5174 multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
5175                                          X86VectorVTInfo _,
5176                                          X86VectorVTInfo IntInfo> {
5177   // Register-broadcast logical operations.
5178   def : Pat<(_.VT (vselect _.KRCWM:$mask,
5179                    (bitconvert
5180                     (IntInfo.VT (OpNode _.RC:$src1,
5181                                  (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5182                    _.RC:$src0)),
5183             (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5184              _.RC:$src1, addr:$src2)>;
5185   def : Pat<(_.VT (vselect _.KRCWM:$mask,
5186                    (bitconvert
5187                     (IntInfo.VT (OpNode _.RC:$src1,
5188                                  (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5189                    _.ImmAllZerosV)),
5190             (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
5191              _.RC:$src1, addr:$src2)>;
5192 }
5194 multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
5195                                          AVX512VLVectorVTInfo SelectInfo,
5196                                          AVX512VLVectorVTInfo IntInfo> {
5197 let Predicates = [HasVLX] in {
5198   defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
5199                                  IntInfo.info128>;
5200   defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
5201                                  IntInfo.info256>;
5202 }
5203 let Predicates = [HasAVX512] in {
5204   defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
5205                                  IntInfo.info512>;
5206 }
5207 }
5209 multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
5210                                                AVX512VLVectorVTInfo SelectInfo,
5211                                                AVX512VLVectorVTInfo IntInfo> {
5212 let Predicates = [HasVLX] in {
5213   defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
5214                                        SelectInfo.info128, IntInfo.info128>;
5215   defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
5216                                        SelectInfo.info256, IntInfo.info256>;
5217 }
5218 let Predicates = [HasAVX512] in {
5219   defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
5220                                        SelectInfo.info512, IntInfo.info512>;
5221 }
5222 }
5224 multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
5225   // i64 vselect with i32/i16/i8 logic op
5226   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5227                                        avx512vl_i32_info>;
5228   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5229                                        avx512vl_i16_info>;
5230   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5231                                        avx512vl_i8_info>;
5233   // i32 vselect with i64/i16/i8 logic op
5234   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5235                                        avx512vl_i64_info>;
5236   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5237                                        avx512vl_i16_info>;
5238   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5239                                        avx512vl_i8_info>;
5241   // f32 vselect with i64/i32/i16/i8 logic op
5242   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5243                                        avx512vl_i64_info>;
5244   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5245                                        avx512vl_i32_info>;
5246   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5247                                        avx512vl_i16_info>;
5248   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5249                                        avx512vl_i8_info>;
5251   // f64 vselect with i64/i32/i16/i8 logic op
5252   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5253                                        avx512vl_i64_info>;
5254   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5255                                        avx512vl_i32_info>;
5256   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5257                                        avx512vl_i16_info>;
5258   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5259                                        avx512vl_i8_info>;
5261   defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
5262                                              avx512vl_f32_info,
5263                                              avx512vl_i32_info>;
5264   defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
5265                                              avx512vl_f64_info,
5266                                              avx512vl_i64_info>;
5267 }
5269 defm : avx512_logical_lowering_types<"VPAND", and>;
5270 defm : avx512_logical_lowering_types<"VPOR",  or>;
5271 defm : avx512_logical_lowering_types<"VPXOR", xor>;
5272 defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
5274 //===----------------------------------------------------------------------===//
5275 // AVX-512  FP arithmetic
5276 //===----------------------------------------------------------------------===//
5278 multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5279                             SDNode OpNode, SDNode VecNode,
5280                             X86FoldableSchedWrite sched, bit IsCommutable> {
5281   let ExeDomain = _.ExeDomain in {
5282   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5283                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5284                            "$src2, $src1", "$src1, $src2",
5285                            (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5286                            Sched<[sched]>;
5288   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5289                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5290                          "$src2, $src1", "$src1, $src2",
5291                          (_.VT (VecNode _.RC:$src1,
5292                                         _.ScalarIntMemCPat:$src2))>,
5293                          Sched<[sched.Folded, sched.ReadAfterFold]>;
5294   let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5295   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5296                          (ins _.FRC:$src1, _.FRC:$src2),
5297                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5298                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5299                           Sched<[sched]> {
5300     let isCommutable = IsCommutable;
5301   }
5302   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5303                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5304                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5305                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5306                          (_.ScalarLdFrag addr:$src2)))]>,
5307                          Sched<[sched.Folded, sched.ReadAfterFold]>;
5308   }
5309   }
5310 }
5312 multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5313                                   SDNode VecNode, X86FoldableSchedWrite sched,
5314                                   bit IsCommutable = 0> {
5315   let ExeDomain = _.ExeDomain in
5316   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5317                           (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5318                           "$rc, $src2, $src1", "$src1, $src2, $rc",
5319                           (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5320                           (i32 timm:$rc))>,
5321                           EVEX_B, EVEX_RC, Sched<[sched]>;
5322 }
5323 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5324                                 SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5325                                 X86FoldableSchedWrite sched, bit IsCommutable,
5326                                 string EVEX2VexOvrd> {
5327   let ExeDomain = _.ExeDomain in {
5328   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5329                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5330                            "$src2, $src1", "$src1, $src2",
5331                            (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5332                            Sched<[sched]>;
5334   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5335                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5336                          "$src2, $src1", "$src1, $src2",
5337                          (_.VT (VecNode _.RC:$src1,
5338                                         _.ScalarIntMemCPat:$src2))>,
5339                          Sched<[sched.Folded, sched.ReadAfterFold]>;
5341   let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5342   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5343                          (ins _.FRC:$src1, _.FRC:$src2),
5344                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5345                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5346                           Sched<[sched]>,
5347                           EVEX2VEXOverride<EVEX2VexOvrd#"rr"> {
5348     let isCommutable = IsCommutable;
5349   }
5350   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5351                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5352                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5353                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5354                          (_.ScalarLdFrag addr:$src2)))]>,
5355                          Sched<[sched.Folded, sched.ReadAfterFold]>,
5356                          EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
5357   }
5359   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5360                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5361                             "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5362                             (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5363                             EVEX_B, Sched<[sched]>;
5364   }
5365 }
5367 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
5368                                 SDNode VecNode, SDNode RndNode,
5369                                 X86SchedWriteSizes sched, bit IsCommutable> {
5370   defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5371                               sched.PS.Scl, IsCommutable>,
5372              avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5373                               sched.PS.Scl, IsCommutable>,
5374                               XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5375   defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5376                               sched.PD.Scl, IsCommutable>,
5377              avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5378                               sched.PD.Scl, IsCommutable>,
5379                               XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5380 }
5382 multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5383                               SDNode VecNode, SDNode SaeNode,
5384                               X86SchedWriteSizes sched, bit IsCommutable> {
5385   defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5386                               VecNode, SaeNode, sched.PS.Scl, IsCommutable,
5387                               NAME#"SS">,
5388                               XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5389   defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5390                               VecNode, SaeNode, sched.PD.Scl, IsCommutable,
5391                               NAME#"SD">,
5392                               XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5393 }
5394 defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86fadds, X86faddRnds,
5395                                  SchedWriteFAddSizes, 1>;
5396 defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmuls, X86fmulRnds,
5397                                  SchedWriteFMulSizes, 1>;
5398 defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubs, X86fsubRnds,
5399                                  SchedWriteFAddSizes, 0>;
5400 defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivs, X86fdivRnds,
5401                                  SchedWriteFDivSizes, 0>;
5402 defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5403                                SchedWriteFCmpSizes, 0>;
5404 defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5405                                SchedWriteFCmpSizes, 0>;
5407 // MIN/MAX nodes are commutable under "unsafe-fp-math"; in that case we use
5408 // X86fminc and X86fmaxc instead of X86fmin and X86fmax.
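// (Commutativity lets the compiler, e.g., swap the operands so that a load
//  feeding either operand can be folded into the rm memory form.)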
5409 multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5410                                     X86VectorVTInfo _, SDNode OpNode,
5411                                     X86FoldableSchedWrite sched,
5412                                     string EVEX2VEXOvrd> {
5413   let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5414   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5415                          (ins _.FRC:$src1, _.FRC:$src2),
5416                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5417                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5418                           Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
5419     let isCommutable = 1;
5420   }
5421   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5422                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5423                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5424                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5425                          (_.ScalarLdFrag addr:$src2)))]>,
5426                          Sched<[sched.Folded, sched.ReadAfterFold]>,
5427                          EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
5428   }
5429 }
5430 defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5431                                          SchedWriteFCmp.Scl, "VMINCSS">, XS,
5432                                          EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5434 defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5435                                          SchedWriteFCmp.Scl, "VMINCSD">, XD,
5436                                          VEX_W, EVEX_4V, VEX_LIG,
5437                                          EVEX_CD8<64, CD8VT1>;
5439 defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5440                                          SchedWriteFCmp.Scl, "VMAXCSS">, XS,
5441                                          EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5443 defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5444                                          SchedWriteFCmp.Scl, "VMAXCSD">, XD,
5445                                          VEX_W, EVEX_4V, VEX_LIG,
5446                                          EVEX_CD8<64, CD8VT1>;
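// Packed FP arithmetic. Each instantiation provides a register form (rr), a
// full-width load form (rm) and a broadcast-load form (rmb), each with
// merge- and zero-masking variants via AVX512_maskable.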
5448 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5449                             X86VectorVTInfo _, X86FoldableSchedWrite sched,
5450                             bit IsCommutable,
5451                             bit IsKCommutable = IsCommutable> {
5452   let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
5453   defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5454                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5455                   "$src2, $src1", "$src1, $src2",
5456                   (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
5457                   IsKCommutable, IsKCommutable>,
5458                   EVEX_4V, Sched<[sched]>;
5459   let mayLoad = 1 in {
5460     defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5461                     (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
5462                     "$src2, $src1", "$src1, $src2",
5463                     (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5464                     EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5465     defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5466                      (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
5467                      "${src2}"##_.BroadcastStr##", $src1",
5468                      "$src1, ${src2}"##_.BroadcastStr,
5469                      (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5470                      EVEX_4V, EVEX_B,
5471                      Sched<[sched.Folded, sched.ReadAfterFold]>;
5472     }
5473   }
5474 }
5476 multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5477                                   SDPatternOperator OpNodeRnd,
5478                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5479   let ExeDomain = _.ExeDomain in
5480   defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5481                   (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
5482                   "$rc, $src2, $src1", "$src1, $src2, $rc",
5483                   (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>,
5484                   EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
5485 }
5487 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5488                                 SDPatternOperator OpNodeSAE,
5489                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5490   let ExeDomain = _.ExeDomain in
5491   defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5492                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5493                   "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5494                   (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5495                   EVEX_4V, EVEX_B, Sched<[sched]>;
5496 }
5498 multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5499                              Predicate prd, X86SchedWriteSizes sched,
5500                              bit IsCommutable = 0,
5501                              bit IsPD128Commutable = IsCommutable> {
5502   let Predicates = [prd] in {
5503   defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
5504                               sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
5505                               EVEX_CD8<32, CD8VF>;
5506   defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
5507                               sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
5508                               EVEX_CD8<64, CD8VF>;
5509   }
5511     // Define only if the AVX512VL feature is present.
5512   let Predicates = [prd, HasVLX] in {
5513     defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
5514                                    sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
5515                                    EVEX_CD8<32, CD8VF>;
5516     defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
5517                                    sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
5518                                    EVEX_CD8<32, CD8VF>;
5519     defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
5520                                    sched.PD.XMM, IsPD128Commutable,
5521                                    IsCommutable>, EVEX_V128, PD, VEX_W,
5522                                    EVEX_CD8<64, CD8VF>;
5523     defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
5524                                    sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
5525                                    EVEX_CD8<64, CD8VF>;
5526   }
5527 }
5529 multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5530                                    X86SchedWriteSizes sched> {
5531   defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5532                                     v16f32_info>,
5533                                     EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5534   defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5535                                     v8f64_info>,
5536                                     EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
5537 }
5539 multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5540                                  X86SchedWriteSizes sched> {
5541   defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5542                                   v16f32_info>,
5543                                   EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5544   defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5545                                   v8f64_info>,
5546                                   EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
5547 }
5549 defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
5550                               SchedWriteFAddSizes, 1>,
5551             avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5552 defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
5553                               SchedWriteFMulSizes, 1>,
5554             avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5555 defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
5556                               SchedWriteFAddSizes>,
5557             avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5558 defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
5559                               SchedWriteFDivSizes>,
5560             avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
5561 defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
5562                               SchedWriteFCmpSizes, 0>,
5563             avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5564 defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
5565                               SchedWriteFCmpSizes, 0>,
5566             avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
5567 let isCodeGenOnly = 1 in {
5568   defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
5569                                  SchedWriteFCmpSizes, 1>;
5570   defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
5571                                  SchedWriteFCmpSizes, 1>;
5572 }
5573 defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
5574                                SchedWriteFLogicSizes, 1>;
5575 defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
5576                                SchedWriteFLogicSizes, 0>;
5577 defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
5578                                SchedWriteFLogicSizes, 1>;
5579 defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
5580                                SchedWriteFLogicSizes, 1>;
5582 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5583                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5584   let ExeDomain = _.ExeDomain in {
5585   defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5586                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5587                   "$src2, $src1", "$src1, $src2",
5588                   (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5589                   EVEX_4V, Sched<[sched]>;
5590   defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5591                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
5592                   "$src2, $src1", "$src1, $src2",
5593                   (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5594                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5595   defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5596                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
5597                    "${src2}"##_.BroadcastStr##", $src1",
5598                    "$src1, ${src2}"##_.BroadcastStr,
5599                    (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5600                    EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5601   }
5602 }
5604 multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5605                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5606   let ExeDomain = _.ExeDomain in {
5607   defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5608                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5609                   "$src2, $src1", "$src1, $src2",
5610                   (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5611                   Sched<[sched]>;
5612   defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5613                   (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix,
5614                   "$src2, $src1", "$src1, $src2",
5615                   (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2)>,
5616                   Sched<[sched.Folded, sched.ReadAfterFold]>;
5617   }
5618 }
5620 multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5621                                 X86SchedWriteWidths sched> {
5622   defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
5623              avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
5624                               EVEX_V512, EVEX_CD8<32, CD8VF>;
5625   defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
5626              avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
5627                               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
5628   defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
5629              avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info,
5630                                     X86scalefsRnd, sched.Scl>,
5631                                     EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5632   defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
5633              avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info,
5634                                     X86scalefsRnd, sched.Scl>,
5635                                     EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W;
5637   // Define only if the AVX512VL feature is present.
5638   let Predicates = [HasVLX] in {
5639     defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
5640                                    EVEX_V128, EVEX_CD8<32, CD8VF>;
5641     defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
5642                                    EVEX_V256, EVEX_CD8<32, CD8VF>;
5643     defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
5644                                    EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
5645     defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
5646                                    EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
5647   }
5648 }
5649 defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
5650                                     SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;
5652 //===----------------------------------------------------------------------===//
5653 // AVX-512  VPTESTM instructions
5654 //===----------------------------------------------------------------------===//
5656 multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
5657                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
5658                          string Name> {
5659   // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
5660   // There are just too many permutations due to commutability and bitcasts.
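  // (vptestm sets mask bit i when (src1[i] & src2[i]) != 0 and vptestnm when
  // that AND is zero, so the DAG shapes handled there are, roughly, compares
  // of an AND against zero, possibly looking through bitcasts on either side.)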
5661   let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
5662   defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5663                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5664                       "$src2, $src1", "$src1, $src2",
5665                    (null_frag), (null_frag), 1>,
5666                    EVEX_4V, Sched<[sched]>;
5667   let mayLoad = 1 in
5668   defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5669                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5670                        "$src2, $src1", "$src1, $src2",
5671                    (null_frag), (null_frag)>,
5672                    EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5673                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5674   }
5675 }
5677 multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
5678                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5679   let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
5680   defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5681                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5682                     "${src2}"##_.BroadcastStr##", $src1",
5683                     "$src1, ${src2}"##_.BroadcastStr,
5684                     (null_frag), (null_frag)>,
5685                     EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5686                     Sched<[sched.Folded, sched.ReadAfterFold]>;
5687 }
5689 multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
5690                                   X86SchedWriteWidths sched,
5691                                   AVX512VLVectorVTInfo _> {
5692   let Predicates  = [HasAVX512] in
5693   defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512, NAME>,
5694            avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
5696   let Predicates = [HasAVX512, HasVLX] in {
5697   defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256, NAME>,
5698               avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
5699   defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128, NAME>,
5700               avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
5701   }
5702 }
5704 multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
5705                             X86SchedWriteWidths sched> {
5706   defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
5707                                  avx512vl_i32_info>;
5708   defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
5709                                  avx512vl_i64_info>, VEX_W;
5710 }
5712 multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5713                             X86SchedWriteWidths sched> {
5714   let Predicates = [HasBWI] in {
5715   defm WZ:    avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
5716                             v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
5717   defm BZ:    avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
5718                             v64i8_info, NAME#"B">, EVEX_V512;
5719   }
5720   let Predicates = [HasVLX, HasBWI] in {
5722   defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
5723                             v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
5724   defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
5725                             v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
5726   defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
5727                             v32i8x_info, NAME#"B">, EVEX_V256;
5728   defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
5729                             v16i8x_info, NAME#"B">, EVEX_V128;
5730   }
5731 }
5733 multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5734                                    X86SchedWriteWidths sched> :
5735   avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
5736   avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
5738 defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
5739                                          SchedWriteVecLogic>, T8PD;
5740 defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
5741                                          SchedWriteVecLogic>, T8XS;
5743 //===----------------------------------------------------------------------===//
5744 // AVX-512  Shift instructions
5745 //===----------------------------------------------------------------------===//
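// Shift-by-immediate: the ri form shifts a register, the mi form shifts a
// loaded vector, and avx512_shift_rmbi below adds the broadcast-load (mbi)
// form for the D/Q element sizes.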
5747 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5748                             string OpcodeStr, SDNode OpNode,
5749                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5750   let ExeDomain = _.ExeDomain in {
5751   defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5752                    (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5753                       "$src2, $src1", "$src1, $src2",
5754                    (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
5755                    Sched<[sched]>;
5756   defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5757                    (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5758                        "$src2, $src1", "$src1, $src2",
5759                    (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
5760                           (i8 timm:$src2)))>,
5761                    Sched<[sched.Folded]>;
5762   }
5763 }
5765 multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5766                              string OpcodeStr, SDNode OpNode,
5767                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5768   let ExeDomain = _.ExeDomain in
5769   defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5770                    (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5771       "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
5772      (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
5773      EVEX_B, Sched<[sched.Folded]>;
5774 }
5776 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5777                             X86FoldableSchedWrite sched, ValueType SrcVT,
5778                             X86VectorVTInfo _> {
5779    // The shift amount (src2) is always a 128-bit vector.
5780   let ExeDomain = _.ExeDomain in {
5781   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5782                    (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5783                       "$src2, $src1", "$src1, $src2",
5784                    (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
5785                    AVX512BIBase, EVEX_4V, Sched<[sched]>;
5786   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5787                    (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5788                        "$src2, $src1", "$src1, $src2",
5789                    (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
5790                    AVX512BIBase,
5791                    EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5792   }
5793 }
5795 multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5796                               X86SchedWriteWidths sched, ValueType SrcVT,
5797                               AVX512VLVectorVTInfo VTInfo,
5798                               Predicate prd> {
5799   let Predicates = [prd] in
5800   defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
5801                                VTInfo.info512>, EVEX_V512,
5802                                EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5803   let Predicates = [prd, HasVLX] in {
5804   defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
5805                                VTInfo.info256>, EVEX_V256,
5806                                EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5807   defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
5808                                VTInfo.info128>, EVEX_V128,
5809                                EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
5810   }
5811 }
5813 multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5814                               string OpcodeStr, SDNode OpNode,
5815                               X86SchedWriteWidths sched,
5816                               bit NotEVEX2VEXConvertibleQ = 0> {
5817   defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
5818                               avx512vl_i32_info, HasAVX512>;
5819   let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5820   defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
5821                               avx512vl_i64_info, HasAVX512>, VEX_W;
5822   defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
5823                               avx512vl_i16_info, HasBWI>;
5824 }
5826 multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5827                                   string OpcodeStr, SDNode OpNode,
5828                                   X86SchedWriteWidths sched,
5829                                   AVX512VLVectorVTInfo VTInfo> {
5830   let Predicates = [HasAVX512] in
5831   defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5832                               sched.ZMM, VTInfo.info512>,
5833              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
5834                                VTInfo.info512>, EVEX_V512;
5835   let Predicates = [HasAVX512, HasVLX] in {
5836   defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5837                               sched.YMM, VTInfo.info256>,
5838              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
5839                                VTInfo.info256>, EVEX_V256;
5840   defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5841                               sched.XMM, VTInfo.info128>,
5842              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
5843                                VTInfo.info128>, EVEX_V128;
5844   }
5845 }
5847 multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
5848                               string OpcodeStr, SDNode OpNode,
5849                               X86SchedWriteWidths sched> {
5850   let Predicates = [HasBWI] in
5851   defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5852                                sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
5853   let Predicates = [HasVLX, HasBWI] in {
5854   defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5855                                sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
5856   defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5857                                sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
5858   }
5859 }
5861 multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
5862                                Format ImmFormR, Format ImmFormM,
5863                                string OpcodeStr, SDNode OpNode,
5864                                X86SchedWriteWidths sched,
5865                                bit NotEVEX2VEXConvertibleQ = 0> {
5866   defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
5867                                  sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
5868   let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5869   defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
5870                                  sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
5871 }
5873 defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
5874                                  SchedWriteVecShiftImm>,
5875              avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
5876                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5878 defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
5879                                  SchedWriteVecShiftImm>,
5880              avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
5881                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5883 defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
5884                                  SchedWriteVecShiftImm, 1>,
5885              avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
5886                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5888 defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
5889                                  SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5890 defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
5891                                  SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5893 defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
5894                                 SchedWriteVecShift>;
5895 defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
5896                                 SchedWriteVecShift, 1>;
5897 defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
5898                                 SchedWriteVecShift>;
5900 // Use the 512-bit VPSRA/VPSRAI versions to implement v2i64/v4i64 when VLX is not available.
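// The inputs are widened by inserting them into an undef ZMM register, the
// 512-bit instruction is executed, and the low 128/256 bits of the result are
// extracted again; the rotate lowerings further below use the same idiom.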
5901 let Predicates = [HasAVX512, NoVLX] in {
5902   def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
5903             (EXTRACT_SUBREG (v8i64
5904               (VPSRAQZrr
5905                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5906                  VR128X:$src2)), sub_ymm)>;
5908   def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5909             (EXTRACT_SUBREG (v8i64
5910               (VPSRAQZrr
5911                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5912                  VR128X:$src2)), sub_xmm)>;
5914   def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
5915             (EXTRACT_SUBREG (v8i64
5916               (VPSRAQZri
5917                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5918                  timm:$src2)), sub_ymm)>;
5920   def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
5921             (EXTRACT_SUBREG (v8i64
5922               (VPSRAQZri
5923                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5924                  timm:$src2)), sub_xmm)>;
5925 }
5927 //===-------------------------------------------------------------------===//
5928 // Variable Bit Shifts
5929 //===-------------------------------------------------------------------===//
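// Variable shifts take a per-element shift amount from the second vector
// operand; avx512_var_shift provides the rr/rm forms and avx512_var_shift_mb
// adds the broadcast-load form.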
5931 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
5932                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5933   let ExeDomain = _.ExeDomain in {
5934   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5935                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5936                       "$src2, $src1", "$src1, $src2",
5937                    (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
5938                    AVX5128IBase, EVEX_4V, Sched<[sched]>;
5939   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5940                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5941                        "$src2, $src1", "$src1, $src2",
5942                    (_.VT (OpNode _.RC:$src1,
5943                    (_.VT (_.LdFrag addr:$src2))))>,
5944                    AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5945                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5946   }
5947 }
5949 multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5950                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5951   let ExeDomain = _.ExeDomain in
5952   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5953                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5954                     "${src2}"##_.BroadcastStr##", $src1",
5955                     "$src1, ${src2}"##_.BroadcastStr,
5956                     (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
5957                     AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5958                     Sched<[sched.Folded, sched.ReadAfterFold]>;
5959 }
5961 multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5962                                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
5963   let Predicates  = [HasAVX512] in
5964   defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
5965            avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
5967   let Predicates = [HasAVX512, HasVLX] in {
5968   defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
5969               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
5970   defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
5971               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
5972   }
5973 }
5975 multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
5976                                   SDNode OpNode, X86SchedWriteWidths sched> {
5977   defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
5978                                  avx512vl_i32_info>;
5979   defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
5980                                  avx512vl_i64_info>, VEX_W;
5981 }
5983 // Use the 512-bit version to implement the 128/256-bit operations when VLX is not available.
5984 multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
5985                                      SDNode OpNode, list<Predicate> p> {
5986   let Predicates = p in {
5987   def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
5988                                   (_.info256.VT _.info256.RC:$src2))),
5989             (EXTRACT_SUBREG
5990                 (!cast<Instruction>(OpcodeStr#"Zrr")
5991                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5992                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5993              sub_ymm)>;
5995   def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
5996                                   (_.info128.VT _.info128.RC:$src2))),
5997             (EXTRACT_SUBREG
5998                 (!cast<Instruction>(OpcodeStr#"Zrr")
5999                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6000                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
6001              sub_xmm)>;
6002   }
6003 }
6004 multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6005                               SDNode OpNode, X86SchedWriteWidths sched> {
6006   let Predicates = [HasBWI] in
6007   defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6008               EVEX_V512, VEX_W;
6009   let Predicates = [HasVLX, HasBWI] in {
6011   defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6012               EVEX_V256, VEX_W;
6013   defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
6014               EVEX_V128, VEX_W;
6015   }
6016 }
6018 defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
6019               avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
6021 defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
6022               avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
6024 defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
6025               avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
6027 defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6028 defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
6030 defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
6031 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
6032 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
6033 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
6036 // Use the 512-bit VPROLV/VPROLI versions to implement v2i64/v4i64 + v4i32/v8i32 when VLX is not available.
6037 let Predicates = [HasAVX512, NoVLX] in {
6038   def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6039             (EXTRACT_SUBREG (v8i64
6040               (VPROLVQZrr
6041                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6042                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6043                        sub_xmm)>;
6044   def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6045             (EXTRACT_SUBREG (v8i64
6046               (VPROLVQZrr
6047                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6048                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6049                        sub_ymm)>;
6051   def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6052             (EXTRACT_SUBREG (v16i32
6053               (VPROLVDZrr
6054                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6055                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6056                         sub_xmm)>;
6057   def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6058             (EXTRACT_SUBREG (v16i32
6059               (VPROLVDZrr
6060                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6061                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6062                         sub_ymm)>;
6064   def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
6065             (EXTRACT_SUBREG (v8i64
6066               (VPROLQZri
6067                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6068                         timm:$src2)), sub_xmm)>;
6069   def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
6070             (EXTRACT_SUBREG (v8i64
6071               (VPROLQZri
6072                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6073                        timm:$src2)), sub_ymm)>;
6075   def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
6076             (EXTRACT_SUBREG (v16i32
6077               (VPROLDZri
6078                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6079                         timm:$src2)), sub_xmm)>;
6080   def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
6081             (EXTRACT_SUBREG (v16i32
6082               (VPROLDZri
6083                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6084                         timm:$src2)), sub_ymm)>;
6085 }
6087 // Use the 512-bit VPRORV/VPRORI versions to implement v2i64/v4i64 + v4i32/v8i32 when VLX is not available.
6088 let Predicates = [HasAVX512, NoVLX] in {
6089   def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6090             (EXTRACT_SUBREG (v8i64
6091               (VPRORVQZrr
6092                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6093                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6094                        sub_xmm)>;
6095   def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6096             (EXTRACT_SUBREG (v8i64
6097               (VPRORVQZrr
6098                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6099                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6100                        sub_ymm)>;
6102   def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6103             (EXTRACT_SUBREG (v16i32
6104               (VPRORVDZrr
6105                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6106                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6107                         sub_xmm)>;
6108   def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6109             (EXTRACT_SUBREG (v16i32
6110               (VPRORVDZrr
6111                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6112                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6113                         sub_ymm)>;
6115   def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
6116             (EXTRACT_SUBREG (v8i64
6117               (VPRORQZri
6118                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6119                         timm:$src2)), sub_xmm)>;
6120   def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
6121             (EXTRACT_SUBREG (v8i64
6122               (VPRORQZri
6123                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6124                        timm:$src2)), sub_ymm)>;
6126   def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
6127             (EXTRACT_SUBREG (v16i32
6128               (VPRORDZri
6129                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6130                         timm:$src2)), sub_xmm)>;
6131   def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
6132             (EXTRACT_SUBREG (v16i32
6133               (VPRORDZri
6134                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6135                         timm:$src2)), sub_ymm)>;
6138 //===-------------------------------------------------------------------===//
6139 // 1-src variable permutation VPERMW/B/D/Q/PS/PD
6140 //===-------------------------------------------------------------------===//
6142 multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6143                                  X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6144   let Predicates  = [HasAVX512] in
6145   defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6146            avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6148   let Predicates = [HasAVX512, HasVLX] in
6149   defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6150               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
6153 multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6154                                  string OpcodeStr, SDNode OpNode,
6155                                  X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6156   let Predicates = [HasAVX512] in
6157   defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6158                               sched, VTInfo.info512>,
6159              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6160                                sched, VTInfo.info512>, EVEX_V512;
6161   let Predicates = [HasAVX512, HasVLX] in
6162   defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6163                               sched, VTInfo.info256>,
6164              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6165                                sched, VTInfo.info256>, EVEX_V256;
6168 multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6169                               Predicate prd, SDNode OpNode,
6170                               X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6171   let Predicates = [prd] in
6172   defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6173               EVEX_V512 ;
6174   let Predicates = [HasVLX, prd] in {
6175   defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6176               EVEX_V256 ;
6177   defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
6178               EVEX_V128 ;
6179   }
6182 defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6183                                WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
6184 defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6185                                WriteVarShuffle256, avx512vl_i8_info>;
6187 defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6188                                     WriteVarShuffle256, avx512vl_i32_info>;
6189 defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6190                                     WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
6191 defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6192                                      WriteFVarShuffle256, avx512vl_f32_info>;
6193 defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6194                                      WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;
6196 defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6197                              X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6198                              EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6199 defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6200                              X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6201                              EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6203 //===----------------------------------------------------------------------===//
6204 // AVX-512 - VPERMIL
6205 //===----------------------------------------------------------------------===//
6207 multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6208                              X86FoldableSchedWrite sched, X86VectorVTInfo _,
6209                              X86VectorVTInfo Ctrl> {
6210   defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6211                   (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6212                   "$src2, $src1", "$src1, $src2",
6213                   (_.VT (OpNode _.RC:$src1,
6214                                (Ctrl.VT Ctrl.RC:$src2)))>,
6215                   T8PD, EVEX_4V, Sched<[sched]>;
6216   defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6217                   (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6218                   "$src2, $src1", "$src1, $src2",
6219                   (_.VT (OpNode
6220                            _.RC:$src1,
6221                            (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
6222                   T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6223                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6224   defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6225                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6226                    "${src2}"##_.BroadcastStr##", $src1",
6227                    "$src1, ${src2}"##_.BroadcastStr,
6228                    (_.VT (OpNode
6229                             _.RC:$src1,
6230                             (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
6231                    T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6232                    Sched<[sched.Folded, sched.ReadAfterFold]>;
6235 multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6236                                     X86SchedWriteWidths sched,
6237                                     AVX512VLVectorVTInfo _,
6238                                     AVX512VLVectorVTInfo Ctrl> {
6239   let Predicates = [HasAVX512] in {
6240     defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6241                                   _.info512, Ctrl.info512>, EVEX_V512;
6242   }
6243   let Predicates = [HasAVX512, HasVLX] in {
6244     defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6245                                   _.info128, Ctrl.info128>, EVEX_V128;
6246     defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6247                                   _.info256, Ctrl.info256>, EVEX_V256;
6248   }
6251 multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6252                          AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6253   defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6254                                       _, Ctrl>;
6255   defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6256                                     X86VPermilpi, SchedWriteFShuffle, _>,
6257                     EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
6260 let ExeDomain = SSEPackedSingle in
6261 defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6262                                avx512vl_i32_info>;
6263 let ExeDomain = SSEPackedDouble in
6264 defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6265                                avx512vl_i64_info>, VEX_W1X;
6267 //===----------------------------------------------------------------------===//
6268 // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6269 //===----------------------------------------------------------------------===//
6271 defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6272                              X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6273                              EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6274 defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6275                                   X86PShufhw, SchedWriteShuffle>,
6276                                   EVEX, AVX512XSIi8Base;
6277 defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6278                                   X86PShuflw, SchedWriteShuffle>,
6279                                   EVEX, AVX512XDIi8Base;
6281 //===----------------------------------------------------------------------===//
6282 // AVX-512 - VPSHUFB
6283 //===----------------------------------------------------------------------===//
6285 multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6286                                X86SchedWriteWidths sched> {
6287   let Predicates = [HasBWI] in
6288   defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6289                               EVEX_V512;
6291   let Predicates = [HasVLX, HasBWI] in {
6292   defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6293                               EVEX_V256;
6294   defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
6295                               EVEX_V128;
6296   }
6299 defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6300                                   SchedWriteVarShuffle>, VEX_WIG;
6302 //===----------------------------------------------------------------------===//
6303 // Move Low to High and High to Low packed FP Instructions
6304 //===----------------------------------------------------------------------===//
6306 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6307           (ins VR128X:$src1, VR128X:$src2),
6308           "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6309           [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6310           Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
6311 let isCommutable = 1 in
6312 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6313           (ins VR128X:$src1, VR128X:$src2),
6314           "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6315           [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6316           Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
6318 //===----------------------------------------------------------------------===//
6319 // VMOVHPS/PD and VMOVLPS/PD Instructions
6320 // All patterns were taken from the SSE implementation.
6321 //===----------------------------------------------------------------------===//
6323 multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6324                                   SDPatternOperator OpNode,
6325                                   X86VectorVTInfo _> {
6326   let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6327   def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6328                   (ins _.RC:$src1, f64mem:$src2),
6329                   !strconcat(OpcodeStr,
6330                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6331                   [(set _.RC:$dst,
6332                      (OpNode _.RC:$src1,
6333                        (_.VT (bitconvert
6334                          (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6335                   Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
6338 // No patterns for MOVLPS/MOVHPS, as the Movlhps node should only be created
6339 // for SSE1. The MOVLPS pattern is even more complex.
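// Editorial illustration (assuming the documented EVEX semantics): the load
// form matched below behaves like
//   vmovhpd (%rdi), %xmm1, %xmm0
// i.e. xmm0[63:0] = xmm1[63:0] and xmm0[127:64] = the f64 at (%rdi), which is
// why it is expressed as an X86Unpckl of the register operand with a
// scalar_to_vector'd 64-bit load.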
6340 defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6341                                   v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6342 defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6343                                   v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6344 defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6345                                   v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6346 defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6347                                   v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6349 let Predicates = [HasAVX512] in {
6350   // VMOVHPD patterns
6351   def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
6352                     (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
6353            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6354   def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
6355             (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6357   // VMOVLPD patterns
6358   def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
6359             (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
6362 let SchedRW = [WriteFStore] in {
6363 let mayStore = 1, hasSideEffects = 0 in
6364 def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6365                        (ins f64mem:$dst, VR128X:$src),
6366                        "vmovhps\t{$src, $dst|$dst, $src}",
6367                        []>, EVEX, EVEX_CD8<32, CD8VT2>;
6368 def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6369                        (ins f64mem:$dst, VR128X:$src),
6370                        "vmovhpd\t{$src, $dst|$dst, $src}",
6371                        [(store (f64 (extractelt
6372                                      (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6373                                      (iPTR 0))), addr:$dst)]>,
6374                        EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6375 let mayStore = 1, hasSideEffects = 0 in
6376 def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6377                        (ins f64mem:$dst, VR128X:$src),
6378                        "vmovlps\t{$src, $dst|$dst, $src}",
6379                        []>, EVEX, EVEX_CD8<32, CD8VT2>;
6380 def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6381                        (ins f64mem:$dst, VR128X:$src),
6382                        "vmovlpd\t{$src, $dst|$dst, $src}",
6383                        [(store (f64 (extractelt (v2f64 VR128X:$src),
6384                                      (iPTR 0))), addr:$dst)]>,
6385                        EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6386 } // SchedRW
6388 let Predicates = [HasAVX512] in {
6389   // VMOVHPD patterns
6390   def : Pat<(store (f64 (extractelt
6391                            (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6392                            (iPTR 0))), addr:$dst),
6393            (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6395 //===----------------------------------------------------------------------===//
6396 // FMA - Fused Multiply Operations
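// Quick reference (editorial note, not from the original source): the
// 132/213/231 suffix encodes which operands are multiplied and which one is
// added, with operand 1 being the tied destination. For vfmadd*:
//   vfmadd132: dst = dst  * src3 + src2
//   vfmadd213: dst = src2 * dst  + src3
//   vfmadd231: dst = src2 * src3 + dst
// These correspond to the (OpNode ...) operand orders in the multiclasses
// below.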
6399 multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6400                                X86FoldableSchedWrite sched,
6401                                X86VectorVTInfo _, string Suff> {
6402   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6403   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6404           (ins _.RC:$src2, _.RC:$src3),
6405           OpcodeStr, "$src3, $src2", "$src2, $src3",
6406           (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6407           AVX512FMA3Base, Sched<[sched]>;
6409   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6410           (ins _.RC:$src2, _.MemOp:$src3),
6411           OpcodeStr, "$src3, $src2", "$src2, $src3",
6412           (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6413           AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6415   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6416             (ins _.RC:$src2, _.ScalarMemOp:$src3),
6417             OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
6418             !strconcat("$src2, ${src3}", _.BroadcastStr ),
6419             (OpNode _.RC:$src2,
6420              _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
6421              AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6422   }
6425 multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6426                                  X86FoldableSchedWrite sched,
6427                                  X86VectorVTInfo _, string Suff> {
6428   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6429   defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6430           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6431           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6432           (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
6433           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6436 multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6437                                    SDNode OpNodeRnd, X86SchedWriteWidths sched,
6438                                    AVX512VLVectorVTInfo _, string Suff> {
6439   let Predicates = [HasAVX512] in {
6440     defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.ZMM,
6441                                       _.info512, Suff>,
6442                   avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6443                                         _.info512, Suff>,
6444                               EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6445   }
6446   let Predicates = [HasVLX, HasAVX512] in {
6447     defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.YMM,
6448                                     _.info256, Suff>,
6449                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6450     defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.XMM,
6451                                     _.info128, Suff>,
6452                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6453   }
6456 multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6457                               SDNode OpNodeRnd> {
6458     defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6459                                       SchedWriteFMA, avx512vl_f32_info, "PS">;
6460     defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6461                                       SchedWriteFMA, avx512vl_f64_info, "PD">,
6462                                       VEX_W;
6465 defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
6466 defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
6467 defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
6468 defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
6469 defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
6470 defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
6473 multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6474                                X86FoldableSchedWrite sched,
6475                                X86VectorVTInfo _, string Suff> {
6476   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6477   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6478           (ins _.RC:$src2, _.RC:$src3),
6479           OpcodeStr, "$src3, $src2", "$src2, $src3",
6480           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1,
6481           vselect, 1>, AVX512FMA3Base, Sched<[sched]>;
6483   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6484           (ins _.RC:$src2, _.MemOp:$src3),
6485           OpcodeStr, "$src3, $src2", "$src2, $src3",
6486           (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6487           AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6489   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6490          (ins _.RC:$src2, _.ScalarMemOp:$src3),
6491          OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6492          "$src2, ${src3}"##_.BroadcastStr,
6493          (_.VT (OpNode _.RC:$src2,
6494                       (_.VT (_.BroadcastLdFrag addr:$src3)),
6495                       _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
6496          Sched<[sched.Folded, sched.ReadAfterFold]>;
6497   }
6500 multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6501                                  X86FoldableSchedWrite sched,
6502                                  X86VectorVTInfo _, string Suff> {
6503   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6504   defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6505           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6506           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6507           (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
6508           1, 1, vselect, 1>,
6509           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6512 multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6513                                    SDNode OpNodeRnd, X86SchedWriteWidths sched,
6514                                    AVX512VLVectorVTInfo _, string Suff> {
6515   let Predicates = [HasAVX512] in {
6516     defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.ZMM,
6517                                       _.info512, Suff>,
6518                   avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6519                                         _.info512, Suff>,
6520                               EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6521   }
6522   let Predicates = [HasVLX, HasAVX512] in {
6523     defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.YMM,
6524                                     _.info256, Suff>,
6525                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6526     defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.XMM,
6527                                     _.info128, Suff>,
6528                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6529   }
6532 multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6533                               SDNode OpNodeRnd > {
6534     defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6535                                       SchedWriteFMA, avx512vl_f32_info, "PS">;
6536     defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6537                                       SchedWriteFMA, avx512vl_f64_info, "PD">,
6538                                       VEX_W;
6541 defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
6542 defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
6543 defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
6544 defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
6545 defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
6546 defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
6548 multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6549                                X86FoldableSchedWrite sched,
6550                                X86VectorVTInfo _, string Suff> {
6551   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6552   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6553           (ins _.RC:$src2, _.RC:$src3),
6554           OpcodeStr, "$src3, $src2", "$src2, $src3",
6555           (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1, vselect, 1>,
6556           AVX512FMA3Base, Sched<[sched]>;
6558   // The pattern is in 312 order so that the load is in a different place from
6559   // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
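  // For example (editorial note), the folded-load pattern below is written as
  //   (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)
  // rather than (OpNode _.RC:$src1, (_.LdFrag addr:$src3), _.RC:$src2); both
  // mean src1 * mem + src2 because the multiply operands commute, but the
  // first spelling is textually distinct from the 213/231 load patterns.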
6560   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6561           (ins _.RC:$src2, _.MemOp:$src3),
6562           OpcodeStr, "$src3, $src2", "$src2, $src3",
6563           (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
6564           AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6566   // The pattern is in 312 order so that the load is in a different place from
6567   // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6568   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6569          (ins _.RC:$src2, _.ScalarMemOp:$src3),
6570          OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6571          "$src2, ${src3}"##_.BroadcastStr,
6572          (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6573                        _.RC:$src1, _.RC:$src2)), 1, 0>,
6574          AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6575   }
6578 multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6579                                  X86FoldableSchedWrite sched,
6580                                  X86VectorVTInfo _, string Suff> {
6581   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6582   defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6583           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6584           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6585           (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
6586           1, 1, vselect, 1>,
6587           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6590 multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6591                                    SDNode OpNodeRnd, X86SchedWriteWidths sched,
6592                                    AVX512VLVectorVTInfo _, string Suff> {
6593   let Predicates = [HasAVX512] in {
6594     defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.ZMM,
6595                                       _.info512, Suff>,
6596                   avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6597                                         _.info512, Suff>,
6598                               EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6599   }
6600   let Predicates = [HasVLX, HasAVX512] in {
6601     defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.YMM,
6602                                     _.info256, Suff>,
6603                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6604     defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.XMM,
6605                                     _.info128, Suff>,
6606                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6607   }
6610 multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6611                               SDNode OpNodeRnd > {
6612     defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6613                                       SchedWriteFMA, avx512vl_f32_info, "PS">;
6614     defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6615                                       SchedWriteFMA, avx512vl_f64_info, "PD">,
6616                                       VEX_W;
6619 defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
6620 defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
6621 defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
6622 defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
6623 defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
6624 defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
6626 // Scalar FMA
6627 multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6628                                dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
6629 let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
6630   defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6631           (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
6632           "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6633           AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>;
6635   let mayLoad = 1 in
6636   defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6637           (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
6638           "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6639           AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;
6641   defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6642          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6643          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
6644          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
6646   let isCodeGenOnly = 1, isCommutable = 1 in {
6647     def r     : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6648                      (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
6649                      !strconcat(OpcodeStr,
6650                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6651                      !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>;
6652     def m     : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
6653                     (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
6654                     !strconcat(OpcodeStr,
6655                                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6656                     [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;
6658     def rb    : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6659                      (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
6660                      !strconcat(OpcodeStr,
6661                               "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
6662                      !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
6663                      Sched<[SchedWriteFMA.Scl]>;
6664   }// isCodeGenOnly = 1
6665 }// Constraints = "$src1 = $dst"
6668 multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6669                             string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
6670                             X86VectorVTInfo _, string SUFF> {
6671   let ExeDomain = _.ExeDomain in {
6672   defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
6673                 // Operands for the intrinsic are in 123 order to preserve passthru
6674                 // semantics (see the editorial note after this defm).
6675                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6676                          _.FRC:$src3))),
6677                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6678                          (_.ScalarLdFrag addr:$src3)))),
6679                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
6680                          _.FRC:$src3, (i32 timm:$rc)))), 0>;
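  // Editorial note on the pass-through semantics mentioned above: the scalar
  // forms replace only element 0 of the destination, e.g. for vfmadd213ss
  //   dst[31:0]   = src2[31:0] * dst[31:0] + src3[31:0]
  //   dst[127:32] = dst[127:32]   (upper elements come from the tied operand)
  // so the intrinsic operand that supplies those upper elements must stay
  // mapped to the tied $src1/$dst.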
6682   defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
6683                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6684                                           _.FRC:$src1))),
6685                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
6686                             (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
6687                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
6688                          _.FRC:$src1, (i32 timm:$rc)))), 1>;
6690   // One pattern is in 312 order so that the load is in a different place from
6691   // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6692   defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
6693                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6694                          _.FRC:$src2))),
6695                 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6696                                  _.FRC:$src1, _.FRC:$src2))),
6697                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
6698                          _.FRC:$src2, (i32 timm:$rc)))), 1>;
6699   }
6702 multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6703                         string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> {
6704   let Predicates = [HasAVX512] in {
6705     defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6706                                  OpNodeRnd, f32x_info, "SS">,
6707                                  EVEX_CD8<32, CD8VT1>, VEX_LIG;
6708     defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6709                                  OpNodeRnd, f64x_info, "SD">,
6710                                  EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
6711   }
6714 defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>;
6715 defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>;
6716 defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
6717 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
6719 multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
6720                                       string Suffix, SDNode Move,
6721                                       X86VectorVTInfo _, PatLeaf ZeroFP> {
6722   let Predicates = [HasAVX512] in {
6723     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6724                 (Op _.FRC:$src2,
6725                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6726                     _.FRC:$src3))))),
6727               (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
6728                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6729                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6731     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6732                 (Op _.FRC:$src2, _.FRC:$src3,
6733                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6734               (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
6735                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6736                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6738     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6739                 (Op _.FRC:$src2,
6740                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6741                     (_.ScalarLdFrag addr:$src3)))))),
6742               (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
6743                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6744                addr:$src3)>;
6746     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6747                 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6748                     (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
6749               (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
6750                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6751                addr:$src3)>;
6753     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6754                 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6755                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6756               (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
6757                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6758                addr:$src3)>;
6760     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6761                (X86selects VK1WM:$mask,
6762                 (Op _.FRC:$src2,
6763                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6764                     _.FRC:$src3),
6765                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6766               (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
6767                VR128X:$src1, VK1WM:$mask,
6768                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6769                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6771     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6772                (X86selects VK1WM:$mask,
6773                 (Op _.FRC:$src2,
6774                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6775                     (_.ScalarLdFrag addr:$src3)),
6776                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6777               (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
6778                VR128X:$src1, VK1WM:$mask,
6779                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6781     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6782                (X86selects VK1WM:$mask,
6783                 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6784                     (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
6785                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6786               (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
6787                VR128X:$src1, VK1WM:$mask,
6788                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6790     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6791                (X86selects VK1WM:$mask,
6792                 (Op _.FRC:$src2, _.FRC:$src3,
6793                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6794                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6795               (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
6796                VR128X:$src1, VK1WM:$mask,
6797                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6798                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6800     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6801                (X86selects VK1WM:$mask,
6802                 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6803                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6804                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6805               (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
6806                VR128X:$src1, VK1WM:$mask,
6807                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6809     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6810                (X86selects VK1WM:$mask,
6811                 (Op _.FRC:$src2,
6812                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6813                     _.FRC:$src3),
6814                 (_.EltVT ZeroFP)))))),
6815               (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
6816                VR128X:$src1, VK1WM:$mask,
6817                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6818                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6820     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6821                (X86selects VK1WM:$mask,
6822                 (Op _.FRC:$src2, _.FRC:$src3,
6823                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6824                 (_.EltVT ZeroFP)))))),
6825               (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
6826                VR128X:$src1, VK1WM:$mask,
6827                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6828                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6830     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6831                (X86selects VK1WM:$mask,
6832                 (Op _.FRC:$src2,
6833                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6834                     (_.ScalarLdFrag addr:$src3)),
6835                 (_.EltVT ZeroFP)))))),
6836               (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
6837                VR128X:$src1, VK1WM:$mask,
6838                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6840     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6841                (X86selects VK1WM:$mask,
6842                 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6843                     _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
6844                 (_.EltVT ZeroFP)))))),
6845               (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
6846                VR128X:$src1, VK1WM:$mask,
6847                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6849     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6850                (X86selects VK1WM:$mask,
6851                 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6852                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6853                 (_.EltVT ZeroFP)))))),
6854               (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
6855                VR128X:$src1, VK1WM:$mask,
6856                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6858     // Patterns with rounding mode.
6859     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6860                 (RndOp _.FRC:$src2,
6861                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6862                        _.FRC:$src3, (i32 timm:$rc)))))),
6863               (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
6864                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6865                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6867     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6868                 (RndOp _.FRC:$src2, _.FRC:$src3,
6869                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6870                        (i32 timm:$rc)))))),
6871               (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
6872                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6873                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6875     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6876                (X86selects VK1WM:$mask,
6877                 (RndOp _.FRC:$src2,
6878                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6879                        _.FRC:$src3, (i32 timm:$rc)),
6880                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6881               (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
6882                VR128X:$src1, VK1WM:$mask,
6883                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6884                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6886     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6887                (X86selects VK1WM:$mask,
6888                 (RndOp _.FRC:$src2, _.FRC:$src3,
6889                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6890                        (i32 timm:$rc)),
6891                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6892               (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
6893                VR128X:$src1, VK1WM:$mask,
6894                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6895                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6897     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6898                (X86selects VK1WM:$mask,
6899                 (RndOp _.FRC:$src2,
6900                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6901                        _.FRC:$src3, (i32 timm:$rc)),
6902                 (_.EltVT ZeroFP)))))),
6903               (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
6904                VR128X:$src1, VK1WM:$mask,
6905                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6906                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6908     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6909                (X86selects VK1WM:$mask,
6910                 (RndOp _.FRC:$src2, _.FRC:$src3,
6911                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6912                        (i32 timm:$rc)),
6913                 (_.EltVT ZeroFP)))))),
6914               (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
6915                VR128X:$src1, VK1WM:$mask,
6916                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6917                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6918   }
6921 defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SS",
6922                                   X86Movss, v4f32x_info, fp32imm0>;
6923 defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SS",
6924                                   X86Movss, v4f32x_info, fp32imm0>;
6925 defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SS",
6926                                   X86Movss, v4f32x_info, fp32imm0>;
6927 defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS",
6928                                   X86Movss, v4f32x_info, fp32imm0>;
6930 defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SD",
6931                                   X86Movsd, v2f64x_info, fp64imm0>;
6932 defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SD",
6933                                   X86Movsd, v2f64x_info, fp64imm0>;
6934 defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SD",
6935                                   X86Movsd, v2f64x_info, fp64imm0>;
6936 defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD",
6937                                   X86Movsd, v2f64x_info, fp64imm0>;
6939 //===----------------------------------------------------------------------===//
6940 // AVX-512 IFMA - Packed Multiply of Unsigned 52-bit Integers and Add the Low 52 Bits
6941 //===----------------------------------------------------------------------===//
6942 let Constraints = "$src1 = $dst" in {
6943 multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6944                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6945   // NOTE: The SDNode has the multiply operands first and the addend last.
6946   // This enables commuted load patterns to be autogenerated by tablegen.
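  // Editorial illustration (not from the original source): the node is formed
  // as (x86vpmadd52l $mul1, $mul2, $acc), so the two multiply operands may be
  // commuted to fold a load from either side while the accumulator stays tied
  // to $dst. Per 64-bit lane the operation is roughly
  //   acc += ((mul1 & (2^52-1)) * (mul2 & (2^52-1))) mod 2^52   for vpmadd52luq
  //   acc += ((mul1 & (2^52-1)) * (mul2 & (2^52-1))) >> 52      for vpmadd52huq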
6947   let ExeDomain = _.ExeDomain in {
6948   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6949           (ins _.RC:$src2, _.RC:$src3),
6950           OpcodeStr, "$src3, $src2", "$src2, $src3",
6951           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6952          AVX512FMA3Base, Sched<[sched]>;
6954   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6955           (ins _.RC:$src2, _.MemOp:$src3),
6956           OpcodeStr, "$src3, $src2", "$src2, $src3",
6957           (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
6958           AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6960   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6961             (ins _.RC:$src2, _.ScalarMemOp:$src3),
6962             OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
6963             !strconcat("$src2, ${src3}", _.BroadcastStr ),
6964             (OpNode _.RC:$src2,
6965                     (_.VT (_.BroadcastLdFrag addr:$src3)),
6966                     _.RC:$src1)>,
6967             AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6968   }
6970 } // Constraints = "$src1 = $dst"
6972 multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6973                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
6974   let Predicates = [HasIFMA] in {
6975     defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
6976                       EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6977   }
6978   let Predicates = [HasVLX, HasIFMA] in {
6979     defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
6980                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6981     defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
6982                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6983   }
6986 defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
6987                                          SchedWriteVecIMul, avx512vl_i64_info>,
6988                                          VEX_W;
6989 defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
6990                                          SchedWriteVecIMul, avx512vl_i64_info>,
6991                                          VEX_W;
6993 //===----------------------------------------------------------------------===//
6994 // AVX-512 - Scalar convert from signed integer to float/double
6995 //===----------------------------------------------------------------------===//
6997 multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
6998                     RegisterClass SrcRC, X86VectorVTInfo DstVT,
6999                     X86MemOperand x86memop, PatFrag ld_frag, string asm,
7000                     string mem> {
7001   let hasSideEffects = 0, isCodeGenOnly = 1 in {
7002     def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7003               (ins DstVT.FRC:$src1, SrcRC:$src),
7004               !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7005               EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7006     let mayLoad = 1 in
7007       def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7008               (ins DstVT.FRC:$src1, x86memop:$src),
7009               asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
7010               EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7011   } // hasSideEffects = 0
7012   def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7013                 (ins DstVT.RC:$src1, SrcRC:$src2),
7014                 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7015                 [(set DstVT.RC:$dst,
7016                       (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
7017                EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7019   def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7020                 (ins DstVT.RC:$src1, x86memop:$src2),
7021                 asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7022                 [(set DstVT.RC:$dst,
7023                       (OpNode (DstVT.VT DstVT.RC:$src1),
7024                                (ld_frag addr:$src2)))]>,
7025                 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7026   def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7027                   (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
7028                   DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
7031 multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7032                                X86FoldableSchedWrite sched, RegisterClass SrcRC,
7033                                X86VectorVTInfo DstVT, string asm,
7034                                string mem> {
7035   def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7036               (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7037               !strconcat(asm,
7038                   "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7039               [(set DstVT.RC:$dst,
7040                     (OpNode (DstVT.VT DstVT.RC:$src1),
7041                              SrcRC:$src2,
7042                              (i32 timm:$rc)))]>,
7043               EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7044   def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
7045                   (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
7046                   DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
7049 multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
7050                                 X86FoldableSchedWrite sched,
7051                                 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7052                                 X86MemOperand x86memop, PatFrag ld_frag,
7053                                 string asm, string mem> {
7054   defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
7055               avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7056                             ld_frag, asm, mem>, VEX_LIG;
7059 let Predicates = [HasAVX512] in {
7060 defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7061                                  WriteCvtI2SS, GR32,
7062                                  v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7063                                  XS, EVEX_CD8<32, CD8VT1>;
7064 defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7065                                  WriteCvtI2SS, GR64,
7066                                  v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7067                                  XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7068 defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7069                                  v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l">,
7070                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7071 defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7072                                  WriteCvtI2SD, GR64,
7073                                  v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7074                                  XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7076 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7077               (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7078 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7079               (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7081 def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
7082           (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7083 def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
7084           (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7085 def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
7086           (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7087 def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
7088           (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7090 def : Pat<(f32 (sint_to_fp GR32:$src)),
7091           (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7092 def : Pat<(f32 (sint_to_fp GR64:$src)),
7093           (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7094 def : Pat<(f64 (sint_to_fp GR32:$src)),
7095           (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7096 def : Pat<(f64 (sint_to_fp GR64:$src)),
7097           (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7099 defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7100                                   WriteCvtI2SS, GR32,
7101                                   v4f32x_info, i32mem, loadi32,
7102                                   "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
7103 defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7104                                   WriteCvtI2SS, GR64,
7105                                   v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7106                                   XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7107 defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7108                                   i32mem, loadi32, "cvtusi2sd", "l">,
7109                                   XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7110 defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7111                                   WriteCvtI2SD, GR64,
7112                                   v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7113                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7115 def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7116               (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7117 def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7118               (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7120 def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
7121           (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7122 def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
7123           (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7124 def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
7125           (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7126 def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
7127           (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7129 def : Pat<(f32 (uint_to_fp GR32:$src)),
7130           (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7131 def : Pat<(f32 (uint_to_fp GR64:$src)),
7132           (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7133 def : Pat<(f64 (uint_to_fp GR32:$src)),
7134           (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7135 def : Pat<(f64 (uint_to_fp GR64:$src)),
7136           (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7139 //===----------------------------------------------------------------------===//
7140 // AVX-512  Scalar convert from float/double to integer
7141 //===----------------------------------------------------------------------===//
7143 multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7144                                   X86VectorVTInfo DstVT, SDNode OpNode,
7145                                   SDNode OpNodeRnd,
7146                                   X86FoldableSchedWrite sched, string asm,
7147                                   string aliasStr> {
7148   let Predicates = [HasAVX512] in {
7149     def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7150                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7151                 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7152                 EVEX, VEX_LIG, Sched<[sched]>;
7153     def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7154                  !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7155                  [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
7156                  EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7157                  Sched<[sched]>;
7158     def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7159                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7160                 [(set DstVT.RC:$dst, (OpNode
7161                       (SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>,
7162                 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7163   } // Predicates = [HasAVX512]
7165   def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7166           (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7167   def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7168           (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7169   def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7170           (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7171                                           SrcVT.IntScalarMemOp:$src), 0, "att">;
7174 // Convert float/double to signed/unsigned int 32/64
7175 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7176                                    X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7177                                    XS, EVEX_CD8<32, CD8VT1>;
7178 defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7179                                    X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7180                                    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7181 defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7182                                    X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7183                                    XS, EVEX_CD8<32, CD8VT1>;
7184 defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7185                                    X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7186                                    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7187 defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7188                                    X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7189                                    XD, EVEX_CD8<64, CD8VT1>;
7190 defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7191                                    X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7192                                    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7193 defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7194                                    X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7195                                    XD, EVEX_CD8<64, CD8VT1>;
7196 defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7197                                    X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7198                                    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7200 // Patterns used to match the vcvtsi2s{s,d} intrinsic sequences from clang,
7201 // which would otherwise produce unnecessary vmovs{s,d} instructions.
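// Illustrative example (an assumption, not taken from this file): user code
// such as "__m128 r = _mm_cvtsi64_ss(a, x);" is lowered by clang to a scalar
// sint_to_fp whose result is merged into the low lane with a move; the
// patterns below fold that sequence into a single "_Int" conversion so no
// separate vmovss/vmovsd is emitted.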
7202 let Predicates = [HasAVX512] in {
7203 def : Pat<(v4f32 (X86Movss
7204                    (v4f32 VR128X:$dst),
7205                    (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
7206           (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7208 def : Pat<(v4f32 (X86Movss
7209                    (v4f32 VR128X:$dst),
7210                    (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
7211           (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7213 def : Pat<(v4f32 (X86Movss
7214                    (v4f32 VR128X:$dst),
7215                    (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
7216           (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7218 def : Pat<(v4f32 (X86Movss
7219                    (v4f32 VR128X:$dst),
7220                    (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
7221           (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7223 def : Pat<(v2f64 (X86Movsd
7224                    (v2f64 VR128X:$dst),
7225                    (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
7226           (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7228 def : Pat<(v2f64 (X86Movsd
7229                    (v2f64 VR128X:$dst),
7230                    (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
7231           (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7233 def : Pat<(v2f64 (X86Movsd
7234                    (v2f64 VR128X:$dst),
7235                    (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
7236           (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7238 def : Pat<(v2f64 (X86Movsd
7239                    (v2f64 VR128X:$dst),
7240                    (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
7241           (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7243 def : Pat<(v4f32 (X86Movss
7244                    (v4f32 VR128X:$dst),
7245                    (v4f32 (scalar_to_vector (f32 (uint_to_fp GR64:$src)))))),
7246           (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7248 def : Pat<(v4f32 (X86Movss
7249                    (v4f32 VR128X:$dst),
7250                    (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi64 addr:$src))))))),
7251           (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7253 def : Pat<(v4f32 (X86Movss
7254                    (v4f32 VR128X:$dst),
7255                    (v4f32 (scalar_to_vector (f32 (uint_to_fp GR32:$src)))))),
7256           (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7258 def : Pat<(v4f32 (X86Movss
7259                    (v4f32 VR128X:$dst),
7260                    (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi32 addr:$src))))))),
7261           (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7263 def : Pat<(v2f64 (X86Movsd
7264                    (v2f64 VR128X:$dst),
7265                    (v2f64 (scalar_to_vector (f64 (uint_to_fp GR64:$src)))))),
7266           (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7268 def : Pat<(v2f64 (X86Movsd
7269                    (v2f64 VR128X:$dst),
7270                    (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi64 addr:$src))))))),
7271           (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7273 def : Pat<(v2f64 (X86Movsd
7274                    (v2f64 VR128X:$dst),
7275                    (v2f64 (scalar_to_vector (f64 (uint_to_fp GR32:$src)))))),
7276           (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7278 def : Pat<(v2f64 (X86Movsd
7279                    (v2f64 VR128X:$dst),
7280                    (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi32 addr:$src))))))),
7281           (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7282 } // Predicates = [HasAVX512]
7284 // Convert float/double to signed/unsigned int 32/64 with truncation
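// Illustrative semantics (assumed mapping): a C cast "(int)f" truncates
// toward zero and corresponds to the vcvtt* forms below, whereas the
// non-truncating vcvt* forms above round according to MXCSR or the embedded
// rounding control.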
7285 multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7286                             X86VectorVTInfo _DstRC, SDNode OpNode,
7287                             SDNode OpNodeInt, SDNode OpNodeSAE,
7288                             X86FoldableSchedWrite sched, string aliasStr>{
7289 let Predicates = [HasAVX512] in {
7290   let isCodeGenOnly = 1 in {
7291   def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7292               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7293               [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7294               EVEX, VEX_LIG, Sched<[sched]>;
7295   def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7296               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7297               [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7298               EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7299   }
7301   def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7302             !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7303            [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7304            EVEX, VEX_LIG, Sched<[sched]>;
7305   def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7306             !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7307             [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7308                                   EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
7309   def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7310               (ins _SrcRC.IntScalarMemOp:$src),
7311               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7312               [(set _DstRC.RC:$dst,
7313                 (OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>,
7314               EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7315 } //HasAVX512
7317   def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7318           (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7319   def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7320           (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7321   def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7322           (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7323                                           _SrcRC.IntScalarMemOp:$src), 0, "att">;
7326 defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7327                         fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7328                         "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7329 defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7330                         fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7331                         "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7332 defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7333                         fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7334                         "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7335 defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7336                         fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7337                         "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
7339 defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7340                         fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7341                         "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7342 defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7343                         fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7344                         "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
7345 defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7346                         fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7347                         "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7348 defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7349                         fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7350                         "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7352 //===----------------------------------------------------------------------===//
7353 // AVX-512  Convert from float to double and back
7354 //===----------------------------------------------------------------------===//
7356 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7357                                 X86VectorVTInfo _Src, SDNode OpNode,
7358                                 X86FoldableSchedWrite sched> {
7359   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7360                          (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7361                          "$src2, $src1", "$src1, $src2",
7362                          (_.VT (OpNode (_.VT _.RC:$src1),
7363                                        (_Src.VT _Src.RC:$src2)))>,
7364                          EVEX_4V, VEX_LIG, Sched<[sched]>;
7365   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7366                          (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7367                          "$src2, $src1", "$src1, $src2",
7368                          (_.VT (OpNode (_.VT _.RC:$src1),
7369                                   (_Src.VT _Src.ScalarIntMemCPat:$src2)))>,
7370                          EVEX_4V, VEX_LIG,
7371                          Sched<[sched.Folded, sched.ReadAfterFold]>;
7373   let isCodeGenOnly = 1, hasSideEffects = 0 in {
7374     def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7375                (ins _.FRC:$src1, _Src.FRC:$src2),
7376                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7377                EVEX_4V, VEX_LIG, Sched<[sched]>;
7378     let mayLoad = 1 in
7379     def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7380                (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7381                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7382                EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7383   }
7386 // Scalar Conversion with SAE - suppress all exceptions
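// Illustrative AT&T-syntax form (assumed, matching the asm template below):
//   vcvtss2sd {sae}, %xmm2, %xmm1, %xmm0
// {sae} suppresses floating-point exception reporting for this instruction
// without changing the rounding mode.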
7387 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7388                                     X86VectorVTInfo _Src, SDNode OpNodeSAE,
7389                                     X86FoldableSchedWrite sched> {
7390   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7391                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7392                         "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7393                         (_.VT (OpNodeSAE (_.VT _.RC:$src1),
7394                                          (_Src.VT _Src.RC:$src2)))>,
7395                         EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
7398 // Scalar Conversion with rounding control (RC)
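// Illustrative AT&T-syntax form (assumed, matching the asm template below):
//   vcvtsd2ss {rn-sae}, %xmm2, %xmm1, %xmm0
// The embedded rounding control ({rn-sae}, {rd-sae}, {ru-sae}, {rz-sae})
// overrides the MXCSR rounding mode for this instruction and also implies
// suppress-all-exceptions.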
7399 multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7400                                    X86VectorVTInfo _Src, SDNode OpNodeRnd,
7401                                    X86FoldableSchedWrite sched> {
7402   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7403                         (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7404                         "$rc, $src2, $src1", "$src1, $src2, $rc",
7405                         (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7406                                          (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
7407                         EVEX_4V, VEX_LIG, Sched<[sched]>,
7408                         EVEX_B, EVEX_RC;
7410 multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
7411                                       SDNode OpNode, SDNode OpNodeRnd,
7412                                       X86FoldableSchedWrite sched,
7413                                       X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7414   let Predicates = [HasAVX512] in {
7415     defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7416              avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7417                                OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
7418   }
7421 multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
7422                                       SDNode OpNode, SDNode OpNodeSAE,
7423                                       X86FoldableSchedWrite sched,
7424                                       X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7425   let Predicates = [HasAVX512] in {
7426     defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7427              avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
7428              EVEX_CD8<32, CD8VT1>, XS;
7429   }
7431 defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86frounds,
7432                                          X86froundsRnd, WriteCvtSD2SS, f64x_info,
7433                                          f32x_info>;
7434 defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts,
7435                                           X86fpextsSAE, WriteCvtSS2SD, f32x_info,
7436                                           f64x_info>;
7438 def : Pat<(f64 (fpextend FR32X:$src)),
7439           (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7440           Requires<[HasAVX512]>;
7441 def : Pat<(f64 (fpextend (loadf32 addr:$src))),
7442           (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7443           Requires<[HasAVX512, OptForSize]>;
7445 def : Pat<(f32 (fpround FR64X:$src)),
7446           (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7447            Requires<[HasAVX512]>;
7449 def : Pat<(v4f32 (X86Movss
7450                    (v4f32 VR128X:$dst),
7451                    (v4f32 (scalar_to_vector
7452                      (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
7453           (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
7454           Requires<[HasAVX512]>;
7456 def : Pat<(v2f64 (X86Movsd
7457                    (v2f64 VR128X:$dst),
7458                    (v2f64 (scalar_to_vector
7459                      (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
7460           (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
7461           Requires<[HasAVX512]>;
7463 //===----------------------------------------------------------------------===//
7464 // AVX-512  Vector convert from signed/unsigned integer to float/double
7465 //          and from float/double to signed/unsigned integer
7466 //===----------------------------------------------------------------------===//
7468 multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7469                           X86VectorVTInfo _Src, SDNode OpNode,
7470                           X86FoldableSchedWrite sched,
7471                           string Broadcast = _.BroadcastStr,
7472                           string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7473                           RegisterClass MaskRC = _.KRCWM,
7474                           dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
7476   defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
7477                          (ins _Src.RC:$src),
7478                          (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
7479                          (ins MaskRC:$mask, _Src.RC:$src),
7480                           OpcodeStr, "$src", "$src",
7481                          (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7482                          (vselect MaskRC:$mask,
7483                                   (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7484                                   _.RC:$src0),
7485                          vselect, "$src0 = $dst">,
7486                          EVEX, Sched<[sched]>;
7488   defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
7489                          (ins MemOp:$src),
7490                          (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
7491                          (ins MaskRC:$mask, MemOp:$src),
7492                          OpcodeStr#Alias, "$src", "$src",
7493                          LdDAG,
7494                          (vselect MaskRC:$mask, LdDAG, _.RC:$src0),
7495                          vselect, "$src0 = $dst">,
7496                          EVEX, Sched<[sched.Folded]>;
7498   defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
7499                          (ins _Src.ScalarMemOp:$src),
7500                          (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
7501                          (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
7502                          OpcodeStr,
7503                          "${src}"##Broadcast, "${src}"##Broadcast,
7504                          (_.VT (OpNode (_Src.VT
7505                                   (_Src.BroadcastLdFrag addr:$src))
7506                             )),
7507                          (vselect MaskRC:$mask,
7508                                   (_.VT
7509                                    (OpNode
7510                                     (_Src.VT
7511                                      (_Src.BroadcastLdFrag addr:$src)))),
7512                                   _.RC:$src0),
7513                          vselect, "$src0 = $dst">,
7514                          EVEX, EVEX_B, Sched<[sched.Folded]>;
7516 // Conversion with SAE - suppress all exceptions
7517 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7518                               X86VectorVTInfo _Src, SDNode OpNodeSAE,
7519                               X86FoldableSchedWrite sched> {
7520   defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7521                         (ins _Src.RC:$src), OpcodeStr,
7522                         "{sae}, $src", "$src, {sae}",
7523                         (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
7524                         EVEX, EVEX_B, Sched<[sched]>;
7527 // Conversion with rounding control (RC)
7528 multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7529                          X86VectorVTInfo _Src, SDNode OpNodeRnd,
7530                          X86FoldableSchedWrite sched> {
7531   defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7532                         (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
7533                         "$rc, $src", "$src, $rc",
7534                         (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
7535                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
7538 // Similar to avx512_vcvt_fp, but uses an extload for the memory form.
7539 multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7540                                 X86VectorVTInfo _Src, SDNode OpNode,
7541                                 X86FoldableSchedWrite sched,
7542                                 string Broadcast = _.BroadcastStr,
7543                                 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7544                                 RegisterClass MaskRC = _.KRCWM>
7545   : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, sched, Broadcast, Alias,
7546                    MemOp, MaskRC,
7547                    (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
7549 // Extend Float to Double
7550 multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
7551                            X86SchedWriteWidths sched> {
7552   let Predicates = [HasAVX512] in {
7553     defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
7554                             fpextend, sched.ZMM>,
7555              avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
7556                                 X86vfpextSAE, sched.ZMM>, EVEX_V512;
7557   }
7558   let Predicates = [HasVLX] in {
7559     defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
7560                                X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
7561     defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
7562                                sched.YMM>, EVEX_V256;
7563   }
7566 // Truncate Double to Float
7567 multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
7568   let Predicates = [HasAVX512] in {
7569     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, X86vfpround, sched.ZMM>,
7570              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
7571                                X86vfproundRnd, sched.ZMM>, EVEX_V512;
7572   }
7573   let Predicates = [HasVLX] in {
7574     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
7575                                null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>,
7576                                EVEX_V128;
7577     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, X86vfpround,
7578                                sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7579   }
7581   def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7582                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7583   def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7584                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7585                   VK2WM:$mask, VR128X:$src), 0, "att">;
7586   def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|"
7587                   "$dst {${mask}} {z}, $src}",
7588                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7589                   VK2WM:$mask, VR128X:$src), 0, "att">;
7590   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7591                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7592   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
7593                   "$dst {${mask}}, ${src}{1to2}}",
7594                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7595                   VK2WM:$mask, f64mem:$src), 0, "att">;
7596   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7597                   "$dst {${mask}} {z}, ${src}{1to2}}",
7598                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7599                   VK2WM:$mask, f64mem:$src), 0, "att">;
7601   def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7602                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7603   def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7604                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7605                   VK4WM:$mask, VR256X:$src), 0, "att">;
7606   def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
7607                   "$dst {${mask}} {z}, $src}",
7608                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7609                   VK4WM:$mask, VR256X:$src), 0, "att">;
7610   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7611                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7612   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
7613                   "$dst {${mask}}, ${src}{1to4}}",
7614                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7615                   VK4WM:$mask, f64mem:$src), 0, "att">;
7616   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7617                   "$dst {${mask}} {z}, ${src}{1to4}}",
7618                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7619                   VK4WM:$mask, f64mem:$src), 0, "att">;
7622 defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
7623                                   VEX_W, PD, EVEX_CD8<64, CD8VF>;
7624 defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
7625                                   PS, EVEX_CD8<32, CD8VH>;
7627 let Predicates = [HasAVX512] in {
7628   def : Pat<(v8f32 (fpround (v8f64 VR512:$src))),
7629             (VCVTPD2PSZrr VR512:$src)>;
7630   def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
7631                      VR256X:$src0),
7632             (VCVTPD2PSZrrk VR256X:$src0, VK8WM:$mask, VR512:$src)>;
7633   def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
7634                      v8f32x_info.ImmAllZerosV),
7635             (VCVTPD2PSZrrkz VK8WM:$mask, VR512:$src)>;
7637   def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
7638             (VCVTPD2PSZrm addr:$src)>;
7639   def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
7640                      VR256X:$src0),
7641             (VCVTPD2PSZrmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
7642   def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
7643                      v8f32x_info.ImmAllZerosV),
7644             (VCVTPD2PSZrmkz VK8WM:$mask, addr:$src)>;
7646   def : Pat<(v8f32 (fpround (v8f64 (X86VBroadcastld64 addr:$src)))),
7647             (VCVTPD2PSZrmb addr:$src)>;
7648   def : Pat<(vselect VK8WM:$mask,
7649                      (fpround (v8f64 (X86VBroadcastld64 addr:$src))),
7650                      (v8f32 VR256X:$src0)),
7651             (VCVTPD2PSZrmbk VR256X:$src0, VK8WM:$mask, addr:$src)>;
7652   def : Pat<(vselect VK8WM:$mask,
7653                      (fpround (v8f64 (X86VBroadcastld64 addr:$src))),
7654                      v8f32x_info.ImmAllZerosV),
7655             (VCVTPD2PSZrmbkz VK8WM:$mask, addr:$src)>;
7658 let Predicates = [HasVLX] in {
7659   def : Pat<(v4f32 (fpround (v4f64 VR256X:$src))),
7660             (VCVTPD2PSZ256rr VR256X:$src)>;
7661   def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
7662                      VR128X:$src0),
7663             (VCVTPD2PSZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
7664   def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
7665                      v4f32x_info.ImmAllZerosV),
7666             (VCVTPD2PSZ256rrkz VK4WM:$mask, VR256X:$src)>;
7668   def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
7669             (VCVTPD2PSZ256rm addr:$src)>;
7670   def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
7671                      VR128X:$src0),
7672             (VCVTPD2PSZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
7673   def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
7674                      v4f32x_info.ImmAllZerosV),
7675             (VCVTPD2PSZ256rmkz VK4WM:$mask, addr:$src)>;
7677   def : Pat<(v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
7678             (VCVTPD2PSZ256rmb addr:$src)>;
7679   def : Pat<(vselect VK4WM:$mask,
7680                      (v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
7681                      VR128X:$src0),
7682             (VCVTPD2PSZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
7683   def : Pat<(vselect VK4WM:$mask,
7684                      (v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
7685                      v4f32x_info.ImmAllZerosV),
7686             (VCVTPD2PSZ256rmbkz VK4WM:$mask, addr:$src)>;
7688   // Special patterns to allow use of X86vmfpround for masking. Instruction
7689   // patterns have been disabled with null_frag.
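  // For example (assumed lowering): a masked intrinsic such as
  // _mm_mask_cvtpd_ps(src, k, a) is selected through X86vmfpround below,
  // which carries the mask and passthru operands directly; a plain vselect
  // cannot be used here because the v2f64 -> v4f32 conversion defines only
  // the low two result lanes.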
7690   def : Pat<(X86vfpround (v2f64 VR128X:$src)),
7691             (VCVTPD2PSZ128rr VR128X:$src)>;
7692   def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
7693                           VK2WM:$mask),
7694             (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
7695   def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV,
7696                           VK2WM:$mask),
7697             (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
7699   def : Pat<(X86vfpround (loadv2f64 addr:$src)),
7700             (VCVTPD2PSZ128rm addr:$src)>;
7701   def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
7702                           VK2WM:$mask),
7703             (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
7704   def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV,
7705                           VK2WM:$mask),
7706             (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;
7708   def : Pat<(X86vfpround (v2f64 (X86VBroadcastld64 addr:$src))),
7709             (VCVTPD2PSZ128rmb addr:$src)>;
7710   def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
7711                           (v4f32 VR128X:$src0), VK2WM:$mask),
7712             (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
7713   def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
7714                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
7715             (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
7718 // Convert Signed/Unsigned Doubleword to Double
7719 multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
7720                            SDNode OpNode128, X86SchedWriteWidths sched> {
7721   // No rounding variant for this op: the i32/u32 -> f64 conversion is exact.
7722   let Predicates = [HasAVX512] in
7723     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
7724                             sched.ZMM>, EVEX_V512;
7726   let Predicates = [HasVLX] in {
7727     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
7728                                OpNode128, sched.XMM, "{1to2}", "", i64mem, VK2WM,
7729                                (v2f64 (OpNode128 (bc_v4i32
7730                                 (v2i64
7731                                  (scalar_to_vector (loadi64 addr:$src))))))>,
7732                                EVEX_V128;
7733     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
7734                                sched.YMM>, EVEX_V256;
7735   }
7738 // Convert Signed/Unsigned Doubleword to Float
7739 multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
7740                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7741   let Predicates = [HasAVX512] in
7742     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
7743                             sched.ZMM>,
7744              avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
7745                                OpNodeRnd, sched.ZMM>, EVEX_V512;
7747   let Predicates = [HasVLX] in {
7748     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
7749                                sched.XMM>, EVEX_V128;
7750     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
7751                                sched.YMM>, EVEX_V256;
7752   }
7755 // Convert Float to Signed/Unsigned Doubleword with truncation
7756 multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7757                             SDNode OpNodeSAE, X86SchedWriteWidths sched> {
7758   let Predicates = [HasAVX512] in {
7759     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7760                             sched.ZMM>,
7761              avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
7762                                 OpNodeSAE, sched.ZMM>, EVEX_V512;
7763   }
7764   let Predicates = [HasVLX] in {
7765     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7766                                sched.XMM>, EVEX_V128;
7767     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7768                                sched.YMM>, EVEX_V256;
7769   }
7772 // Convert Float to Signed/Unsigned Doubleword
7773 multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7774                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7775   let Predicates = [HasAVX512] in {
7776     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7777                             sched.ZMM>,
7778              avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
7779                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
7780   }
7781   let Predicates = [HasVLX] in {
7782     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7783                                sched.XMM>, EVEX_V128;
7784     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7785                                sched.YMM>, EVEX_V256;
7786   }
7789 // Convert Double to Signed/Unsigned Doubleword with truncation
7790 multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7791                             SDNode OpNodeSAE, X86SchedWriteWidths sched> {
7792   let Predicates = [HasAVX512] in {
7793     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7794                             sched.ZMM>,
7795              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
7796                                 OpNodeSAE, sched.ZMM>, EVEX_V512;
7797   }
7798   let Predicates = [HasVLX] in {
7799     // We need "x"/"y" suffixes in order to distinguish between the 128- and
7800     // 256-bit memory forms of these instructions in the asm parser. They have
7801     // the same dest type - 'v4i32x_info'. We also specify the broadcast string
7802     // explicitly for the same reason.
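    // For example (illustrative): "vcvttpd2dqx (%rax), %xmm0" reads 128 bits
    // of memory while "vcvttpd2dqy (%rax), %xmm0" reads 256 bits; both write
    // an XMM register, so without the suffix the memory width would be
    // ambiguous.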
7803     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
7804                                null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
7805                                VK2WM>, EVEX_V128;
7806     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7807                                sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7808   }
7810   def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7811                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
7812                   VR128X:$src), 0, "att">;
7813   def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7814                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7815                   VK2WM:$mask, VR128X:$src), 0, "att">;
7816   def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7817                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7818                   VK2WM:$mask, VR128X:$src), 0, "att">;
7819   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7820                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
7821                   f64mem:$src), 0, "att">;
7822   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
7823                   "$dst {${mask}}, ${src}{1to2}}",
7824                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7825                   VK2WM:$mask, f64mem:$src), 0, "att">;
7826   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7827                   "$dst {${mask}} {z}, ${src}{1to2}}",
7828                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7829                   VK2WM:$mask, f64mem:$src), 0, "att">;
7831   def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7832                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
7833                   VR256X:$src), 0, "att">;
7834   def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7835                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7836                   VK4WM:$mask, VR256X:$src), 0, "att">;
7837   def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7838                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7839                   VK4WM:$mask, VR256X:$src), 0, "att">;
7840   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7841                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
7842                   f64mem:$src), 0, "att">;
7843   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
7844                   "$dst {${mask}}, ${src}{1to4}}",
7845                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7846                   VK4WM:$mask, f64mem:$src), 0, "att">;
7847   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7848                   "$dst {${mask}} {z}, ${src}{1to4}}",
7849                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7850                   VK4WM:$mask, f64mem:$src), 0, "att">;
7853 // Convert Double to Signed/Unsigned Doubleword
7854 multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7855                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7856   let Predicates = [HasAVX512] in {
7857     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7858                             sched.ZMM>,
7859              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
7860                                OpNodeRnd, sched.ZMM>, EVEX_V512;
7861   }
7862   let Predicates = [HasVLX] in {
7863     // We need "x"/"y" suffixes in order to distinguish between the 128- and
7864     // 256-bit memory forms of these instructions in the asm parser. They have
7865     // the same dest type - 'v4i32x_info'. We also specify the broadcast string
7866     // explicitly for the same reason.
7867     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
7868                                null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
7869                                VK2WM>, EVEX_V128;
7870     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7871                                sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7872   }
7874   def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7875                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7876   def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7877                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7878                   VK2WM:$mask, VR128X:$src), 0, "att">;
7879   def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7880                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7881                   VK2WM:$mask, VR128X:$src), 0, "att">;
7882   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7883                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
7884                   f64mem:$src), 0, "att">;
7885   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
7886                   "$dst {${mask}}, ${src}{1to2}}",
7887                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7888                   VK2WM:$mask, f64mem:$src), 0, "att">;
7889   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7890                   "$dst {${mask}} {z}, ${src}{1to2}}",
7891                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7892                   VK2WM:$mask, f64mem:$src), 0, "att">;
7894   def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7895                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7896   def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7897                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7898                   VK4WM:$mask, VR256X:$src), 0, "att">;
7899   def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7900                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7901                   VK4WM:$mask, VR256X:$src), 0, "att">;
7902   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7903                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
7904                   f64mem:$src), 0, "att">;
7905   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
7906                   "$dst {${mask}}, ${src}{1to4}}",
7907                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7908                   VK4WM:$mask, f64mem:$src), 0, "att">;
7909   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7910                   "$dst {${mask}} {z}, ${src}{1to4}}",
7911                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7912                   VK4WM:$mask, f64mem:$src), 0, "att">;
7915 // Convert Double to Signed/Unsigned Quadword
7916 multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7917                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7918   let Predicates = [HasDQI] in {
7919     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7920                             sched.ZMM>,
7921              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
7922                                OpNodeRnd, sched.ZMM>, EVEX_V512;
7923   }
7924   let Predicates = [HasDQI, HasVLX] in {
7925     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7926                                sched.XMM>, EVEX_V128;
7927     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7928                                sched.YMM>, EVEX_V256;
7929   }
7932 // Convert Double to Signed/Unsigned Quadword with truncation
7933 multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7934                             SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7935   let Predicates = [HasDQI] in {
7936     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7937                             sched.ZMM>,
7938              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
7939                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
7940   }
7941   let Predicates = [HasDQI, HasVLX] in {
7942     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7943                                sched.XMM>, EVEX_V128;
7944     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7945                                sched.YMM>, EVEX_V256;
7946   }
7949 // Convert Signed/Unsigned Quadword to Double
7950 multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
7951                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7952   let Predicates = [HasDQI] in {
7953     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
7954                             sched.ZMM>,
7955              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
7956                                OpNodeRnd, sched.ZMM>, EVEX_V512;
7957   }
7958   let Predicates = [HasDQI, HasVLX] in {
7959     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
7960                                sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
7961     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
7962                                sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
7963   }
7966 // Convert Float to Signed/Unsigned Quadword
7967 multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7968                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7969   let Predicates = [HasDQI] in {
7970     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
7971                             sched.ZMM>,
7972              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
7973                                OpNodeRnd, sched.ZMM>, EVEX_V512;
7974   }
7975   let Predicates = [HasDQI, HasVLX] in {
7976     // Explicitly specified broadcast string, since we take only 2 elements
7977     // from the v4f32x_info source.
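    // Illustrative broadcast form (assumed): "vcvtps2qq (%rax){1to2}, %xmm0"
    // loads a single f32, replicates it to both lanes, and converts each to
    // a 64-bit integer.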
7978     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
7979                                sched.XMM, "{1to2}", "", f64mem, VK2WM,
7980                                (v2i64 (OpNode (bc_v4f32
7981                                 (v2f64
7982                                  (scalar_to_vector (loadf64 addr:$src))))))>,
7983                                EVEX_V128;
7984     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
7985                                sched.YMM>, EVEX_V256;
7986   }
7989 // Convert Float to Signed/Unsigned Quadword with truncation
7990 multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7991                             SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7992   let Predicates = [HasDQI] in {
7993     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, sched.ZMM>,
7994              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
7995                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
7996   }
7997   let Predicates = [HasDQI, HasVLX] in {
7998     // Explicitly specified broadcast string, since we take only 2 elements
7999     // from the v4f32x_info source.
8000     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8001                                sched.XMM, "{1to2}", "", f64mem, VK2WM,
8002                                (v2i64 (OpNode (bc_v4f32
8003                                 (v2f64
8004                                  (scalar_to_vector (loadf64 addr:$src))))))>,
8005                                EVEX_V128;
8006     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8007                                sched.YMM>, EVEX_V256;
8008   }
8011 // Convert Signed/Unsigned Quadword to Float
8012 multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
8013                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
8014   let Predicates = [HasDQI] in {
8015     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
8016                             sched.ZMM>,
8017              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
8018                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8019   }
8020   let Predicates = [HasDQI, HasVLX] in {
8021     // We need "x"/"y" suffixes in order to distinguish between the 128- and
8022     // 256-bit memory forms of these instructions in the asm parser. They have
8023     // the same dest type - 'v4f32x_info'. We also specify the broadcast string
8024     // explicitly for the same reason.
8025     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag,
8026                                sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
8027                                EVEX_V128, NotEVEX2VEXConvertible;
8028     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
8029                                sched.YMM, "{1to4}", "{y}">, EVEX_V256,
8030                                NotEVEX2VEXConvertible;
8031   }
8033   def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
8034                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8035                   VR128X:$src), 0, "att">;
8036   def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8037                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8038                   VK2WM:$mask, VR128X:$src), 0, "att">;
8039   def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8040                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8041                   VK2WM:$mask, VR128X:$src), 0, "att">;
8042   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8043                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8044                   i64mem:$src), 0, "att">;
8045   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
8046                   "$dst {${mask}}, ${src}{1to2}}",
8047                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8048                   VK2WM:$mask, i64mem:$src), 0, "att">;
8049   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8050                   "$dst {${mask}} {z}, ${src}{1to2}}",
8051                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8052                   VK2WM:$mask, i64mem:$src), 0, "att">;
8054   def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
8055                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8056                   VR256X:$src), 0, "att">;
8057   def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|"
8058                   "$dst {${mask}}, $src}",
8059                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8060                   VK4WM:$mask, VR256X:$src), 0, "att">;
8061   def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
8062                   "$dst {${mask}} {z}, $src}",
8063                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8064                   VK4WM:$mask, VR256X:$src), 0, "att">;
8065   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8066                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8067                   i64mem:$src), 0, "att">;
8068   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
8069                   "$dst {${mask}}, ${src}{1to4}}",
8070                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8071                   VK4WM:$mask, i64mem:$src), 0, "att">;
8072   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8073                   "$dst {${mask}} {z}, ${src}{1to4}}",
8074                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8075                   VK4WM:$mask, i64mem:$src), 0, "att">;
8078 defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
8079                                  SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8081 defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
8082                                 X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8083                                 PS, EVEX_CD8<32, CD8VF>;
8085 defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86cvttp2si,
8086                                 X86cvttp2siSAE, SchedWriteCvtPS2DQ>,
8087                                 XS, EVEX_CD8<32, CD8VF>;
8089 defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86cvttp2si,
8090                                  X86cvttp2siSAE, SchedWriteCvtPD2DQ>,
8091                                  PD, VEX_W, EVEX_CD8<64, CD8VF>;
8093 defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86cvttp2ui,
8094                                  X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PS,
8095                                  EVEX_CD8<32, CD8VF>;
8097 defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui,
8098                                  X86cvttp2uiSAE, SchedWriteCvtPD2DQ>,
8099                                  PS, VEX_W, EVEX_CD8<64, CD8VF>;
8101 defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
8102                                   X86VUintToFP, SchedWriteCvtDQ2PD>, XS,
8103                                   EVEX_CD8<32, CD8VH>;
8105 defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
8106                                  X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD,
8107                                  EVEX_CD8<32, CD8VF>;
8109 defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
8110                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8111                                  EVEX_CD8<32, CD8VF>;
8113 defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
8114                                  X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
8115                                  VEX_W, EVEX_CD8<64, CD8VF>;
8117 defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
8118                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8119                                  PS, EVEX_CD8<32, CD8VF>;
8121 defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
8122                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8123                                  PS, EVEX_CD8<64, CD8VF>;
8125 defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
8126                                  X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8127                                  PD, EVEX_CD8<64, CD8VF>;
8129 defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
8130                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8131                                  EVEX_CD8<32, CD8VH>;
8133 defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
8134                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8135                                  PD, EVEX_CD8<64, CD8VF>;
8137 defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
8138                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
8139                                  EVEX_CD8<32, CD8VH>;
8141 defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86cvttp2si,
8142                                  X86cvttp2siSAE, SchedWriteCvtPD2DQ>, VEX_W,
8143                                  PD, EVEX_CD8<64, CD8VF>;
8145 defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86cvttp2si,
8146                                  X86cvttp2siSAE, SchedWriteCvtPS2DQ>, PD,
8147                                  EVEX_CD8<32, CD8VH>;
8149 defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86cvttp2ui,
8150                                  X86cvttp2uiSAE, SchedWriteCvtPD2DQ>, VEX_W,
8151                                  PD, EVEX_CD8<64, CD8VF>;
8153 defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui,
8154                                  X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PD,
8155                                  EVEX_CD8<32, CD8VH>;
8157 defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
8158                             X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
8159                             EVEX_CD8<64, CD8VF>;
8161 defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
8162                             X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
8163                             EVEX_CD8<64, CD8VF>;
8165 defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp,
8166                             X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS,
8167                             EVEX_CD8<64, CD8VF>;
8169 defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp,
8170                             X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD,
8171                             EVEX_CD8<64, CD8VF>;
8173 let Predicates = [HasVLX] in {
8174   // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8175   // patterns have been disabled with null_frag.
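  // The unmasked X86cvtp2Int node selects the plain rr/rm/rmb forms below,
  // X86mcvtp2Int with a register passthru selects the merge-masked "k" forms,
  // and X86mcvtp2Int with ImmAllZerosV as the passthru selects the zero-masked
  // "kz" forms. The same scheme repeats for the other conversion nodes below.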
8176   def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8177             (VCVTPD2DQZ128rr VR128X:$src)>;
8178   def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8179                           VK2WM:$mask),
8180             (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8181   def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8182                           VK2WM:$mask),
8183             (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8185   def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8186             (VCVTPD2DQZ128rm addr:$src)>;
8187   def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8188                           VK2WM:$mask),
8189             (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8190   def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8191                           VK2WM:$mask),
8192             (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8194   def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
8195             (VCVTPD2DQZ128rmb addr:$src)>;
8196   def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8197                           (v4i32 VR128X:$src0), VK2WM:$mask),
8198             (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8199   def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8200                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8201             (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8203   // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8204   // patterns have been disabled with null_frag.
8205   def : Pat<(v4i32 (X86cvttp2si (v2f64 VR128X:$src))),
8206             (VCVTTPD2DQZ128rr VR128X:$src)>;
8207   def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8208                           VK2WM:$mask),
8209             (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8210   def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8211                           VK2WM:$mask),
8212             (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8214   def : Pat<(v4i32 (X86cvttp2si (loadv2f64 addr:$src))),
8215             (VCVTTPD2DQZ128rm addr:$src)>;
8216   def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8217                           VK2WM:$mask),
8218             (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8219   def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8220                           VK2WM:$mask),
8221             (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8223   def : Pat<(v4i32 (X86cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
8224             (VCVTTPD2DQZ128rmb addr:$src)>;
8225   def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8226                           (v4i32 VR128X:$src0), VK2WM:$mask),
8227             (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8228   def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8229                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8230             (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8232   // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8233   // patterns have been disabled with null_frag.
8234   def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8235             (VCVTPD2UDQZ128rr VR128X:$src)>;
8236   def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8237                            VK2WM:$mask),
8238             (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8239   def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8240                            VK2WM:$mask),
8241             (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8243   def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8244             (VCVTPD2UDQZ128rm addr:$src)>;
8245   def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8246                            VK2WM:$mask),
8247             (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8248   def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8249                            VK2WM:$mask),
8250             (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8252   def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
8253             (VCVTPD2UDQZ128rmb addr:$src)>;
8254   def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8255                            (v4i32 VR128X:$src0), VK2WM:$mask),
8256             (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8257   def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8258                            v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8259             (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8261   // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
8262   // patterns have been disabled with null_frag.
8263   def : Pat<(v4i32 (X86cvttp2ui (v2f64 VR128X:$src))),
8264             (VCVTTPD2UDQZ128rr VR128X:$src)>;
8265   def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8266                           VK2WM:$mask),
8267             (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8268   def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8269                           VK2WM:$mask),
8270             (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8272   def : Pat<(v4i32 (X86cvttp2ui (loadv2f64 addr:$src))),
8273             (VCVTTPD2UDQZ128rm addr:$src)>;
8274   def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8275                           VK2WM:$mask),
8276             (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8277   def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8278                           VK2WM:$mask),
8279             (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8281   def : Pat<(v4i32 (X86cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
8282             (VCVTTPD2UDQZ128rmb addr:$src)>;
8283   def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8284                           (v4i32 VR128X:$src0), VK2WM:$mask),
8285             (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8286   def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8287                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8288             (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8291 let Predicates = [HasDQI, HasVLX] in {
8292   def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8293             (VCVTPS2QQZ128rm addr:$src)>;
8294   def : Pat<(v2i64 (vselect VK2WM:$mask,
8295                             (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8296                             VR128X:$src0)),
8297             (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8298   def : Pat<(v2i64 (vselect VK2WM:$mask,
8299                             (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8300                             v2i64x_info.ImmAllZerosV)),
8301             (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8303   def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8304             (VCVTPS2UQQZ128rm addr:$src)>;
8305   def : Pat<(v2i64 (vselect VK2WM:$mask,
8306                             (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8307                             VR128X:$src0)),
8308             (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8309   def : Pat<(v2i64 (vselect VK2WM:$mask,
8310                             (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8311                             v2i64x_info.ImmAllZerosV)),
8312             (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8314   def : Pat<(v2i64 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8315             (VCVTTPS2QQZ128rm addr:$src)>;
8316   def : Pat<(v2i64 (vselect VK2WM:$mask,
8317                             (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8318                             VR128X:$src0)),
8319             (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8320   def : Pat<(v2i64 (vselect VK2WM:$mask,
8321                             (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8322                             v2i64x_info.ImmAllZerosV)),
8323             (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8325   def : Pat<(v2i64 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8326             (VCVTTPS2UQQZ128rm addr:$src)>;
8327   def : Pat<(v2i64 (vselect VK2WM:$mask,
8328                             (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8329                             VR128X:$src0)),
8330             (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8331   def : Pat<(v2i64 (vselect VK2WM:$mask,
8332                             (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8333                             v2i64x_info.ImmAllZerosV)),
8334             (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8337 let Predicates = [HasAVX512, NoVLX] in {
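// Without AVX512VL only the 512-bit forms of these conversions exist, so the
// narrower operations are widened: the source is inserted into a ZMM register
// (upper elements undefined), the 512-bit instruction is used, and the result
// is extracted back down to the original width.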
8338 def : Pat<(v8i32 (X86cvttp2ui (v8f32 VR256X:$src1))),
8339           (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
8340            (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
8341                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;
8343 def : Pat<(v4i32 (X86cvttp2ui (v4f32 VR128X:$src1))),
8344           (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
8345            (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
8346                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;
8348 def : Pat<(v4i32 (X86cvttp2ui (v4f64 VR256X:$src1))),
8349           (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
8350            (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8351                                  VR256X:$src1, sub_ymm)))), sub_xmm)>;
8353 def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
8354           (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
8355            (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
8356                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;
8358 def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
8359           (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
8360            (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
8361                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;
8363 def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
8364           (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
8365            (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
8366                                  VR128X:$src1, sub_xmm)))), sub_ymm)>;
8368 def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
8369           (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
8370            (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
8371                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;
8374 let Predicates = [HasVLX] in {
8375   def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8376             (VCVTDQ2PDZ128rm addr:$src)>;
8377   def : Pat<(v2f64 (vselect VK2WM:$mask,
8378                             (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8379                             VR128X:$src0)),
8380             (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8381   def : Pat<(v2f64 (vselect VK2WM:$mask,
8382                             (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8383                             v2f64x_info.ImmAllZerosV)),
8384             (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8386   def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8387             (VCVTUDQ2PDZ128rm addr:$src)>;
8388   def : Pat<(v2f64 (vselect VK2WM:$mask,
8389                             (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8390                             VR128X:$src0)),
8391             (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8392   def : Pat<(v2f64 (vselect VK2WM:$mask,
8393                             (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8394                             v2f64x_info.ImmAllZerosV)),
8395             (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8398 let Predicates = [HasDQI, HasVLX] in {
8399   // Special patterns to allow use of X86VMSintToFP for masking. Instruction
8400   // patterns have been disabled with null_frag.
8401   def : Pat<(v4f32 (X86VSintToFP (v2i64 VR128X:$src))),
8402             (VCVTQQ2PSZ128rr VR128X:$src)>;
8403   def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
8404                            VK2WM:$mask),
8405             (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8406   def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
8407                            VK2WM:$mask),
8408             (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
8410   def : Pat<(v4f32 (X86VSintToFP (loadv2i64 addr:$src))),
8411             (VCVTQQ2PSZ128rm addr:$src)>;
8412   def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
8413                            VK2WM:$mask),
8414             (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8415   def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
8416                            VK2WM:$mask),
8417             (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
8419   def : Pat<(v4f32 (X86VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
8420             (VCVTQQ2PSZ128rmb addr:$src)>;
8421   def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8422                            (v4f32 VR128X:$src0), VK2WM:$mask),
8423             (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8424   def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8425                            v4f32x_info.ImmAllZerosV, VK2WM:$mask),
8426             (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
8428   // Special patterns to allow use of X86VMUintToFP for masking. Instruction
8429   // patterns have been disabled with null_frag.
8430   def : Pat<(v4f32 (X86VUintToFP (v2i64 VR128X:$src))),
8431             (VCVTUQQ2PSZ128rr VR128X:$src)>;
8432   def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
8433                            VK2WM:$mask),
8434             (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8435   def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
8436                            VK2WM:$mask),
8437             (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
8439   def : Pat<(v4f32 (X86VUintToFP (loadv2i64 addr:$src))),
8440             (VCVTUQQ2PSZ128rm addr:$src)>;
8441   def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
8442                            VK2WM:$mask),
8443             (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8444   def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
8445                            VK2WM:$mask),
8446             (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
8448   def : Pat<(v4f32 (X86VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
8449             (VCVTUQQ2PSZ128rmb addr:$src)>;
8450   def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8451                            (v4f32 VR128X:$src0), VK2WM:$mask),
8452             (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8453   def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8454                            v4f32x_info.ImmAllZerosV, VK2WM:$mask),
8455             (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
8458 let Predicates = [HasDQI, NoVLX] in {
8459 def : Pat<(v2i64 (X86cvttp2si (v2f64 VR128X:$src1))),
8460           (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
8461            (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8462                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;
8464 def : Pat<(v4i64 (X86cvttp2si (v4f32 VR128X:$src1))),
8465           (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
8466            (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
8467                                   VR128X:$src1, sub_xmm)))), sub_ymm)>;
8469 def : Pat<(v4i64 (X86cvttp2si (v4f64 VR256X:$src1))),
8470           (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
8471            (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8472                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;
8474 def : Pat<(v2i64 (X86cvttp2ui (v2f64 VR128X:$src1))),
8475           (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
8476            (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8477                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;
8479 def : Pat<(v4i64 (X86cvttp2ui (v4f32 VR128X:$src1))),
8480           (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
8481            (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
8482                                   VR128X:$src1, sub_xmm)))), sub_ymm)>;
8484 def : Pat<(v4i64 (X86cvttp2ui (v4f64 VR256X:$src1))),
8485           (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
8486            (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8487                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;
8489 def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
8490           (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
8491            (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8492                                   VR256X:$src1, sub_ymm)))), sub_xmm)>;
8494 def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
8495           (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
8496            (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8497                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;
8499 def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
8500           (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
8501            (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8502                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;
8504 def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
8505           (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
8506            (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8507                                   VR256X:$src1, sub_ymm)))), sub_xmm)>;
8509 def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
8510           (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
8511            (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8512                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;
8514 def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
8515           (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
8516            (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8517                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;
8520 //===----------------------------------------------------------------------===//
8521 // Half precision conversion instructions
8522 //===----------------------------------------------------------------------===//
8524 multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8525                            X86MemOperand x86memop, PatFrag ld_frag,
8526                            X86FoldableSchedWrite sched> {
8527   defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
8528                             (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
8529                             (X86cvtph2ps (_src.VT _src.RC:$src))>,
8530                             T8PD, Sched<[sched]>;
8531   defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
8532                             (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
8533                             (X86cvtph2ps (_src.VT
8534                                           (ld_frag addr:$src)))>,
8535                             T8PD, Sched<[sched.Folded]>;
8538 multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8539                                X86FoldableSchedWrite sched> {
8540   defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
8541                              (ins _src.RC:$src), "vcvtph2ps",
8542                              "{sae}, $src", "$src, {sae}",
8543                              (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
8544                              T8PD, EVEX_B, Sched<[sched]>;
8547 let Predicates = [HasAVX512] in
8548   defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, load,
8549                                     WriteCvtPH2PSZ>,
8550                     avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
8551                     EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8553 let Predicates = [HasVLX] in {
8554   defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
8555                        load, WriteCvtPH2PSY>, EVEX, EVEX_V256,
8556                        EVEX_CD8<32, CD8VH>;
8557   defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
8558                        load, WriteCvtPH2PS>, EVEX, EVEX_V128,
8559                        EVEX_CD8<32, CD8VH>;
8561   // Pattern match vcvtph2ps of a scalar i64 load.
8562   def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
8563             (VCVTPH2PSZ128rm addr:$src)>;
8564   def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
8565               (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
8566             (VCVTPH2PSZ128rm addr:$src)>;
8569 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8570                            X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
8571 let ExeDomain = GenericDomain in {
8572   def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8573              (ins _src.RC:$src1, i32u8imm:$src2),
8574              "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8575              [(set _dest.RC:$dst,
8576                    (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
8577              Sched<[RR]>;
8578   let Constraints = "$src0 = $dst" in
8579   def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8580              (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8581              "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
8582              [(set _dest.RC:$dst,
8583                    (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8584                                  _dest.RC:$src0, _src.KRCWM:$mask))]>,
8585              Sched<[RR]>, EVEX_K;
8586   def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8587              (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8588              "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
8589              [(set _dest.RC:$dst,
8590                    (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8591                                  _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
8592              Sched<[RR]>, EVEX_KZ;
8593   let hasSideEffects = 0, mayStore = 1 in {
8594     def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
8595                (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
8596                "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8597                Sched<[MR]>;
8598     def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
8599                (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8600                "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
8601                 EVEX_K, Sched<[MR]>, NotMemoryFoldable;
8602   }
8606 multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8607                                SchedWrite Sched> {
8608   let hasSideEffects = 0 in
8609   defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
8610                    (outs _dest.RC:$dst),
8611                    (ins _src.RC:$src1, i32u8imm:$src2),
8612                    "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
8613                    EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
8616 let Predicates = [HasAVX512] in {
8617   defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
8618                                     WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
8619                     avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
8620                                         EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8621   let Predicates = [HasVLX] in {
8622     defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
8623                                          WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
8624                                          EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
8625     defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
8626                                          WriteCvtPS2PH, WriteCvtPS2PHSt>,
8627                                          EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
8628   }
8630   def : Pat<(store (f64 (extractelt
8631                          (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))),
8632                          (iPTR 0))), addr:$dst),
8633             (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
8634   def : Pat<(store (i64 (extractelt
8635                          (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))),
8636                          (iPTR 0))), addr:$dst),
8637             (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
8638   def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
8639             (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
8640   def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
8641             (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
8644 // Patterns for matching conversions from float to half-float and vice versa.
8645 let Predicates = [HasVLX] in {
8646   // Use MXCSR.RC for rounding instead of explicitly specifying the default
8647   // rounding mode (nearest-even, encoded as 0). The two are equivalent in the
8648   // configurations we support (the default), but falling back to MXCSR is more
8649   // consistent with other instructions, which are always controlled by it.
8650   // Using MXCSR.RC corresponds to the immediate value 0b100 (4) used below.
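  // For reference, the imm8 layout (per the Intel SDM) is: imm8[2] = 1 -> use
  // MXCSR.RC; imm8[2] = 0 -> imm8[1:0] selects the mode (00 = nearest-even,
  // 01 = down, 10 = up, 11 = truncate).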
8651   def : Pat<(fp_to_f16 FR32X:$src),
8652             (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (v8i16 (VCVTPS2PHZ128rr
8653               (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4))), sub_16bit))>;
8655   def : Pat<(f16_to_fp GR16:$src),
8656             (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
8657               (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)))), FR32X)) >;
8659   def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
8660             (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
8661               (v8i16 (VCVTPS2PHZ128rr
8662                (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4)))), FR32X)) >;
8665 // Unordered/ordered scalar FP compare with SAE; sets EFLAGS.
8666 multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
8667                             string OpcodeStr, X86FoldableSchedWrite sched> {
8668   let hasSideEffects = 0 in
8669   def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
8670                   !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
8671                   EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
8674 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
8675   defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>,
8676                                    AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
8677   defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>,
8678                                    AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
8679   defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>,
8680                                    AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
8681   defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>,
8682                                    AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
8685 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
8686   defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
8687                                  "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
8688                                  EVEX_CD8<32, CD8VT1>;
8689   defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
8690                                   "ucomisd", WriteFCom>, PD, EVEX,
8691                                   VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8692   let Pattern = []<dag> in {
8693     defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
8694                                    "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
8695                                    EVEX_CD8<32, CD8VT1>;
8696     defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
8697                                    "comisd", WriteFCom>, PD, EVEX,
8698                                     VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8699   }
8700   let isCodeGenOnly = 1 in {
8701     defm VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
8702                           sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
8703                           EVEX_CD8<32, CD8VT1>;
8704     defm VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
8705                           sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX,
8706                           VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8708     defm VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
8709                           sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
8710                           EVEX_CD8<32, CD8VT1>;
8711     defm VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
8712                           sse_load_f64, "comisd", WriteFCom>, PD, EVEX,
8713                           VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8714   }
8717 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
8718 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8719                          X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8720   let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
8721   defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8722                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8723                            "$src2, $src1", "$src1, $src2",
8724                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8725                            EVEX_4V, VEX_LIG, Sched<[sched]>;
8726   defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8727                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8728                          "$src2, $src1", "$src1, $src2",
8729                          (OpNode (_.VT _.RC:$src1),
8730                           _.ScalarIntMemCPat:$src2)>, EVEX_4V, VEX_LIG,
8731                           Sched<[sched.Folded, sched.ReadAfterFold]>;
8735 defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
8736                                f32x_info>, EVEX_CD8<32, CD8VT1>,
8737                                T8PD;
8738 defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
8739                                f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
8740                                T8PD;
8741 defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
8742                                  SchedWriteFRsqrt.Scl, f32x_info>,
8743                                  EVEX_CD8<32, CD8VT1>, T8PD;
8744 defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
8745                                  SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
8746                                  EVEX_CD8<64, CD8VT1>, T8PD;
8748 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
8749 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
8750                          X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8751   let ExeDomain = _.ExeDomain in {
8752   defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8753                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
8754                          (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
8755                          Sched<[sched]>;
8756   defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8757                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8758                          (OpNode (_.VT
8759                            (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
8760                          Sched<[sched.Folded, sched.ReadAfterFold]>;
8761   defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8762                           (ins _.ScalarMemOp:$src), OpcodeStr,
8763                           "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8764                           (OpNode (_.VT
8765                             (_.BroadcastLdFrag addr:$src)))>,
8766                           EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
8767   }
8770 multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
8771                                 X86SchedWriteWidths sched> {
8772   defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
8773                            v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
8774   defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM,
8775                            v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
8777   // Define only if AVX512VL feature is present.
8778   let Predicates = [HasVLX] in {
8779     defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8780                                 OpNode, sched.XMM, v4f32x_info>,
8781                                EVEX_V128, EVEX_CD8<32, CD8VF>;
8782     defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8783                                 OpNode, sched.YMM, v8f32x_info>,
8784                                EVEX_V256, EVEX_CD8<32, CD8VF>;
8785     defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8786                                 OpNode, sched.XMM, v2f64x_info>,
8787                                EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
8788     defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8789                                 OpNode, sched.YMM, v4f64x_info>,
8790                                EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
8791   }
8794 defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
8795 defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
8797 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
8798 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
8799                          SDNode OpNode, SDNode OpNodeSAE,
8800                          X86FoldableSchedWrite sched> {
8801   let ExeDomain = _.ExeDomain in {
8802   defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8803                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8804                            "$src2, $src1", "$src1, $src2",
8805                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8806                            Sched<[sched]>;
8808   defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8809                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8810                             "{sae}, $src2, $src1", "$src1, $src2, {sae}",
8811                             (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8812                             EVEX_B, Sched<[sched]>;
8814   defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8815                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8816                          "$src2, $src1", "$src1, $src2",
8817                          (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2)>,
8818                          Sched<[sched.Folded, sched.ReadAfterFold]>;
8819   }
8822 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8823                         SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
8824   defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
8825                            sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG;
8826   defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
8827                            sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
8830 let Predicates = [HasERI] in {
8831   defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
8832                                SchedWriteFRcp.Scl>, T8PD, EVEX_4V;
8833   defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
8834                                SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
8837 defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
8838                               SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
8839 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
8841 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8842                          SDNode OpNode, X86FoldableSchedWrite sched> {
8843   let ExeDomain = _.ExeDomain in {
8844   defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8845                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
8846                          (OpNode (_.VT _.RC:$src))>,
8847                          Sched<[sched]>;
8849   defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8850                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8851                          (OpNode (_.VT
8852                              (bitconvert (_.LdFrag addr:$src))))>,
8853                           Sched<[sched.Folded, sched.ReadAfterFold]>;
8855   defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8856                          (ins _.ScalarMemOp:$src), OpcodeStr,
8857                          "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8858                          (OpNode (_.VT
8859                                   (_.BroadcastLdFrag addr:$src)))>,
8860                          EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
8861   }
8863 multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8864                          SDNode OpNode, X86FoldableSchedWrite sched> {
8865   let ExeDomain = _.ExeDomain in
8866   defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8867                         (ins _.RC:$src), OpcodeStr,
8868                         "{sae}, $src", "$src, {sae}",
8869                         (OpNode (_.VT _.RC:$src))>,
8870                         EVEX_B, Sched<[sched]>;
8873 multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
8874                        SDNode OpNodeSAE, X86SchedWriteWidths sched> {
8875    defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
8876               avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
8877               T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
8878    defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
8879               avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
8880               T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
8883 multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
8884                                   SDNode OpNode, X86SchedWriteWidths sched> {
8885   // Define only if AVX512VL feature is present.
8886   let Predicates = [HasVLX] in {
8887     defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
8888                                 sched.XMM>,
8889                                 EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
8890     defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
8891                                 sched.YMM>,
8892                                 EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
8893     defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
8894                                 sched.XMM>,
8895                                 EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
8896     defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
8897                                 sched.YMM>,
8898                                 EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
8899   }
8902 let Predicates = [HasERI] in {
8903  defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
8904                             SchedWriteFRsqrt>, EVEX;
8905  defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
8906                             SchedWriteFRcp>, EVEX;
8907  defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
8908                             SchedWriteFAdd>, EVEX;
8910 defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
8911                             SchedWriteFRnd>,
8912                  avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
8913                                           SchedWriteFRnd>, EVEX;
8915 multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
8916                                     X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8917   let ExeDomain = _.ExeDomain in
8918   defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8919                          (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
8920                          (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
8921                          EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
8924 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
8925                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8926   let ExeDomain = _.ExeDomain in {
8927   defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8928                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
8929                          (_.VT (fsqrt _.RC:$src))>, EVEX,
8930                          Sched<[sched]>;
8931   defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8932                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8933                          (fsqrt (_.VT
8934                            (bitconvert (_.LdFrag addr:$src))))>, EVEX,
8935                            Sched<[sched.Folded, sched.ReadAfterFold]>;
8936   defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8937                           (ins _.ScalarMemOp:$src), OpcodeStr,
8938                           "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8939                           (fsqrt (_.VT
8940                             (_.BroadcastLdFrag addr:$src)))>,
8941                           EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
8942   }
8945 multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
8946                                   X86SchedWriteSizes sched> {
8947   defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8948                                 sched.PS.ZMM, v16f32_info>,
8949                                 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8950   defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8951                                 sched.PD.ZMM, v8f64_info>,
8952                                 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8953   // Define only if AVX512VL feature is present.
8954   let Predicates = [HasVLX] in {
8955     defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8956                                      sched.PS.XMM, v4f32x_info>,
8957                                      EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
8958     defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8959                                      sched.PS.YMM, v8f32x_info>,
8960                                      EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
8961     defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8962                                      sched.PD.XMM, v2f64x_info>,
8963                                      EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8964     defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8965                                      sched.PD.YMM, v4f64x_info>,
8966                                      EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8967   }
8970 multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
8971                                         X86SchedWriteSizes sched> {
8972   defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
8973                                       sched.PS.ZMM, v16f32_info>,
8974                                       EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8975   defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
8976                                       sched.PD.ZMM, v8f64_info>,
8977                                       EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8980 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
8981                               X86VectorVTInfo _, string Name> {
8982   let ExeDomain = _.ExeDomain in {
8983     defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8984                          (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8985                          "$src2, $src1", "$src1, $src2",
8986                          (X86fsqrts (_.VT _.RC:$src1),
8987                                     (_.VT _.RC:$src2))>,
8988                          Sched<[sched]>;
8989     defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8990                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8991                          "$src2, $src1", "$src1, $src2",
8992                          (X86fsqrts (_.VT _.RC:$src1),
8993                                     _.ScalarIntMemCPat:$src2)>,
8994                          Sched<[sched.Folded, sched.ReadAfterFold]>;
8995     defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8996                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
8997                          "$rc, $src2, $src1", "$src1, $src2, $rc",
8998                          (X86fsqrtRnds (_.VT _.RC:$src1),
8999                                      (_.VT _.RC:$src2),
9000                                      (i32 timm:$rc))>,
9001                          EVEX_B, EVEX_RC, Sched<[sched]>;
9003     let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
9004       def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9005                 (ins _.FRC:$src1, _.FRC:$src2),
9006                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9007                 Sched<[sched]>;
9008       let mayLoad = 1 in
9009         def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9010                   (ins _.FRC:$src1, _.ScalarMemOp:$src2),
9011                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9012                   Sched<[sched.Folded, sched.ReadAfterFold]>;
9013     }
9014   }
9016   let Predicates = [HasAVX512] in {
9017     def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
9018               (!cast<Instruction>(Name#Zr)
9019                   (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
9020   }
9022   let Predicates = [HasAVX512, OptForSize] in {
9023     def : Pat<(_.EltVT (fsqrt (load addr:$src))),
9024               (!cast<Instruction>(Name#Zm)
9025                   (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
9026   }
9029 multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
9030                                   X86SchedWriteSizes sched> {
9031   defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
9032                         EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
9033   defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
9034                         EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
9037 defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
9038              avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
9040 defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
9042 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
9043                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9044   let ExeDomain = _.ExeDomain in {
9045   defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9046                            (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9047                            "$src3, $src2, $src1", "$src1, $src2, $src3",
9048                            (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9049                            (i32 timm:$src3)))>,
9050                            Sched<[sched]>;
9052   defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9053                          (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9054                          "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
9055                          (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9056                          (i32 timm:$src3)))>, EVEX_B,
9057                          Sched<[sched]>;
9059   defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9060                          (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
9061                          OpcodeStr,
9062                          "$src3, $src2, $src1", "$src1, $src2, $src3",
9063                          (_.VT (X86RndScales _.RC:$src1,
9064                                 _.ScalarIntMemCPat:$src2, (i32 timm:$src3)))>,
9065                          Sched<[sched.Folded, sched.ReadAfterFold]>;
9067   let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
9068     def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9069                (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
9070                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9071                []>, Sched<[sched]>;
9073     let mayLoad = 1 in
9074       def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9075                  (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9076                  OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9077                  []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
9078   }
9079   }
9081   let Predicates = [HasAVX512] in {
9082     def : Pat<(X86VRndScale _.FRC:$src1, timm:$src2),
9083               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
9084                _.FRC:$src1, timm:$src2))>;
9085   }
9087   let Predicates = [HasAVX512, OptForSize] in {
9088     def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
9089               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
9090                addr:$src1, timm:$src2))>;
9091   }
9094 defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
9095                                            SchedWriteFRnd.Scl, f32x_info>,
9096                                            AVX512AIi8Base, EVEX_4V, VEX_LIG,
9097                                            EVEX_CD8<32, CD8VT1>;
9099 defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
9100                                            SchedWriteFRnd.Scl, f64x_info>,
9101                                            VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
9102                                            EVEX_CD8<64, CD8VT1>;
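// Illustrative only: VRNDSCALESS/VRNDSCALESD round one scalar element under
// control of an 8-bit immediate, for example (AT&T syntax)
//   vrndscaless $0x1, %xmm2, %xmm1, %xmm0 {%k1}
//   vrndscalesd $0x2, {sae}, %xmm2, %xmm1, %xmm0
// matching the register/{sae}/memory asm strings in the multiclass above.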
9104 multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
9105                                 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
9106                                 dag OutMask, Predicate BasePredicate> {
9107   let Predicates = [BasePredicate] in {
9108     def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
9109                (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9110                (extractelt _.VT:$dst, (iPTR 0))))),
9111               (!cast<Instruction>("V"#OpcPrefix#r_Intk)
9112                _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
9114     def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
9115                (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9116                ZeroFP))),
9117               (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
9118                OutMask, _.VT:$src2, _.VT:$src1)>;
9119   }
9122 defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
9123                             (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
9124                             fp32imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9125 defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
9126                             (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
9127                             fp64imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9130 //===----------------------------------------------------------------------===//
9131 // Integer truncate and extend operations
9132 //===----------------------------------------------------------------------===//
9134 // PatFrags that contain a select and a truncate op. They take operands in the
9135 // same order as X86vmtrunc, X86vmtruncs and X86vmtruncus, so either form can
9136 // be passed to the multiclasses below.
9137 def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
9138                            (vselect node:$mask,
9139                                     (trunc node:$src), node:$src0)>;
9140 def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
9141                             (vselect node:$mask,
9142                                      (X86vtruncs node:$src), node:$src0)>;
9143 def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
9144                              (vselect node:$mask,
9145                                       (X86vtruncus node:$src), node:$src0)>;
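// Sketch (illustration only) of why the operand order matters: select_trunc
// presents (vselect K, (trunc V), PassThru) with operands (V, PassThru, K),
// which is exactly the operand order of (X86vmtrunc V, PassThru, K), so the
// truncate multiclasses below can be instantiated with either node.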
9147 multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9148                               SDPatternOperator MaskNode,
9149                               X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
9150                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
9151   let ExeDomain = DestInfo.ExeDomain in {
9152   def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9153              (ins SrcInfo.RC:$src),
9154              OpcodeStr # "\t{$src, $dst|$dst, $src}",
9155              [(set DestInfo.RC:$dst,
9156                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
9157              EVEX, Sched<[sched]>;
9158   let Constraints = "$src0 = $dst" in
9159   def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9160              (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9161              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9162              [(set DestInfo.RC:$dst,
9163                    (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9164                              (DestInfo.VT DestInfo.RC:$src0),
9165                              SrcInfo.KRCWM:$mask))]>,
9166              EVEX, EVEX_K, Sched<[sched]>;
9167   def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9168              (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9169              OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
9170              [(set DestInfo.RC:$dst,
9171                    (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9172                              DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
9173              EVEX, EVEX_KZ, Sched<[sched]>;
9174   }
9176   let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
9177     def mr : AVX512XS8I<opc, MRMDestMem, (outs),
9178                (ins x86memop:$dst, SrcInfo.RC:$src),
9179                OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
9180                EVEX, Sched<[sched.Folded]>;
9182     def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
9183                (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9184                OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
9185                EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
9186   } // mayStore = 1, hasSideEffects = 0
9189 multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9190                                     X86VectorVTInfo DestInfo,
9191                                     PatFrag truncFrag, PatFrag mtruncFrag,
9192                                     string Name> {
9194   def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9195             (!cast<Instruction>(Name#SrcInfo.ZSuffix##mr)
9196                                     addr:$dst, SrcInfo.RC:$src)>;
9198   def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
9199                         SrcInfo.KRCWM:$mask),
9200             (!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk)
9201                             addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
9204 multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9205                         SDNode OpNode256, SDNode OpNode512,
9206                         SDPatternOperator MaskNode128,
9207                         SDPatternOperator MaskNode256,
9208                         SDPatternOperator MaskNode512,
9209                         X86FoldableSchedWrite sched,
9210                         AVX512VLVectorVTInfo VTSrcInfo,
9211                         X86VectorVTInfo DestInfoZ128,
9212                         X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9213                         X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9214                         X86MemOperand x86memopZ, PatFrag truncFrag,
9215                         PatFrag mtruncFrag, Predicate prd = HasAVX512>{
9217   let Predicates = [HasVLX, prd] in {
9218     defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
9219                              VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9220                 avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
9221                              truncFrag, mtruncFrag, NAME>, EVEX_V128;
9223     defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
9224                              VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9225                 avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
9226                              truncFrag, mtruncFrag, NAME>, EVEX_V256;
9227   }
9228   let Predicates = [prd] in
9229     defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
9230                              VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9231                 avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
9232                              truncFrag, mtruncFrag, NAME>, EVEX_V512;
9235 multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9236                            SDPatternOperator MaskNode,
9237                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9238                            PatFrag MaskedStoreNode, SDNode InVecNode,
9239                            SDPatternOperator InVecMaskNode> {
9240   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
9241                           InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
9242                           avx512vl_i64_info, v16i8x_info, v16i8x_info,
9243                           v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9244                           MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
9247 multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9248                            SDPatternOperator MaskNode,
9249                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9250                            PatFrag MaskedStoreNode, SDNode InVecNode,
9251                            SDPatternOperator InVecMaskNode> {
9252   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9253                           InVecMaskNode, InVecMaskNode, MaskNode, sched,
9254                           avx512vl_i64_info, v8i16x_info, v8i16x_info,
9255                           v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9256                           MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
9259 multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9260                            SDPatternOperator MaskNode,
9261                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9262                            PatFrag MaskedStoreNode, SDNode InVecNode,
9263                            SDPatternOperator InVecMaskNode> {
9264   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9265                           InVecMaskNode, MaskNode, MaskNode, sched,
9266                           avx512vl_i64_info, v4i32x_info, v4i32x_info,
9267                           v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9268                           MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
9271 multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
9272                            SDPatternOperator MaskNode,
9273                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9274                            PatFrag MaskedStoreNode, SDNode InVecNode,
9275                            SDPatternOperator InVecMaskNode> {
9276   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9277                           InVecMaskNode, InVecMaskNode, MaskNode, sched,
9278                           avx512vl_i32_info, v16i8x_info, v16i8x_info,
9279                           v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
9280                           MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
9283 multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9284                            SDPatternOperator MaskNode,
9285                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9286                            PatFrag MaskedStoreNode, SDNode InVecNode,
9287                            SDPatternOperator InVecMaskNode> {
9288   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9289                           InVecMaskNode, MaskNode, MaskNode, sched,
9290                           avx512vl_i32_info, v8i16x_info, v8i16x_info,
9291                           v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
9292                           MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
9295 multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9296                            SDPatternOperator MaskNode,
9297                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9298                            PatFrag MaskedStoreNode, SDNode InVecNode,
9299                            SDPatternOperator InVecMaskNode> {
9300   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9301                           InVecMaskNode, MaskNode, MaskNode, sched,
9302                           avx512vl_i16_info, v16i8x_info, v16i8x_info,
9303                           v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
9304                           MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
9307 defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",   trunc, select_trunc,
9308                                   WriteShuffle256, truncstorevi8,
9309                                   masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9310 defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",  X86vtruncs, select_truncs,
9311                                   WriteShuffle256, truncstore_s_vi8,
9312                                   masked_truncstore_s_vi8, X86vtruncs,
9313                                   X86vmtruncs>;
9314 defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
9315                                   select_truncus, WriteShuffle256,
9316                                   truncstore_us_vi8, masked_truncstore_us_vi8,
9317                                   X86vtruncus, X86vmtruncus>;
9319 defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
9320                                   WriteShuffle256, truncstorevi16,
9321                                   masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9322 defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, select_truncs,
9323                                   WriteShuffle256, truncstore_s_vi16,
9324                                   masked_truncstore_s_vi16, X86vtruncs,
9325                                   X86vmtruncs>;
9326 defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
9327                                   select_truncus, WriteShuffle256,
9328                                   truncstore_us_vi16, masked_truncstore_us_vi16,
9329                                   X86vtruncus, X86vmtruncus>;
9331 defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
9332                                   WriteShuffle256, truncstorevi32,
9333                                   masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
9334 defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, select_truncs,
9335                                   WriteShuffle256, truncstore_s_vi32,
9336                                   masked_truncstore_s_vi32, X86vtruncs,
9337                                   X86vmtruncs>;
9338 defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
9339                                   select_truncus, WriteShuffle256,
9340                                   truncstore_us_vi32, masked_truncstore_us_vi32,
9341                                   X86vtruncus, X86vmtruncus>;
9343 defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
9344                                   WriteShuffle256, truncstorevi8,
9345                                   masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9346 defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
9347                                   WriteShuffle256, truncstore_s_vi8,
9348                                   masked_truncstore_s_vi8, X86vtruncs,
9349                                   X86vmtruncs>;
9350 defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus,
9351                                   select_truncus, WriteShuffle256,
9352                                   truncstore_us_vi8, masked_truncstore_us_vi8,
9353                                   X86vtruncus, X86vmtruncus>;
9355 defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
9356                                   WriteShuffle256, truncstorevi16,
9357                                   masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9358 defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
9359                                   WriteShuffle256, truncstore_s_vi16,
9360                                   masked_truncstore_s_vi16, X86vtruncs,
9361                                   X86vmtruncs>;
9362 defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
9363                                   select_truncus, WriteShuffle256,
9364                                   truncstore_us_vi16, masked_truncstore_us_vi16,
9365                                   X86vtruncus, X86vmtruncus>;
9367 defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
9368                                   WriteShuffle256, truncstorevi8,
9369                                   masked_truncstorevi8, X86vtrunc,
9370                                   X86vmtrunc>;
9371 defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
9372                                   WriteShuffle256, truncstore_s_vi8,
9373                                   masked_truncstore_s_vi8, X86vtruncs,
9374                                   X86vmtruncs>;
9375 defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
9376                                   select_truncus, WriteShuffle256,
9377                                   truncstore_us_vi8, masked_truncstore_us_vi8,
9378                                   X86vtruncus, X86vmtruncus>;
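// For illustration only (AT&T syntax), the VPMOV* truncate records defined
// above produce the register (rr/rrk/rrkz) and store (mr/mrk) forms, e.g.
//   vpmovqb   %zmm0, %xmm1               (v8i64 -> v8i8 truncate)
//   vpmovsqb  %zmm0, %xmm1 {%k1}         (signed-saturating, merge-masked)
//   vpmovusqb %zmm0, (%rdi) {%k1}        (unsigned-saturating truncating store)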
9380 let Predicates = [HasAVX512, NoVLX] in {
9381 def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
9382          (v8i16 (EXTRACT_SUBREG
9383                  (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
9384                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
9385 def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
9386          (v4i32 (EXTRACT_SUBREG
9387                  (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
9388                                            VR256X:$src, sub_ymm)))), sub_xmm))>;
9391 let Predicates = [HasBWI, NoVLX] in {
9392 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9393          (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
9394                                             VR256X:$src, sub_ymm))), sub_xmm))>;
9397 // Without BWI we can't use vXi16/vXi8 vselect, so we have to use vmtrunc nodes.
9398 multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
9399                            X86VectorVTInfo DestInfo,
9400                            X86VectorVTInfo SrcInfo> {
9401   def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9402                                  DestInfo.RC:$src0,
9403                                  SrcInfo.KRCWM:$mask)),
9404             (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
9405                                                  SrcInfo.KRCWM:$mask,
9406                                                  SrcInfo.RC:$src)>;
9408   def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9409                                  DestInfo.ImmAllZerosV,
9410                                  SrcInfo.KRCWM:$mask)),
9411             (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
9412                                                   SrcInfo.RC:$src)>;
9415 let Predicates = [HasVLX] in {
9416 defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
9417 defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
9418 defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
9421 let Predicates = [HasAVX512] in {
9422 defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
9423 defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
9424 defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
9426 defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
9427 defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
9428 defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
9430 defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
9431 defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
9432 defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
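// Rough example of what mtrunc_lowering provides: a masked narrow such as
//   (v16i16 (X86vmtrunc (v16i32 VR512:$src), VR256X:$src0, VK16WM:$mask))
// is selected to the merge-masked register form
//   (VPMOVDWZrrk VR256X:$src0, VK16WM:$mask, VR512:$src)
// and the ImmAllZerosV variant maps to the corresponding rrkz instruction.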
9435 multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9436               X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
9437               X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
9438   let ExeDomain = DestInfo.ExeDomain in {
9439   defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
9440                     (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
9441                     (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
9442                   EVEX, Sched<[sched]>;
9444   defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
9445                   (ins x86memop:$src), OpcodeStr ,"$src", "$src",
9446                   (DestInfo.VT (LdFrag addr:$src))>,
9447                 EVEX, Sched<[sched.Folded]>;
9448   }
9451 multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
9452           SDNode OpNode, SDNode InVecNode, string ExtTy,
9453           X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9454   let Predicates = [HasVLX, HasBWI] in {
9455     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
9456                     v16i8x_info, i64mem, LdFrag, InVecNode>,
9457                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
9459     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
9460                     v16i8x_info, i128mem, LdFrag, OpNode>,
9461                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
9462   }
9463   let Predicates = [HasBWI] in {
9464     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
9465                     v32i8x_info, i256mem, LdFrag, OpNode>,
9466                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
9467   }
9470 multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
9471           SDNode OpNode, SDNode InVecNode, string ExtTy,
9472           X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9473   let Predicates = [HasVLX, HasAVX512] in {
9474     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
9475                    v16i8x_info, i32mem, LdFrag, InVecNode>,
9476                          EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
9478     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
9479                    v16i8x_info, i64mem, LdFrag, InVecNode>,
9480                          EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
9481   }
9482   let Predicates = [HasAVX512] in {
9483     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
9484                    v16i8x_info, i128mem, LdFrag, OpNode>,
9485                          EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
9486   }
9489 multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
9490           SDNode OpNode, SDNode InVecNode, string ExtTy,
9491           X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9492   let Predicates = [HasVLX, HasAVX512] in {
9493     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9494                    v16i8x_info, i16mem, LdFrag, InVecNode>,
9495                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
9497     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9498                    v16i8x_info, i32mem, LdFrag, InVecNode>,
9499                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
9500   }
9501   let Predicates = [HasAVX512] in {
9502     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9503                    v16i8x_info, i64mem, LdFrag, InVecNode>,
9504                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
9505   }
9508 multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
9509          SDNode OpNode, SDNode InVecNode, string ExtTy,
9510          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9511   let Predicates = [HasVLX, HasAVX512] in {
9512     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
9513                    v8i16x_info, i64mem, LdFrag, InVecNode>,
9514                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
9516     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
9517                    v8i16x_info, i128mem, LdFrag, OpNode>,
9518                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
9519   }
9520   let Predicates = [HasAVX512] in {
9521     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
9522                    v16i16x_info, i256mem, LdFrag, OpNode>,
9523                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
9524   }
9527 multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
9528          SDNode OpNode, SDNode InVecNode, string ExtTy,
9529          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9530   let Predicates = [HasVLX, HasAVX512] in {
9531     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9532                    v8i16x_info, i32mem, LdFrag, InVecNode>,
9533                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
9535     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9536                    v8i16x_info, i64mem, LdFrag, InVecNode>,
9537                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
9538   }
9539   let Predicates = [HasAVX512] in {
9540     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9541                    v8i16x_info, i128mem, LdFrag, OpNode>,
9542                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
9543   }
9546 multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
9547          SDNode OpNode, SDNode InVecNode, string ExtTy,
9548          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
9550   let Predicates = [HasVLX, HasAVX512] in {
9551     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9552                    v4i32x_info, i64mem, LdFrag, InVecNode>,
9553                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
9555     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9556                    v4i32x_info, i128mem, LdFrag, OpNode>,
9557                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
9558   }
9559   let Predicates = [HasAVX512] in {
9560     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9561                    v8i32x_info, i256mem, LdFrag, OpNode>,
9562                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
9563   }
9566 defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
9567 defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
9568 defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>;
9569 defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
9570 defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
9571 defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;
9573 defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
9574 defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
9575 defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>;
9576 defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
9577 defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
9578 defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;
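// Illustrative only: the instantiations above yield, for example,
//   vpmovzxbw %ymm0, %zmm1        (v32i8  -> v32i16 zero extension, BWI)
//   vpmovsxwd %ymm0, %zmm1        (v16i16 -> v16i32 sign extension)
// plus the {k}/{z} masked variants produced by AVX512_maskable.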
9581 // Patterns for which we also need any-extend versions; aext_vector_inreg
9582 // is currently legalized to zext_vector_inreg.
9583 multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
9584   // 256-bit patterns
9585   let Predicates = [HasVLX, HasBWI] in {
9586     def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
9587               (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
9588   }
9590   let Predicates = [HasVLX] in {
9591     def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
9592               (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
9594     def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
9595               (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
9596   }
9598   // 512-bit patterns
9599   let Predicates = [HasBWI] in {
9600     def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
9601               (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
9602   }
9603   let Predicates = [HasAVX512] in {
9604     def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
9605               (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
9606     def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
9607               (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
9609     def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
9610               (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
9612     def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
9613               (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
9614   }
9617 multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
9618                                  SDNode InVecOp> :
9619     AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
9620   // 128-bit patterns
9621   let Predicates = [HasVLX, HasBWI] in {
9622   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9623             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9624   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9625             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9626   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
9627             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9628   }
9629   let Predicates = [HasVLX] in {
9630   def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9631             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9632   def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
9633             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9635   def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
9636             (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
9638   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9639             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9640   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9641             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9642   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
9643             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9645   def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9646             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9647   def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
9648             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9650   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9651             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9652   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9653             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9654   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
9655             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9656   }
9657   let Predicates = [HasVLX] in {
9658   def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9659             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9660   def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
9661             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9663   def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9664             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9665   def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
9666             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9668   def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9669             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
9670   def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
9671             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
9672   }
9673   // 512-bit patterns
9674   let Predicates = [HasAVX512] in {
9675   def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9676             (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
9677   }
9680 defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
9681 defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
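// Sketch of what these pattern sets fold (assuming the relevant features):
// an extending load such as
//   (v16i32 (zext (loadv16i8 addr:$src)))
// is selected directly to VPMOVZXBDZrm addr:$src rather than a separate load
// followed by the register-form extend.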
9683 // Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
9684 // ext+trunc aggressively, making it impossible to legalize the DAG to this
9685 // pattern directly.
9686 let Predicates = [HasAVX512, NoBWI] in {
9687 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9688          (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
9689 def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
9690          (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
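// In assembly terms (illustration only), the NoBWI lowering above emits
//   vpmovzxwd %ymm0, %zmm0
//   vpmovdb   %zmm0, %xmm0
// i.e. widen v16i16 to v16i32 and then narrow to v16i8, because VPMOVWB
// itself requires AVX512BW.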
9693 //===----------------------------------------------------------------------===//
9694 // GATHER - SCATTER Operations
9696 // FIXME: Improve scheduling of gather/scatter instructions.
9697 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9698                          X86MemOperand memop, PatFrag GatherNode,
9699                          RegisterClass MaskRC = _.KRCWM> {
9700   let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
9701       ExeDomain = _.ExeDomain in
9702   def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
9703             (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
9704             !strconcat(OpcodeStr#_.Suffix,
9705             "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
9706             [(set _.RC:$dst, MaskRC:$mask_wb,
9707               (GatherNode  (_.VT _.RC:$src1), MaskRC:$mask,
9708                      vectoraddr:$src2))]>, EVEX, EVEX_K,
9709              EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
9712 multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
9713                         AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9714   defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
9715                                       vy512xmem, mgatherv8i32>, EVEX_V512, VEX_W;
9716   defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
9717                                       vz512mem,  mgatherv8i64>, EVEX_V512, VEX_W;
9718 let Predicates = [HasVLX] in {
9719   defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
9720                               vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
9721   defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
9722                               vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
9723   defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
9724                               vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
9725   defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
9726                               vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
9730 multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
9731                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9732   defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
9733                                        mgatherv16i32>, EVEX_V512;
9734   defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem,
9735                                        mgatherv8i64>, EVEX_V512;
9736 let Predicates = [HasVLX] in {
9737   defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
9738                                           vy256xmem, mgatherv8i32>, EVEX_V256;
9739   defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
9740                                           vy128xmem, mgatherv4i64>, EVEX_V256;
9741   defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
9742                                           vx128xmem, mgatherv4i32>, EVEX_V128;
9743   defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
9744                                           vx64xmem, mgatherv2i64, VK2WM>,
9745                                           EVEX_V128;
9750 defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
9751                avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
9753 defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
9754                 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
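// For illustration (AT&T syntax), the gather records above correspond to
// forms such as
//   vgatherdps (%rsi,%zmm1,4), %zmm0 {%k1}
// where the mask register is both the element predicate and the $mask_wb
// write-back operand: bits are cleared as their elements are gathered.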
9756 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9757                           X86MemOperand memop, PatFrag ScatterNode,
9758                           RegisterClass MaskRC = _.KRCWM> {
9760 let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in
9762   def mr  : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
9763             (ins memop:$dst, MaskRC:$mask, _.RC:$src),
9764             !strconcat(OpcodeStr#_.Suffix,
9765             "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
9766             [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
9767                                     MaskRC:$mask,  vectoraddr:$dst))]>,
9768             EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
9769             Sched<[WriteStore]>;
9772 multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
9773                         AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9774   defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
9775                                       vy512xmem, mscatterv8i32>, EVEX_V512, VEX_W;
9776   defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
9777                                       vz512mem,  mscatterv8i64>, EVEX_V512, VEX_W;
9778 let Predicates = [HasVLX] in {
9779   defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
9780                               vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
9781   defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
9782                               vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
9783   defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
9784                               vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
9785   defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
9786                               vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
9790 multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
9791                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9792   defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
9793                                        mscatterv16i32>, EVEX_V512;
9794   defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem,
9795                                        mscatterv8i64>, EVEX_V512;
9796 let Predicates = [HasVLX] in {
9797   defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
9798                                           vy256xmem, mscatterv8i32>, EVEX_V256;
9799   defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
9800                                           vy128xmem, mscatterv4i64>, EVEX_V256;
9801   defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
9802                                           vx128xmem, mscatterv4i32>, EVEX_V128;
9803   defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
9804                                           vx64xmem, mscatterv2i64, VK2WM>,
9805                                           EVEX_V128;
9809 defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
9810                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
9812 defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
9813                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
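// Likewise for scatter (illustration only):
//   vscatterdps %zmm0, (%rdi,%zmm1,4) {%k1}
// stores the selected elements of %zmm0 to base+index*scale and clears the
// corresponding mask bits via the $mask_wb result.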
9815 // Gather/scatter prefetch (AVX-512PF)
9816 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
9817                        RegisterClass KRC, X86MemOperand memop> {
9818   let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
9819   def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
9820             !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
9821             EVEX, EVEX_K, Sched<[WriteLoad]>;
9824 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
9825                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9827 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
9828                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9830 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
9831                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9833 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
9834                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9836 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
9837                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9839 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
9840                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9842 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
9843                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9845 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
9846                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9848 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
9849                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9851 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
9852                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9854 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
9855                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9857 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
9858                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9860 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
9861                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9863 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
9864                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9866 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
9867                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9869 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
9870                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
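// Illustrative use of the AVX-512PF forms above:
//   vgatherpf0dps (%rdi,%zmm0,4) {%k1}
// prefetches the masked gather locations with a T0 hint; the vgatherpf1* and
// vscatterpf* variants use the T1 hint or prefetch with write intent, and all
// of them take only a mask and a vector memory operand.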
9872 multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
9873 def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
9874                   !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
9875                   [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
9876                   EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
9878 // Also need a pattern for anyextend.
9879 def : Pat<(Vec.VT (anyext Vec.KRC:$src)),
9880           (!cast<Instruction>(NAME#"rr") Vec.KRC:$src)>;
9883 multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
9884                                  string OpcodeStr, Predicate prd> {
9885 let Predicates = [prd] in
9886   defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
9888   let Predicates = [prd, HasVLX] in {
9889     defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
9890     defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
9891   }
9894 defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
9895 defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
9896 defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
9897 defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
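// Rough example: vpmovm2b %k1, %zmm0 materializes each mask bit as an
// all-ones or all-zeros byte lane, i.e. the (sext vXi1) pattern above; the
// anyext pattern reuses the same instruction since any value is acceptable
// in the extended bits.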
9899 multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
9900     def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
9901                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
9902                         [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
9903                         EVEX, Sched<[WriteMove]>;
9906 // Use the 512-bit version to implement the 128/256-bit variants when VLX is not available.
9907 multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
9908                                            X86VectorVTInfo _,
9909                                            string Name> {
9911   def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
9912             (_.KVT (COPY_TO_REGCLASS
9913                      (!cast<Instruction>(Name#"Zrr")
9914                        (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
9915                                       _.RC:$src, _.SubRegIdx)),
9916                    _.KRC))>;
9919 multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
9920                                    AVX512VLVectorVTInfo VTInfo, Predicate prd> {
9921   let Predicates = [prd] in
9922     defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
9923                                             EVEX_V512;
9925   let Predicates = [prd, HasVLX] in {
9926     defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
9927                                               EVEX_V256;
9928     defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
9929                                                EVEX_V128;
9930   }
9931   let Predicates = [prd, NoVLX] in {
9932     defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
9933     defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
9934   }
9937 defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
9938                                               avx512vl_i8_info, HasBWI>;
9939 defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
9940                                               avx512vl_i16_info, HasBWI>, VEX_W;
9941 defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
9942                                               avx512vl_i32_info, HasDQI>;
9943 defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
9944                                               avx512vl_i64_info, HasDQI>, VEX_W;
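// Sketch: vpmovb2m %zmm0, %k0 sets mask bit i when the sign bit of byte i is
// set, which is what the (X86pcmpgtm 0, X) pattern above expresses; under
// NoVLX the 128/256-bit sources are first widened into a 512-bit register by
// convert_vector_to_mask_lowering.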
9946 // Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
9947 // is available, but BWI is not. We can't handle this in lowering because
9948 // a target-independent DAG combine likes to combine sext and trunc.
9949 let Predicates = [HasDQI, NoBWI] in {
9950   def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
9951             (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9952   def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
9953             (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9955   def : Pat<(v16i8 (anyext (v16i1 VK16:$src))),
9956             (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9957   def : Pat<(v16i16 (anyext (v16i1 VK16:$src))),
9958             (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
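// As assembly (illustration only), the v16i1 -> v16i8 case above expands to
//   vpmovm2d %k1, %zmm0        (mask -> v16i32 of 0/-1, needs DQI)
//   vpmovdb  %zmm0, %xmm0      (narrow back to v16i8, AVX512F)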
9961 let Predicates = [HasDQI, NoBWI, HasVLX] in {
9962   def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
9963             (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
9965   def : Pat<(v8i16 (anyext (v8i1 VK8:$src))),
9966             (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
9969 //===----------------------------------------------------------------------===//
9970 // AVX-512 - COMPRESS and EXPAND
9973 multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
9974                                  string OpcodeStr, X86FoldableSchedWrite sched> {
9975   defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
9976               (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
9977               (null_frag)>, AVX5128IBase,
9978               Sched<[sched]>;
9980   let mayStore = 1, hasSideEffects = 0 in
9981   def mr : AVX5128I<opc, MRMDestMem, (outs),
9982               (ins _.MemOp:$dst, _.RC:$src),
9983               OpcodeStr # "\t{$src, $dst|$dst, $src}",
9984               []>, EVEX_CD8<_.EltSize, CD8VT1>,
9985               Sched<[sched.Folded]>;
9987   def mrk : AVX5128I<opc, MRMDestMem, (outs),
9988               (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
9989               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9990               []>,
9991               EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
9992               Sched<[sched.Folded]>;
9995 multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
9996   def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
9997             (!cast<Instruction>(Name#_.ZSuffix##mrk)
9998                             addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
10000   def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10001             (!cast<Instruction>(Name#_.ZSuffix##rrk)
10002                             _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10003   def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10004             (!cast<Instruction>(Name#_.ZSuffix##rrkz)
10005                             _.KRCWM:$mask, _.RC:$src)>;
10008 multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
10009                                  X86FoldableSchedWrite sched,
10010                                  AVX512VLVectorVTInfo VTInfo,
10011                                  Predicate Pred = HasAVX512> {
10012   let Predicates = [Pred] in
10013   defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
10014            compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10016   let Predicates = [Pred, HasVLX] in {
10017     defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
10018                 compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10019     defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
10020                 compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10021   }
10024 // FIXME: Is there a better scheduler class for VPCOMPRESS?
10025 defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
10026                                           avx512vl_i32_info>, EVEX, NotMemoryFoldable;
10027 defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
10028                                           avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
10029 defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
10030                                           avx512vl_f32_info>, EVEX, NotMemoryFoldable;
10031 defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
10032                                           avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
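// For example (illustrative, AT&T syntax): vpcompressd %zmm1, %zmm0 {%k1}
// packs the %k1-selected elements of %zmm1 into the low lanes of %zmm0, and
// vpcompressd %zmm1, (%rdi) {%k1} is the compressing store matched by the
// X86mCompressingStore pattern above.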
10034 // expand
10035 multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
10036                                  string OpcodeStr, X86FoldableSchedWrite sched> {
10037   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10038               (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10039               (null_frag)>, AVX5128IBase,
10040               Sched<[sched]>;
10042   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10043               (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
10044               (null_frag)>,
10045             AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
10046             Sched<[sched.Folded, sched.ReadAfterFold]>;
10049 multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10051   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
10052             (!cast<Instruction>(Name#_.ZSuffix##rmkz)
10053                                         _.KRCWM:$mask, addr:$src)>;
10055   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
10056             (!cast<Instruction>(Name#_.ZSuffix##rmkz)
10057                                         _.KRCWM:$mask, addr:$src)>;
10059   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
10060                                                (_.VT _.RC:$src0))),
10061             (!cast<Instruction>(Name#_.ZSuffix##rmk)
10062                             _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
10064   def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10065             (!cast<Instruction>(Name#_.ZSuffix##rrk)
10066                             _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10067   def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10068             (!cast<Instruction>(Name#_.ZSuffix##rrkz)
10069                             _.KRCWM:$mask, _.RC:$src)>;
10072 multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
10073                                X86FoldableSchedWrite sched,
10074                                AVX512VLVectorVTInfo VTInfo,
10075                                Predicate Pred = HasAVX512> {
10076   let Predicates = [Pred] in
10077   defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
10078            expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10080   let Predicates = [Pred, HasVLX] in {
10081     defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
10082                 expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10083     defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
10084                 expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10085   }
10088 // FIXME: Is there a better scheduler class for VPEXPAND?
10089 defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
10090                                       avx512vl_i32_info>, EVEX;
10091 defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
10092                                       avx512vl_i64_info>, EVEX, VEX_W;
10093 defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
10094                                       avx512vl_f32_info>, EVEX;
10095 defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
10096                                       avx512vl_f64_info>, EVEX, VEX_W;
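// Illustrative counterpart: vpexpandd (%rdi), %zmm0 {%k1}{z} performs the
// expanding load matched by X86mExpandingLoad above, placing consecutive
// memory elements into the lanes selected by %k1 and zeroing the rest.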
10098 // Handle instructions of the form  reg_vec1 = op(reg_vec, imm)
10099 //                                             op(mem_vec, imm)
10100 //                                             op(broadcast(eltVt), imm)
10101 // All instructions are created with FROUND_CURRENT.
10102 multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10103                                       X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10104   let ExeDomain = _.ExeDomain in {
10105   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10106                       (ins _.RC:$src1, i32u8imm:$src2),
10107                       OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
10108                       (OpNode (_.VT _.RC:$src1),
10109                               (i32 timm:$src2))>, Sched<[sched]>;
10110   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10111                     (ins _.MemOp:$src1, i32u8imm:$src2),
10112                     OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
10113                     (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10114                             (i32 timm:$src2))>,
10115                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10116   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10117                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
10118                     OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
10119                     "${src1}"##_.BroadcastStr##", $src2",
10120                     (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10121                             (i32 timm:$src2))>, EVEX_B,
10122                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10123   }
10126 //handle instruction  reg_vec1 = op(reg_vec2,imm),{sae}
10127 multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10128                                           SDNode OpNode, X86FoldableSchedWrite sched,
10129                                           X86VectorVTInfo _> {
10130   let ExeDomain = _.ExeDomain in
10131   defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10132                       (ins _.RC:$src1, i32u8imm:$src2),
10133                       OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
10134                       "$src1, {sae}, $src2",
10135                       (OpNode (_.VT _.RC:$src1),
10136                               (i32 timm:$src2))>,
10137                       EVEX_B, Sched<[sched]>;
10140 multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
10141             AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10142             SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10143   let Predicates = [prd] in {
10144     defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
10145                                            _.info512>,
10146                 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
10147                                                sched.ZMM, _.info512>, EVEX_V512;
10148   }
10149   let Predicates = [prd, HasVLX] in {
10150     defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM,
10151                                            _.info128>, EVEX_V128;
10152     defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM,
10153                                            _.info256>, EVEX_V256;
10154   }
10157 //handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10158 //                               op(reg_vec2,mem_vec,imm)
10159 //                               op(reg_vec2,broadcast(eltVt),imm)
10160 //all instructions are created with FROUND_CURRENT
10161 multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10162                                 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10163   let ExeDomain = _.ExeDomain in {
10164   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10165                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10166                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10167                       (OpNode (_.VT _.RC:$src1),
10168                               (_.VT _.RC:$src2),
10169                               (i32 timm:$src3))>,
10170                       Sched<[sched]>;
10171   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10172                     (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
10173                     OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10174                     (OpNode (_.VT _.RC:$src1),
10175                             (_.VT (bitconvert (_.LdFrag addr:$src2))),
10176                             (i32 timm:$src3))>,
10177                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10178   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10179                     (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10180                     OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10181                     "$src1, ${src2}"##_.BroadcastStr##", $src3",
10182                     (OpNode (_.VT _.RC:$src1),
10183                             (_.VT (_.BroadcastLdFrag addr:$src2)),
10184                             (i32 timm:$src3))>, EVEX_B,
10185                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10186   }
10189 //handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10190 //                               op(reg_vec2,mem_vec,imm)
10191 multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10192                               X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
10193                               X86VectorVTInfo SrcInfo>{
10194   let ExeDomain = DestInfo.ExeDomain in {
10195   defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10196                   (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
10197                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10198                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10199                                (SrcInfo.VT SrcInfo.RC:$src2),
10200                                (i8 timm:$src3)))>,
10201                   Sched<[sched]>;
10202   defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10203                 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
10204                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10205                 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10206                              (SrcInfo.VT (bitconvert
10207                                                 (SrcInfo.LdFrag addr:$src2))),
10208                              (i8 timm:$src3)))>,
10209                 Sched<[sched.Folded, sched.ReadAfterFold]>;
10210   }
10213 //handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10214 //                               op(reg_vec2,mem_vec,imm)
10215 //                               op(reg_vec2,broadcast(eltVt),imm)
10216 multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10217                            X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10218   avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
10220   let ExeDomain = _.ExeDomain in
10221   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10222                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10223                     OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10224                     "$src1, ${src2}"##_.BroadcastStr##", $src3",
10225                     (OpNode (_.VT _.RC:$src1),
10226                             (_.VT (_.BroadcastLdFrag addr:$src2)),
10227                             (i8 timm:$src3))>, EVEX_B,
10228                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10231 //handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10232 //                                      op(reg_vec2,mem_scalar,imm)
10233 multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10234                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10235   let ExeDomain = _.ExeDomain in {
10236   defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10237                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10238                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10239                       (OpNode (_.VT _.RC:$src1),
10240                               (_.VT _.RC:$src2),
10241                               (i32 timm:$src3))>,
10242                       Sched<[sched]>;
10243   defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10244                     (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
10245                     OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10246                     (OpNode (_.VT _.RC:$src1),
10247                             (_.VT _.ScalarIntMemCPat:$src2),
10248                             (i32 timm:$src3))>,
10249                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10250   }
10253 //handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
10254 multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10255                                     SDNode OpNode, X86FoldableSchedWrite sched,
10256                                     X86VectorVTInfo _> {
10257   let ExeDomain = _.ExeDomain in
10258   defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10259                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10260                       OpcodeStr, "$src3, {sae}, $src2, $src1",
10261                       "$src1, $src2, {sae}, $src3",
10262                       (OpNode (_.VT _.RC:$src1),
10263                               (_.VT _.RC:$src2),
10264                               (i32 timm:$src3))>,
10265                       EVEX_B, Sched<[sched]>;
10268 //handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
10269 multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10270                                     X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10271   let ExeDomain = _.ExeDomain in
10272   defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10273                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10274                       OpcodeStr, "$src3, {sae}, $src2, $src1",
10275                       "$src1, $src2, {sae}, $src3",
10276                       (OpNode (_.VT _.RC:$src1),
10277                               (_.VT _.RC:$src2),
10278                               (i32 timm:$src3))>,
10279                       EVEX_B, Sched<[sched]>;
10282 multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
10283             AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10284             SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10285   let Predicates = [prd] in {
10286     defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10287                 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
10288                                   EVEX_V512;
10290   }
10291   let Predicates = [prd, HasVLX] in {
10292     defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10293                                   EVEX_V128;
10294     defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10295                                   EVEX_V256;
10296   }
10299 multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
10300                    X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
10301                    AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
10302   let Predicates = [Pred] in {
10303     defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
10304                            SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
10305   }
10306   let Predicates = [Pred, HasVLX] in {
10307     defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
10308                            SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
10309     defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
10310                            SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
10311   }
10314 multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
10315                                   bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
10316                                   Predicate Pred = HasAVX512> {
10317   let Predicates = [Pred] in {
10318     defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10319                                 EVEX_V512;
10320   }
10321   let Predicates = [Pred, HasVLX] in {
10322     defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10323                                 EVEX_V128;
10324     defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10325                                 EVEX_V256;
10326   }
10329 multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
10330                   X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
10331                   SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
10332   let Predicates = [prd] in {
10333      defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
10334               avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
10335   }
10338 multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
10339                     bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
10340                     SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10341   defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
10342                             opcPs, OpNode, OpNodeSAE, sched, prd>,
10343                             EVEX_CD8<32, CD8VF>;
10344   defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
10345                             opcPd, OpNode, OpNodeSAE, sched, prd>,
10346                             EVEX_CD8<64, CD8VF>, VEX_W;
10349 defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
10350                               X86VReduce, X86VReduceSAE, SchedWriteFRnd, HasDQI>,
10351                               AVX512AIi8Base, EVEX;
10352 defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
10353                               X86VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>,
10354                               AVX512AIi8Base, EVEX;
10355 defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
10356                               X86VGetMant, X86VGetMantSAE, SchedWriteFRnd, HasAVX512>,
10357                               AVX512AIi8Base, EVEX;
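// Illustrative sketch, not LLVM source: a hedged C++ intrinsics view of one
// member of the packed op(vector, imm8) family defined above. Intrinsic names
// are from AVX-512F; the helper names and the choice of imm8 = 0x09
// (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC, i.e. a plain floor) are only
// illustrative.

#include <immintrin.h>

__m512 rndscale_floor_sketch(__m512 A) {
  return _mm512_roundscale_ps(A, 0x09);               // VRNDSCALEPS zmm, zmm, 9
}

// Merge-masked form: lanes whose mask bit is clear keep Src.
__m512 rndscale_floor_masked_sketch(__m512 Src, __mmask16 K, __m512 A) {
  return _mm512_mask_roundscale_ps(Src, K, A, 0x09);  // VRNDSCALEPS zmm {k}, zmm, 9
}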
10359 defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
10360                                                 0x50, X86VRange, X86VRangeSAE,
10361                                                 SchedWriteFAdd, HasDQI>,
10362       AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10363 defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
10364                                                 0x50, X86VRange, X86VRangeSAE,
10365                                                 SchedWriteFAdd, HasDQI>,
10366       AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10368 defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
10369       f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10370       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10371 defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
10372       0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10373       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10375 defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
10376       0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10377       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10378 defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
10379       0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10380       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10382 defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
10383       0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10384       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10385 defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
10386       0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10387       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10389 multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
10390                                           X86FoldableSchedWrite sched,
10391                                           X86VectorVTInfo _,
10392                                           X86VectorVTInfo CastInfo,
10393                                           string EVEX2VEXOvrd> {
10394   let ExeDomain = _.ExeDomain in {
10395   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10396                   (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10397                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10398                   (_.VT (bitconvert
10399                          (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
10400                                                   (i8 timm:$src3)))))>,
10401                   Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
10402   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10403                 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10404                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10405                 (_.VT
10406                  (bitconvert
10407                   (CastInfo.VT (X86Shuf128 _.RC:$src1,
10408                                            (CastInfo.LdFrag addr:$src2),
10409                                            (i8 timm:$src3)))))>,
10410                 Sched<[sched.Folded, sched.ReadAfterFold]>,
10411                 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
10412   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10413                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10414                     OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10415                     "$src1, ${src2}"##_.BroadcastStr##", $src3",
10416                     (_.VT
10417                      (bitconvert
10418                       (CastInfo.VT
10419                        (X86Shuf128 _.RC:$src1,
10420                                    (_.BroadcastLdFrag addr:$src2),
10421                                    (i8 timm:$src3)))))>, EVEX_B,
10422                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10423   }
10426 multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
10427                                    AVX512VLVectorVTInfo _,
10428                                    AVX512VLVectorVTInfo CastInfo, bits<8> opc,
10429                                    string EVEX2VEXOvrd>{
10430   let Predicates = [HasAVX512] in
10431   defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10432                                           _.info512, CastInfo.info512, "">, EVEX_V512;
10434   let Predicates = [HasAVX512, HasVLX] in
10435   defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10436                                              _.info256, CastInfo.info256,
10437                                              EVEX2VEXOvrd>, EVEX_V256;
10440 defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
10441       avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10442 defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
10443       avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10444 defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
10445       avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10446 defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
10447       avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10449 let Predicates = [HasAVX512] in {
10450 // Provide a fallback in case the load node used in the broadcast patterns
10451 // above has additional users, which would otherwise prevent these patterns
10452 // from being selected.
10453 def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
10454           (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10455                           (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10456                           0)>;
10457 def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
10458           (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10459                           (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10460                           0)>;
10462 def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
10463           (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10464                           (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10465                           0)>;
10466 def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
10467           (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10468                           (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10469                           0)>;
10471 def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
10472           (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10473                           (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10474                           0)>;
10476 def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
10477           (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10478                           (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10479                           0)>;
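// Illustrative sketch, not LLVM source: a small C++ model (helper names
// invented here) of the 512-bit VSHUFF64X2 lane selection as I read it, to
// show why the fallback patterns above pass immediate 0. Result lanes 0-1 are
// picked from Src1 and lanes 2-3 from Src2, each by a 2-bit immediate field.

#include <array>
#include <cstdint>

using Lane128 = std::array<uint64_t, 2>;  // one 128-bit lane, as two qwords

std::array<Lane128, 4> shuf128_sketch(const std::array<Lane128, 4> &Src1,
                                      const std::array<Lane128, 4> &Src2,
                                      uint8_t Imm) {
  return {{Src1[Imm & 3], Src1[(Imm >> 2) & 3],
           Src2[(Imm >> 4) & 3], Src2[(Imm >> 6) & 3]}};
}

// With Src1 == Src2 == {X, undef, undef, undef} and Imm == 0, every result
// lane is X, which is exactly the subvector broadcast the patterns above want.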
10482 multiclass avx512_valign<bits<8> opc, string OpcodeStr,
10483                          X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10484   // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
10485   // instantiation of this class.
10486   let ExeDomain = _.ExeDomain in {
10487   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10488                   (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10489                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10490                   (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
10491                   Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
10492   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10493                 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10494                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10495                 (_.VT (X86VAlign _.RC:$src1,
10496                                  (bitconvert (_.LdFrag addr:$src2)),
10497                                  (i8 timm:$src3)))>,
10498                 Sched<[sched.Folded, sched.ReadAfterFold]>,
10499                 EVEX2VEXOverride<"VPALIGNRrmi">;
10501   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10502                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10503                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10504                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
10505                    (X86VAlign _.RC:$src1,
10506                               (_.VT (_.BroadcastLdFrag addr:$src2)),
10507                               (i8 timm:$src3))>, EVEX_B,
10508                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10509   }
10512 multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
10513                                 AVX512VLVectorVTInfo _> {
10514   let Predicates = [HasAVX512] in {
10515     defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
10516                                 AVX512AIi8Base, EVEX_4V, EVEX_V512;
10517   }
10518   let Predicates = [HasAVX512, HasVLX] in {
10519     defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
10520                                 AVX512AIi8Base, EVEX_4V, EVEX_V128;
10521     // We can't really override the 256-bit version so change it back to unset.
10522     let EVEX2VEXOverride = ? in
10523     defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
10524                                 AVX512AIi8Base, EVEX_4V, EVEX_V256;
10525   }
10528 defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
10529                                    avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
10530 defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
10531                                    avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
10532                                    VEX_W;
10534 defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
10535                                          SchedWriteShuffle, avx512vl_i8_info,
10536                                          avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
10538 // Fragments to help convert valignq into masked valignd. Or valignq/valignd
10539 // into vpalignr.
10540 def ValignqImm32XForm : SDNodeXForm<timm, [{
10541   return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
10542 }]>;
10543 def ValignqImm8XForm : SDNodeXForm<timm, [{
10544   return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
10545 }]>;
10546 def ValigndImm8XForm : SDNodeXForm<timm, [{
10547   return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
10548 }]>;
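// Illustrative sketch, not LLVM source: a minimal C++ restatement (helper names
// invented here) of why the XForms above rescale the immediate. A VALIGN-style
// shuffle is determined by its byte offset into the concatenated sources, so a
// count of N quadwords equals 2*N doublewords or 8*N bytes, and N doublewords
// equal 4*N bytes.

#include <cassert>

unsigned valignqToValigndImm(unsigned Imm) { return Imm * 2; } // qwords -> dwords
unsigned valignqToPalignrImm(unsigned Imm) { return Imm * 8; } // qwords -> bytes
unsigned valigndToPalignrImm(unsigned Imm) { return Imm * 4; } // dwords -> bytes

int main() {
  // Rotating by one quadword is the same byte offset as rotating by two
  // doublewords or by eight bytes.
  assert(valignqToValigndImm(1) == 2);
  assert(valignqToPalignrImm(1) == 8);
  assert(valigndToPalignrImm(2) == 8);
  return 0;
}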
10550 multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
10551                                         X86VectorVTInfo From, X86VectorVTInfo To,
10552                                         SDNodeXForm ImmXForm> {
10553   def : Pat<(To.VT (vselect To.KRCWM:$mask,
10554                             (bitconvert
10555                              (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10556                                               timm:$src3))),
10557                             To.RC:$src0)),
10558             (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
10559                                                   To.RC:$src1, To.RC:$src2,
10560                                                   (ImmXForm timm:$src3))>;
10562   def : Pat<(To.VT (vselect To.KRCWM:$mask,
10563                             (bitconvert
10564                              (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10565                                               timm:$src3))),
10566                             To.ImmAllZerosV)),
10567             (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
10568                                                    To.RC:$src1, To.RC:$src2,
10569                                                    (ImmXForm timm:$src3))>;
10571   def : Pat<(To.VT (vselect To.KRCWM:$mask,
10572                             (bitconvert
10573                              (From.VT (OpNode From.RC:$src1,
10574                                               (From.LdFrag addr:$src2),
10575                                       timm:$src3))),
10576                             To.RC:$src0)),
10577             (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
10578                                                   To.RC:$src1, addr:$src2,
10579                                                   (ImmXForm timm:$src3))>;
10581   def : Pat<(To.VT (vselect To.KRCWM:$mask,
10582                             (bitconvert
10583                              (From.VT (OpNode From.RC:$src1,
10584                                               (From.LdFrag addr:$src2),
10585                                       timm:$src3))),
10586                             To.ImmAllZerosV)),
10587             (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
10588                                                    To.RC:$src1, addr:$src2,
10589                                                    (ImmXForm timm:$src3))>;
10592 multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
10593                                            X86VectorVTInfo From,
10594                                            X86VectorVTInfo To,
10595                                            SDNodeXForm ImmXForm> :
10596       avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
10597   def : Pat<(From.VT (OpNode From.RC:$src1,
10598                              (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
10599                              timm:$src3)),
10600             (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
10601                                                   (ImmXForm timm:$src3))>;
10603   def : Pat<(To.VT (vselect To.KRCWM:$mask,
10604                             (bitconvert
10605                              (From.VT (OpNode From.RC:$src1,
10606                                       (bitconvert
10607                                        (To.VT (To.BroadcastLdFrag addr:$src2))),
10608                                       timm:$src3))),
10609                             To.RC:$src0)),
10610             (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
10611                                                    To.RC:$src1, addr:$src2,
10612                                                    (ImmXForm timm:$src3))>;
10614   def : Pat<(To.VT (vselect To.KRCWM:$mask,
10615                             (bitconvert
10616                              (From.VT (OpNode From.RC:$src1,
10617                                       (bitconvert
10618                                        (To.VT (To.BroadcastLdFrag addr:$src2))),
10619                                       timm:$src3))),
10620                             To.ImmAllZerosV)),
10621             (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
10622                                                     To.RC:$src1, addr:$src2,
10623                                                     (ImmXForm timm:$src3))>;
10626 let Predicates = [HasAVX512] in {
10627   // For 512-bit we lower to the widest element type we can. So we only need
10628   // to handle converting valignq to valignd.
10629   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
10630                                          v16i32_info, ValignqImm32XForm>;
10633 let Predicates = [HasVLX] in {
10634   // For 128-bit we lower to the widest element type we can. So we only need
10635   // to handle converting valignq to valignd.
10636   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
10637                                          v4i32x_info, ValignqImm32XForm>;
10638   // For 256-bit we lower to the widest element type we can. So we only need
10639   // to handle converting valignq to valignd.
10640   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
10641                                          v8i32x_info, ValignqImm32XForm>;
10644 let Predicates = [HasVLX, HasBWI] in {
10645   // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
10646   defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
10647                                       v16i8x_info, ValignqImm8XForm>;
10648   defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
10649                                       v16i8x_info, ValigndImm8XForm>;
10652 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
10653                 SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
10654                 EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
10656 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10657                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10658   let ExeDomain = _.ExeDomain in {
10659   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10660                     (ins _.RC:$src1), OpcodeStr,
10661                     "$src1", "$src1",
10662                     (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
10663                     Sched<[sched]>;
10665   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10666                   (ins _.MemOp:$src1), OpcodeStr,
10667                   "$src1", "$src1",
10668                   (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
10669             EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
10670             Sched<[sched.Folded]>;
10671   }
10674 multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
10675                             X86FoldableSchedWrite sched, X86VectorVTInfo _> :
10676            avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
10677   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10678                   (ins _.ScalarMemOp:$src1), OpcodeStr,
10679                   "${src1}"##_.BroadcastStr,
10680                   "${src1}"##_.BroadcastStr,
10681                   (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
10682              EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
10683              Sched<[sched.Folded]>;
10686 multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
10687                               X86SchedWriteWidths sched,
10688                               AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10689   let Predicates = [prd] in
10690     defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
10691                              EVEX_V512;
10693   let Predicates = [prd, HasVLX] in {
10694     defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
10695                               EVEX_V256;
10696     defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
10697                               EVEX_V128;
10698   }
10701 multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
10702                                X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
10703                                Predicate prd> {
10704   let Predicates = [prd] in
10705     defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
10706                               EVEX_V512;
10708   let Predicates = [prd, HasVLX] in {
10709     defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
10710                                  EVEX_V256;
10711     defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
10712                                  EVEX_V128;
10713   }
10716 multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
10717                                  SDNode OpNode, X86SchedWriteWidths sched,
10718                                  Predicate prd> {
10719   defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
10720                                avx512vl_i64_info, prd>, VEX_W;
10721   defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
10722                                avx512vl_i32_info, prd>;
10725 multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
10726                                  SDNode OpNode, X86SchedWriteWidths sched,
10727                                  Predicate prd> {
10728   defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
10729                               avx512vl_i16_info, prd>, VEX_WIG;
10730   defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
10731                               avx512vl_i8_info, prd>, VEX_WIG;
10734 multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
10735                                   bits<8> opc_d, bits<8> opc_q,
10736                                   string OpcodeStr, SDNode OpNode,
10737                                   X86SchedWriteWidths sched> {
10738   defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
10739                                     HasAVX512>,
10740               avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
10741                                     HasBWI>;
10744 defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
10745                                     SchedWriteVecALU>;
10747 // VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
10748 let Predicates = [HasAVX512, NoVLX] in {
10749   def : Pat<(v4i64 (abs VR256X:$src)),
10750             (EXTRACT_SUBREG
10751                 (VPABSQZrr
10752                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
10753              sub_ymm)>;
10754   def : Pat<(v2i64 (abs VR128X:$src)),
10755             (EXTRACT_SUBREG
10756                 (VPABSQZrr
10757                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
10758              sub_xmm)>;
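// Illustrative sketch, not LLVM source: a rough model of the NoVLX fallback
// above, written with AVX-512F intrinsics (helper name invented here). It
// mirrors the pattern's structure rather than the exact machine code: widen to
// 512 bits, run the Z-suffixed instruction, then keep only the low 128 bits.

#include <immintrin.h>

__m128i abs_v2i64_no_vlx_sketch(__m128i X) {
  __m512i Wide = _mm512_castsi128_si512(X); // INSERT_SUBREG into IMPLICIT_DEF
  __m512i Abs  = _mm512_abs_epi64(Wide);    // VPABSQZrr on the widened value
  return _mm512_castsi512_si128(Abs);       // EXTRACT_SUBREG of sub_xmm
}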
10761 // Use 512bit version to implement 128/256 bit.
10762 multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
10763                                  AVX512VLVectorVTInfo _, Predicate prd> {
10764   let Predicates = [prd, NoVLX] in {
10765     def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
10766               (EXTRACT_SUBREG
10767                 (!cast<Instruction>(InstrStr # "Zrr")
10768                   (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
10769                                  _.info256.RC:$src1,
10770                                  _.info256.SubRegIdx)),
10771               _.info256.SubRegIdx)>;
10773     def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
10774               (EXTRACT_SUBREG
10775                 (!cast<Instruction>(InstrStr # "Zrr")
10776                   (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
10777                                  _.info128.RC:$src1,
10778                                  _.info128.SubRegIdx)),
10779               _.info128.SubRegIdx)>;
10780   }
10783 defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
10784                                         SchedWriteVecIMul, HasCDI>;
10786 // FIXME: Is there a better scheduler class for VPCONFLICT?
10787 defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
10788                                         SchedWriteVecALU, HasCDI>;
10790 // VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
10791 defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
10792 defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
10794 //===---------------------------------------------------------------------===//
10795 // Counts number of ones - VPOPCNTD and VPOPCNTQ
10796 //===---------------------------------------------------------------------===//
10798 // FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
10799 defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
10800                                      SchedWriteVecALU, HasVPOPCNTDQ>;
10802 defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
10803 defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
10805 //===---------------------------------------------------------------------===//
10806 // Replicate Single FP - MOVSHDUP and MOVSLDUP
10807 //===---------------------------------------------------------------------===//
10809 multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
10810                             X86SchedWriteWidths sched> {
10811   defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
10812                                       avx512vl_f32_info, HasAVX512>, XS;
10815 defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
10816                                   SchedWriteFShuffle>;
10817 defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
10818                                   SchedWriteFShuffle>;
10820 //===----------------------------------------------------------------------===//
10821 // AVX-512 - MOVDDUP
10822 //===----------------------------------------------------------------------===//
10824 multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
10825                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10826   let ExeDomain = _.ExeDomain in {
10827   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10828                    (ins _.RC:$src), OpcodeStr, "$src", "$src",
10829                    (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
10830                    Sched<[sched]>;
10831   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10832                  (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
10833                  (_.VT (_.BroadcastLdFrag addr:$src))>,
10834                  EVEX, EVEX_CD8<_.EltSize, CD8VH>,
10835                  Sched<[sched.Folded]>;
10836   }
10839 multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
10840                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
10841   defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
10842                            VTInfo.info512>, EVEX_V512;
10844   let Predicates = [HasAVX512, HasVLX] in {
10845     defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
10846                                 VTInfo.info256>, EVEX_V256;
10847     defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
10848                                    VTInfo.info128>, EVEX_V128;
10849   }
10852 multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
10853                           X86SchedWriteWidths sched> {
10854   defm NAME:      avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
10855                                         avx512vl_f64_info>, XD, VEX_W;
10858 defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;
10860 let Predicates = [HasVLX] in {
10861 def : Pat<(v2f64 (X86VBroadcast f64:$src)),
10862           (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10863 def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
10864           (VMOVDDUPZ128rm addr:$src)>;
10865 def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
10866           (VMOVDDUPZ128rm addr:$src)>;
10868 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
10869                    (v2f64 VR128X:$src0)),
10870           (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
10871                            (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10872 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
10873                    immAllZerosV),
10874           (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10876 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)),
10877                    (v2f64 VR128X:$src0)),
10878           (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
10879 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)),
10880                    immAllZerosV),
10881           (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
10883 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
10884                    (v2f64 VR128X:$src0)),
10885           (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
10886 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
10887                    immAllZerosV),
10888           (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
10891 //===----------------------------------------------------------------------===//
10892 // AVX-512 - Unpack Instructions
10893 //===----------------------------------------------------------------------===//
10895 defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
10896                                  SchedWriteFShuffleSizes, 0, 1>;
10897 defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
10898                                  SchedWriteFShuffleSizes>;
10900 defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
10901                                        SchedWriteShuffle, HasBWI>;
10902 defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
10903                                        SchedWriteShuffle, HasBWI>;
10904 defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
10905                                        SchedWriteShuffle, HasBWI>;
10906 defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
10907                                        SchedWriteShuffle, HasBWI>;
10909 defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
10910                                        SchedWriteShuffle, HasAVX512>;
10911 defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
10912                                        SchedWriteShuffle, HasAVX512>;
10913 defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
10914                                         SchedWriteShuffle, HasAVX512>;
10915 defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
10916                                         SchedWriteShuffle, HasAVX512>;
10918 //===----------------------------------------------------------------------===//
10919 // AVX-512 - Extract & Insert Integer Instructions
10920 //===----------------------------------------------------------------------===//
10922 multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
10923                                                             X86VectorVTInfo _> {
10924   def mr : AVX512Ii8<opc, MRMDestMem, (outs),
10925               (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
10926               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10927               [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
10928                        addr:$dst)]>,
10929               EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
10932 multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
10933   let Predicates = [HasBWI] in {
10934     def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
10935                   (ins _.RC:$src1, u8imm:$src2),
10936                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10937                   [(set GR32orGR64:$dst,
10938                         (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
10939                   EVEX, TAPD, Sched<[WriteVecExtract]>;
10941     defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
10942   }
10945 multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
10946   let Predicates = [HasBWI] in {
10947     def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
10948                   (ins _.RC:$src1, u8imm:$src2),
10949                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10950                   [(set GR32orGR64:$dst,
10951                         (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
10952                   EVEX, PD, Sched<[WriteVecExtract]>;
10954     let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
10955     def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
10956                    (ins _.RC:$src1, u8imm:$src2),
10957                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
10958                    EVEX, TAPD, FoldGenData<NAME#rr>,
10959                    Sched<[WriteVecExtract]>;
10961     defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
10962   }
10965 multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
10966                                                             RegisterClass GRC> {
10967   let Predicates = [HasDQI] in {
10968     def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
10969                   (ins _.RC:$src1, u8imm:$src2),
10970                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10971                   [(set GRC:$dst,
10972                       (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
10973                   EVEX, TAPD, Sched<[WriteVecExtract]>;
10975     def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
10976                 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
10977                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10978                 [(store (extractelt (_.VT _.RC:$src1),
10979                                     imm:$src2),addr:$dst)]>,
10980                 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
10981                 Sched<[WriteVecExtractSt]>;
10982   }
10985 defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
10986 defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
10987 defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
10988 defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
10990 multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
10991                                             X86VectorVTInfo _, PatFrag LdFrag> {
10992   def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
10993       (ins _.RC:$src1,  _.ScalarMemOp:$src2, u8imm:$src3),
10994       OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10995       [(set _.RC:$dst,
10996           (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
10997       EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
11000 multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
11001                                             X86VectorVTInfo _, PatFrag LdFrag> {
11002   let Predicates = [HasBWI] in {
11003     def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11004         (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
11005         OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11006         [(set _.RC:$dst,
11007             (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
11008         Sched<[WriteVecInsert]>;
11010     defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
11011   }
11014 multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
11015                                          X86VectorVTInfo _, RegisterClass GRC> {
11016   let Predicates = [HasDQI] in {
11017     def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11018         (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
11019         OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11020         [(set _.RC:$dst,
11021             (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
11022         EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
11024     defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
11025                                     _.ScalarLdFrag>, TAPD;
11026   }
11029 defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
11030                                      extloadi8>, TAPD, VEX_WIG;
11031 defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
11032                                      extloadi16>, PD, VEX_WIG;
11033 defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
11034 defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
11036 //===----------------------------------------------------------------------===//
11037 // VSHUFPS - VSHUFPD Operations
11038 //===----------------------------------------------------------------------===//
11040 multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
11041                         AVX512VLVectorVTInfo VTInfo_FP>{
11042   defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
11043                                     SchedWriteFShuffle>,
11044                                     EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
11045                                     AVX512AIi8Base, EVEX_4V;
11048 defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
11049 defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
11051 //===----------------------------------------------------------------------===//
11052 // AVX-512 - Byte shift Left/Right
11053 //===----------------------------------------------------------------------===//
11055 // FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well?
11056 multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
11057                                Format MRMm, string OpcodeStr,
11058                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11059   def rr : AVX512<opc, MRMr,
11060              (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
11061              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11062              [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
11063              Sched<[sched]>;
11064   def rm : AVX512<opc, MRMm,
11065            (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
11066            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11067            [(set _.RC:$dst,(_.VT (OpNode
11068                                  (_.VT (bitconvert (_.LdFrag addr:$src1))),
11069                                  (i8 timm:$src2))))]>,
11070            Sched<[sched.Folded, sched.ReadAfterFold]>;
11073 multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
11074                                    Format MRMm, string OpcodeStr,
11075                                    X86SchedWriteWidths sched, Predicate prd>{
11076   let Predicates = [prd] in
11077     defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11078                                  sched.ZMM, v64i8_info>, EVEX_V512;
11079   let Predicates = [prd, HasVLX] in {
11080     defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11081                                     sched.YMM, v32i8x_info>, EVEX_V256;
11082     defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11083                                     sched.XMM, v16i8x_info>, EVEX_V128;
11084   }
11086 defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
11087                                        SchedWriteShuffle, HasBWI>,
11088                                        AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11089 defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
11090                                        SchedWriteShuffle, HasBWI>,
11091                                        AVX512PDIi8Base, EVEX_4V, VEX_WIG;
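// Illustrative sketch, not LLVM source: a scalar C++ model (helper names
// invented here) of the byte shifts defined above as I understand them -- each
// 128-bit lane is shifted by Imm whole bytes and the vacated bytes are zeroed.

#include <cstdint>
#include <cstring>

void vpslldq_lane_sketch(uint8_t Lane[16], unsigned Imm) {
  uint8_t Tmp[16] = {};
  if (Imm < 16)
    std::memcpy(Tmp + Imm, Lane, 16 - Imm); // shift toward the high bytes
  std::memcpy(Lane, Tmp, 16);
}

void vpsrldq_lane_sketch(uint8_t Lane[16], unsigned Imm) {
  uint8_t Tmp[16] = {};
  if (Imm < 16)
    std::memcpy(Tmp, Lane + Imm, 16 - Imm); // shift toward the low bytes
  std::memcpy(Lane, Tmp, 16);
}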
11093 multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
11094                                 string OpcodeStr, X86FoldableSchedWrite sched,
11095                                 X86VectorVTInfo _dst, X86VectorVTInfo _src> {
11096   let isCommutable = 1 in
11097   def rr : AVX512BI<opc, MRMSrcReg,
11098              (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
11099              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11100              [(set _dst.RC:$dst,(_dst.VT
11101                                 (OpNode (_src.VT _src.RC:$src1),
11102                                         (_src.VT _src.RC:$src2))))]>,
11103              Sched<[sched]>;
11104   def rm : AVX512BI<opc, MRMSrcMem,
11105            (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
11106            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11107            [(set _dst.RC:$dst,(_dst.VT
11108                               (OpNode (_src.VT _src.RC:$src1),
11109                               (_src.VT (bitconvert
11110                                         (_src.LdFrag addr:$src2))))))]>,
11111            Sched<[sched.Folded, sched.ReadAfterFold]>;
11114 multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
11115                                     string OpcodeStr, X86SchedWriteWidths sched,
11116                                     Predicate prd> {
11117   let Predicates = [prd] in
11118     defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
11119                                   v8i64_info, v64i8_info>, EVEX_V512;
11120   let Predicates = [prd, HasVLX] in {
11121     defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
11122                                      v4i64x_info, v32i8x_info>, EVEX_V256;
11123     defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
11124                                      v2i64x_info, v16i8x_info>, EVEX_V128;
11125   }
11126 }
11128 defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
11129                                         SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
11131 // Transforms to swizzle an immediate to enable better matching when
11132 // the memory operand isn't in the right place.
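// The imm8 operand is a truth table indexed by the three source bits; assuming
// the usual encoding imm8[(op0 << 2) | (op1 << 1) | op2], reordering the
// sources simply permutes the index bits.  For example, imm8 0xF0 copies
// operand 0; VPTERNLOG321_imm8 (operands 0 and 2 swapped) turns it into 0xAA,
// which copies operand 2, so the reordered instruction computes the same value.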
11133 def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
11134   // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
11135   uint8_t Imm = N->getZExtValue();
11136   // Swap bits 1/4 and 3/6.
11137   uint8_t NewImm = Imm & 0xa5;
11138   if (Imm & 0x02) NewImm |= 0x10;
11139   if (Imm & 0x10) NewImm |= 0x02;
11140   if (Imm & 0x08) NewImm |= 0x40;
11141   if (Imm & 0x40) NewImm |= 0x08;
11142   return getI8Imm(NewImm, SDLoc(N));
11143 }]>;
11144 def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
11145   // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
11146   uint8_t Imm = N->getZExtValue();
11147   // Swap bits 2/4 and 3/5.
11148   uint8_t NewImm = Imm & 0xc3;
11149   if (Imm & 0x04) NewImm |= 0x10;
11150   if (Imm & 0x10) NewImm |= 0x04;
11151   if (Imm & 0x08) NewImm |= 0x20;
11152   if (Imm & 0x20) NewImm |= 0x08;
11153   return getI8Imm(NewImm, SDLoc(N));
11154 }]>;
11155 def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
11156   // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
11157   uint8_t Imm = N->getZExtValue();
11158   // Swap bits 1/2 and 5/6.
11159   uint8_t NewImm = Imm & 0x99;
11160   if (Imm & 0x02) NewImm |= 0x04;
11161   if (Imm & 0x04) NewImm |= 0x02;
11162   if (Imm & 0x20) NewImm |= 0x40;
11163   if (Imm & 0x40) NewImm |= 0x20;
11164   return getI8Imm(NewImm, SDLoc(N));
11165 }]>;
11166 def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
11167   // Convert a VPTERNLOG immediate by moving operand 0 to the end.
11168   uint8_t Imm = N->getZExtValue();
11169   // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
11170   uint8_t NewImm = Imm & 0x81;
11171   if (Imm & 0x02) NewImm |= 0x04;
11172   if (Imm & 0x04) NewImm |= 0x10;
11173   if (Imm & 0x08) NewImm |= 0x40;
11174   if (Imm & 0x10) NewImm |= 0x02;
11175   if (Imm & 0x20) NewImm |= 0x08;
11176   if (Imm & 0x40) NewImm |= 0x20;
11177   return getI8Imm(NewImm, SDLoc(N));
11178 }]>;
11179 def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
11180   // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
11181   uint8_t Imm = N->getZExtValue();
11182   // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
11183   uint8_t NewImm = Imm & 0x81;
11184   if (Imm & 0x02) NewImm |= 0x10;
11185   if (Imm & 0x04) NewImm |= 0x02;
11186   if (Imm & 0x08) NewImm |= 0x20;
11187   if (Imm & 0x10) NewImm |= 0x04;
11188   if (Imm & 0x20) NewImm |= 0x40;
11189   if (Imm & 0x40) NewImm |= 0x08;
11190   return getI8Imm(NewImm, SDLoc(N));
11191 }]>;
11193 multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
11194                           X86FoldableSchedWrite sched, X86VectorVTInfo _,
11195                           string Name>{
11196   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11197   defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11198                       (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
11199                       OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11200                       (OpNode (_.VT _.RC:$src1),
11201                               (_.VT _.RC:$src2),
11202                               (_.VT _.RC:$src3),
11203                               (i8 timm:$src4)), 1, 1>,
11204                       AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
11205   defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11206                     (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
11207                     OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11208                     (OpNode (_.VT _.RC:$src1),
11209                             (_.VT _.RC:$src2),
11210                             (_.VT (bitconvert (_.LdFrag addr:$src3))),
11211                             (i8 timm:$src4)), 1, 0>,
11212                     AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11213                     Sched<[sched.Folded, sched.ReadAfterFold]>;
11214   defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11215                     (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
11216                     OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
11217                     "$src2, ${src3}"##_.BroadcastStr##", $src4",
11218                     (OpNode (_.VT _.RC:$src1),
11219                             (_.VT _.RC:$src2),
11220                             (_.VT (_.BroadcastLdFrag addr:$src3)),
11221                             (i8 timm:$src4)), 1, 0>, EVEX_B,
11222                     AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11223                     Sched<[sched.Folded, sched.ReadAfterFold]>;
11224   }// Constraints = "$src1 = $dst"
11226   // Additional patterns for matching passthru operand in other positions.
11227   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11228                    (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11229                    _.RC:$src1)),
11230             (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11231              _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11232   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11233                    (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
11234                    _.RC:$src1)),
11235             (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11236              _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11238   // Additional patterns for matching loads in other positions.
11239   def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
11240                           _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
11241             (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
11242                                    addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11243   def : Pat<(_.VT (OpNode _.RC:$src1,
11244                           (bitconvert (_.LdFrag addr:$src3)),
11245                           _.RC:$src2, (i8 timm:$src4))),
11246             (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
11247                                    addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11249   // Additional patterns for matching zero masking with loads in other
11250   // positions.
11251   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11252                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
11253                     _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11254                    _.ImmAllZerosV)),
11255             (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11256              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11257   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11258                    (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11259                     _.RC:$src2, (i8 timm:$src4)),
11260                    _.ImmAllZerosV)),
11261             (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11262              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11264   // Additional patterns for matching masked loads with different
11265   // operand orders.
11266   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11267                    (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11268                     _.RC:$src2, (i8 timm:$src4)),
11269                    _.RC:$src1)),
11270             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11271              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11272   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11273                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
11274                     _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11275                    _.RC:$src1)),
11276             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11277              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11278   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11279                    (OpNode _.RC:$src2, _.RC:$src1,
11280                     (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
11281                    _.RC:$src1)),
11282             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11283              _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11284   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11285                    (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
11286                     _.RC:$src1, (i8 timm:$src4)),
11287                    _.RC:$src1)),
11288             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11289              _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11290   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11291                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
11292                     _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11293                    _.RC:$src1)),
11294             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11295              _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11297   // Additional patterns for matching broadcasts in other positions.
11298   def : Pat<(_.VT (OpNode (_.BroadcastLdFrag addr:$src3),
11299                           _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
11300             (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
11301                                    addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11302   def : Pat<(_.VT (OpNode _.RC:$src1,
11303                           (_.BroadcastLdFrag addr:$src3),
11304                           _.RC:$src2, (i8 timm:$src4))),
11305             (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
11306                                    addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11308   // Additional patterns for matching zero masking with broadcasts in other
11309   // positions.
11310   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11311                    (OpNode (_.BroadcastLdFrag addr:$src3),
11312                     _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11313                    _.ImmAllZerosV)),
11314             (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11315              _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11316              (VPTERNLOG321_imm8 timm:$src4))>;
11317   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11318                    (OpNode _.RC:$src1,
11319                     (_.BroadcastLdFrag addr:$src3),
11320                     _.RC:$src2, (i8 timm:$src4)),
11321                    _.ImmAllZerosV)),
11322             (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11323              _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11324              (VPTERNLOG132_imm8 timm:$src4))>;
11326   // Additional patterns for matching masked broadcasts with different
11327   // operand orders.
11328   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11329                    (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
11330                     _.RC:$src2, (i8 timm:$src4)),
11331                    _.RC:$src1)),
11332             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11333              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11334   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11335                    (OpNode (_.BroadcastLdFrag addr:$src3),
11336                     _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11337                    _.RC:$src1)),
11338             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11339              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11340   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11341                    (OpNode _.RC:$src2, _.RC:$src1,
11342                     (_.BroadcastLdFrag addr:$src3),
11343                     (i8 timm:$src4)), _.RC:$src1)),
11344             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11345              _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11346   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11347                    (OpNode _.RC:$src2,
11348                     (_.BroadcastLdFrag addr:$src3),
11349                     _.RC:$src1, (i8 timm:$src4)),
11350                    _.RC:$src1)),
11351             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11352              _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11353   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11354                    (OpNode (_.BroadcastLdFrag addr:$src3),
11355                     _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11356                    _.RC:$src1)),
11357             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11358              _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11359 }
11361 multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
11362                                  AVX512VLVectorVTInfo _> {
11363   let Predicates = [HasAVX512] in
11364     defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
11365                                _.info512, NAME>, EVEX_V512;
11366   let Predicates = [HasAVX512, HasVLX] in {
11367     defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
11368                                _.info128, NAME>, EVEX_V128;
11369     defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
11370                                _.info256, NAME>, EVEX_V256;
11371   }
11372 }
11374 defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
11375                                         avx512vl_i32_info>;
11376 defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
11377                                         avx512vl_i64_info>, VEX_W;
11379 // Patterns to use VPTERNLOG for vXi16/vXi8 vectors.
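// VPTERNLOG only exists with dword/qword element sizes, but the operation is
// purely bitwise, so the unmasked qword forms can be reused for byte/word
// vectors (element size would only matter for masking).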
11380 let Predicates = [HasVLX] in {
11381   def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
11382                                  (i8 timm:$src4))),
11383             (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
11384                                timm:$src4)>;
11385   def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
11386                                  (loadv16i8 addr:$src3), (i8 timm:$src4))),
11387             (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11388                                timm:$src4)>;
11389   def : Pat<(v16i8 (X86vpternlog (loadv16i8 addr:$src3), VR128X:$src2,
11390                                  VR128X:$src1, (i8 timm:$src4))),
11391             (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11392                                (VPTERNLOG321_imm8 timm:$src4))>;
11393   def : Pat<(v16i8 (X86vpternlog VR128X:$src1, (loadv16i8 addr:$src3),
11394                                  VR128X:$src2, (i8 timm:$src4))),
11395             (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11396                                (VPTERNLOG132_imm8 timm:$src4))>;
11398   def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
11399                                  (i8 timm:$src4))),
11400             (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
11401                                timm:$src4)>;
11402   def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
11403                                  (loadv8i16 addr:$src3), (i8 timm:$src4))),
11404             (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11405                                timm:$src4)>;
11406   def : Pat<(v8i16 (X86vpternlog (loadv8i16 addr:$src3), VR128X:$src2,
11407                                  VR128X:$src1, (i8 timm:$src4))),
11408             (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11409                                (VPTERNLOG321_imm8 timm:$src4))>;
11410   def : Pat<(v8i16 (X86vpternlog VR128X:$src1, (loadv8i16 addr:$src3),
11411                                  VR128X:$src2, (i8 timm:$src4))),
11412             (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11413                                (VPTERNLOG132_imm8 timm:$src4))>;
11415   def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
11416                                  (i8 timm:$src4))),
11417             (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
11418                                timm:$src4)>;
11419   def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
11420                                  (loadv32i8 addr:$src3), (i8 timm:$src4))),
11421             (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11422                                timm:$src4)>;
11423   def : Pat<(v32i8 (X86vpternlog (loadv32i8 addr:$src3), VR256X:$src2,
11424                                  VR256X:$src1, (i8 timm:$src4))),
11425             (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11426                                (VPTERNLOG321_imm8 timm:$src4))>;
11427   def : Pat<(v32i8 (X86vpternlog VR256X:$src1, (loadv32i8 addr:$src3),
11428                                  VR256X:$src2, (i8 timm:$src4))),
11429             (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11430                                (VPTERNLOG132_imm8 timm:$src4))>;
11432   def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
11433                                   (i8 timm:$src4))),
11434             (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
11435                                timm:$src4)>;
11436   def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
11437                                   (loadv16i16 addr:$src3), (i8 timm:$src4))),
11438             (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11439                                timm:$src4)>;
11440   def : Pat<(v16i16 (X86vpternlog (loadv16i16 addr:$src3), VR256X:$src2,
11441                                   VR256X:$src1, (i8 timm:$src4))),
11442             (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11443                                (VPTERNLOG321_imm8 timm:$src4))>;
11444   def : Pat<(v16i16 (X86vpternlog VR256X:$src1, (loadv16i16 addr:$src3),
11445                                   VR256X:$src2, (i8 timm:$src4))),
11446             (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11447                                (VPTERNLOG132_imm8 timm:$src4))>;
11448 }
11450 let Predicates = [HasAVX512] in {
11451   def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
11452                                  (i8 timm:$src4))),
11453             (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
11454                             timm:$src4)>;
11455   def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
11456                                  (loadv64i8 addr:$src3), (i8 timm:$src4))),
11457             (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11458                             timm:$src4)>;
11459   def : Pat<(v64i8 (X86vpternlog (loadv64i8 addr:$src3), VR512:$src2,
11460                                   VR512:$src1, (i8 timm:$src4))),
11461             (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11462                             (VPTERNLOG321_imm8 timm:$src4))>;
11463   def : Pat<(v64i8 (X86vpternlog VR512:$src1, (loadv64i8 addr:$src3),
11464                                  VR512:$src2, (i8 timm:$src4))),
11465             (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11466                             (VPTERNLOG132_imm8 timm:$src4))>;
11468   def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
11469                                   (i8 timm:$src4))),
11470             (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
11471                             timm:$src4)>;
11472   def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
11473                                   (loadv32i16 addr:$src3), (i8 timm:$src4))),
11474             (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11475                             timm:$src4)>;
11476   def : Pat<(v32i16 (X86vpternlog (loadv32i16 addr:$src3), VR512:$src2,
11477                                   VR512:$src1, (i8 timm:$src4))),
11478             (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11479                             (VPTERNLOG321_imm8 timm:$src4))>;
11480   def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3),
11481                                  VR512:$src2, (i8 timm:$src4))),
11482             (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11483                             (VPTERNLOG132_imm8 timm:$src4))>;
11484 }
11486 // Patterns to implement vnot using vpternlog instead of creating all ones
11487 // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
11488 // so that the result is only dependent on src0. But we use the same source
11489 // register for all operands to avoid introducing a false dependency on another register.
11490 // TODO: We should maybe have a more generalized algorithm for folding to
11491 // vpternlog.
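// A minimal example of what these patterns produce: the DAG
//   xor <8 x i64> %x, <all ones>
// is selected directly to
//   vpternlogq $15, %zmm0, %zmm0, %zmm0
// computing NOT %zmm0 without first materializing an all-ones vector.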
11492 let Predicates = [HasAVX512] in {
11493   def : Pat<(xor VR512:$src, (v64i8 immAllOnesV)),
11494             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11495   def : Pat<(xor VR512:$src, (v32i16 immAllOnesV)),
11496             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11497   def : Pat<(xor VR512:$src, (v16i32 immAllOnesV)),
11498             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11499   def : Pat<(xor VR512:$src, (v8i64 immAllOnesV)),
11500             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11501 }
11503 let Predicates = [HasAVX512, NoVLX] in {
11504   def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
11505             (EXTRACT_SUBREG
11506              (VPTERNLOGQZrri
11507               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11508               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11509               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11510               (i8 15)), sub_xmm)>;
11511   def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
11512             (EXTRACT_SUBREG
11513              (VPTERNLOGQZrri
11514               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11515               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11516               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11517               (i8 15)), sub_xmm)>;
11518   def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
11519             (EXTRACT_SUBREG
11520              (VPTERNLOGQZrri
11521               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11522               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11523               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11524               (i8 15)), sub_xmm)>;
11525   def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
11526             (EXTRACT_SUBREG
11527              (VPTERNLOGQZrri
11528               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11529               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11530               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11531               (i8 15)), sub_xmm)>;
11533   def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
11534             (EXTRACT_SUBREG
11535              (VPTERNLOGQZrri
11536               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11537               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11538               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11539               (i8 15)), sub_ymm)>;
11540   def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
11541             (EXTRACT_SUBREG
11542              (VPTERNLOGQZrri
11543               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11544               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11545               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11546               (i8 15)), sub_ymm)>;
11547   def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
11548             (EXTRACT_SUBREG
11549              (VPTERNLOGQZrri
11550               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11551               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11552               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11553               (i8 15)), sub_ymm)>;
11554   def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
11555             (EXTRACT_SUBREG
11556              (VPTERNLOGQZrri
11557               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11558               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11559               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11560               (i8 15)), sub_ymm)>;
11561 }
11563 let Predicates = [HasVLX] in {
11564   def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
11565             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11566   def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
11567             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11568   def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
11569             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11570   def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
11571             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11573   def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
11574             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11575   def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
11576             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11577   def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
11578             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11579   def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
11580             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11581 }
11583 //===----------------------------------------------------------------------===//
11584 // AVX-512 - FixupImm
11585 //===----------------------------------------------------------------------===//
11587 multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
11588                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
11589                                   X86VectorVTInfo TblVT>{
11590   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11591     defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11592                         (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11593                          OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11594                         (X86VFixupimm (_.VT _.RC:$src1),
11595                                       (_.VT _.RC:$src2),
11596                                       (TblVT.VT _.RC:$src3),
11597                                       (i32 timm:$src4))>, Sched<[sched]>;
11598     defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11599                       (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
11600                       OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11601                       (X86VFixupimm (_.VT _.RC:$src1),
11602                                     (_.VT _.RC:$src2),
11603                                     (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
11604                                     (i32 timm:$src4))>,
11605                       Sched<[sched.Folded, sched.ReadAfterFold]>;
11606     defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11607                       (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11608                     OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
11609                     "$src2, ${src3}"##_.BroadcastStr##", $src4",
11610                       (X86VFixupimm (_.VT _.RC:$src1),
11611                                     (_.VT _.RC:$src2),
11612                                     (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
11613                                     (i32 timm:$src4))>,
11614                     EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
11615   } // Constraints = "$src1 = $dst"
11616 }
11618 multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
11619                                       X86FoldableSchedWrite sched,
11620                                       X86VectorVTInfo _, X86VectorVTInfo TblVT>
11621   : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
11622 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11623   defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11624                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11625                       OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
11626                       "$src2, $src3, {sae}, $src4",
11627                       (X86VFixupimmSAE (_.VT _.RC:$src1),
11628                                        (_.VT _.RC:$src2),
11629                                        (TblVT.VT _.RC:$src3),
11630                                        (i32 timm:$src4))>,
11631                       EVEX_B, Sched<[sched]>;
11632   }
11633 }
11635 multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
11636                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
11637                                   X86VectorVTInfo _src3VT> {
11638   let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
11639       ExeDomain = _.ExeDomain in {
11640     defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11641                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11642                       OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11643                       (X86VFixupimms (_.VT _.RC:$src1),
11644                                      (_.VT _.RC:$src2),
11645                                      (_src3VT.VT _src3VT.RC:$src3),
11646                                      (i32 timm:$src4))>, Sched<[sched]>;
11647     defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11648                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11649                       OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
11650                       "$src2, $src3, {sae}, $src4",
11651                       (X86VFixupimmSAEs (_.VT _.RC:$src1),
11652                                         (_.VT _.RC:$src2),
11653                                         (_src3VT.VT _src3VT.RC:$src3),
11654                                         (i32 timm:$src4))>,
11655                       EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
11656     defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
11657                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11658                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11659                      (X86VFixupimms (_.VT _.RC:$src1),
11660                                     (_.VT _.RC:$src2),
11661                                     (_src3VT.VT (scalar_to_vector
11662                                               (_src3VT.ScalarLdFrag addr:$src3))),
11663                                     (i32 timm:$src4))>,
11664                      Sched<[sched.Folded, sched.ReadAfterFold]>;
11665   }
11666 }
11668 multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
11669                                       AVX512VLVectorVTInfo _Vec,
11670                                       AVX512VLVectorVTInfo _Tbl> {
11671   let Predicates = [HasAVX512] in
11672     defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
11673                                 _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
11674                                 EVEX_4V, EVEX_V512;
11675   let Predicates = [HasAVX512, HasVLX] in {
11676     defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
11677                             _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
11678                             EVEX_4V, EVEX_V128;
11679     defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
11680                             _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
11681                             EVEX_4V, EVEX_V256;
11682   }
11683 }
11685 defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11686                                            SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
11687                           AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11688 defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11689                                            SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
11690                           AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
11691 defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
11692                          avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11693 defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
11694                          avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
11696 // Patterns used to select SSE scalar fp arithmetic instructions from
11697 // either:
11699 // (1) a scalar fp operation followed by a blend
11701 // The effect is that the backend no longer emits unnecessary vector
11702 // insert instructions immediately after SSE scalar fp instructions
11703 // like addss or mulss.
11705 // For example, given the following code:
11706 //   __m128 foo(__m128 A, __m128 B) {
11707 //     A[0] += B[0];
11708 //     return A;
11709 //   }
11711 // Previously we generated:
11712 //   addss %xmm0, %xmm1
11713 //   movss %xmm1, %xmm0
11715 // We now generate:
11716 //   addss %xmm1, %xmm0
11718 // (2) a vector packed single/double fp operation followed by a vector insert
11720 // The effect is that the backend converts the packed fp instruction
11721 // followed by a vector insert into a single SSE scalar fp instruction.
11723 // For example, given the following code:
11724 //   __m128 foo(__m128 A, __m128 B) {
11725 //     __m128 C = A + B;
11726 //     return (__m128) {C[0], A[1], A[2], A[3]};
11727 //   }
11729 // Previously we generated:
11730 //   addps %xmm0, %xmm1
11731 //   movss %xmm1, %xmm0
11733 // We now generate:
11734 //   addss %xmm1, %xmm0
11736 // TODO: Some canonicalization in lowering would simplify the number of
11737 // patterns we have to try to match.
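// The multiclass below matches a MoveNode (X86Movss/X86Movsd) whose inserted
// scalar is Op applied to element 0 of the destination, optionally wrapped in
// X86selects for the masked and zero-masked variants, and selects the
// corresponding Zrr_Int/Zrm_Int instruction directly.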
11738 multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode,
11739                                            X86VectorVTInfo _, PatLeaf ZeroFP> {
11740   let Predicates = [HasAVX512] in {
11741     // extracted scalar math op with insert via movss
11742     def : Pat<(MoveNode
11743                (_.VT VR128X:$dst),
11744                (_.VT (scalar_to_vector
11745                       (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
11746                           _.FRC:$src)))),
11747               (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
11748                (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
11749     def : Pat<(MoveNode
11750                (_.VT VR128X:$dst),
11751                (_.VT (scalar_to_vector
11752                       (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
11753                           (_.ScalarLdFrag addr:$src))))),
11754               (!cast<Instruction>("V"#OpcPrefix#Zrm_Int) _.VT:$dst, addr:$src)>;
11756     // extracted masked scalar math op with insert via movss
11757     def : Pat<(MoveNode (_.VT VR128X:$src1),
11758                (scalar_to_vector
11759                 (X86selects VK1WM:$mask,
11760                             (Op (_.EltVT
11761                                  (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11762                                 _.FRC:$src2),
11763                             _.FRC:$src0))),
11764               (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk)
11765                (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
11766                VK1WM:$mask, _.VT:$src1,
11767                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
11768     def : Pat<(MoveNode (_.VT VR128X:$src1),
11769                (scalar_to_vector
11770                 (X86selects VK1WM:$mask,
11771                             (Op (_.EltVT
11772                                  (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11773                                 (_.ScalarLdFrag addr:$src2)),
11774                             _.FRC:$src0))),
11775               (!cast<Instruction>("V"#OpcPrefix#Zrm_Intk)
11776                (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
11777                VK1WM:$mask, _.VT:$src1, addr:$src2)>;
11779     // extracted zero-masked scalar math op with insert via movss
11780     def : Pat<(MoveNode (_.VT VR128X:$src1),
11781                (scalar_to_vector
11782                 (X86selects VK1WM:$mask,
11783                             (Op (_.EltVT
11784                                  (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11785                                 _.FRC:$src2), (_.EltVT ZeroFP)))),
11786       (!cast<I>("V"#OpcPrefix#Zrr_Intkz) 
11787           VK1WM:$mask, _.VT:$src1,
11788           (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
11789     def : Pat<(MoveNode (_.VT VR128X:$src1),
11790                (scalar_to_vector
11791                 (X86selects VK1WM:$mask,
11792                             (Op (_.EltVT
11793                                  (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11794                                 (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
11795       (!cast<I>("V"#OpcPrefix#Zrm_Intkz) VK1WM:$mask, _.VT:$src1, addr:$src2)>;
11796   }
11797 }
11799 defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
11800 defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
11801 defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
11802 defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
11804 defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
11805 defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
11806 defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
11807 defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
11809 multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
11810                                              SDNode Move, X86VectorVTInfo _> {
11811   let Predicates = [HasAVX512] in {
11812     def : Pat<(_.VT (Move _.VT:$dst,
11813                      (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
11814               (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src)>;
11815   }
11816 }
11818 defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
11819 defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
11821 //===----------------------------------------------------------------------===//
11822 // AES instructions
11823 //===----------------------------------------------------------------------===//
11825 multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
11826   let Predicates = [HasVLX, HasVAES] in {
11827     defm Z128 : AESI_binop_rm_int<Op, OpStr,
11828                                   !cast<Intrinsic>(IntPrefix),
11829                                   loadv2i64, 0, VR128X, i128mem>,
11830                   EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
11831     defm Z256 : AESI_binop_rm_int<Op, OpStr,
11832                                   !cast<Intrinsic>(IntPrefix##"_256"),
11833                                   loadv4i64, 0, VR256X, i256mem>,
11834                   EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
11835     }
11836     let Predicates = [HasAVX512, HasVAES] in
11837     defm Z    : AESI_binop_rm_int<Op, OpStr,
11838                                   !cast<Intrinsic>(IntPrefix##"_512"),
11839                                   loadv8i64, 0, VR512, i512mem>,
11840                   EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
11841 }
11843 defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
11844 defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
11845 defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
11846 defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
11848 //===----------------------------------------------------------------------===//
11849 // PCLMUL instructions - Carry less multiplication
11850 // PCLMUL instructions - Carry-less multiplication
11852 let Predicates = [HasAVX512, HasVPCLMULQDQ] in
11853 defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
11854                               EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;
11856 let Predicates = [HasVLX, HasVPCLMULQDQ] in {
11857 defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
11858                               EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;
11860 defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
11861                                 int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
11862                                 EVEX_CD8<64, CD8VF>, VEX_WIG;
11863 }
11865 // Aliases
11866 defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
11867 defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
11868 defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
11870 //===----------------------------------------------------------------------===//
11871 // VBMI2
11872 //===----------------------------------------------------------------------===//
11874 multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
11875                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
11876   let Constraints = "$src1 = $dst",
11877       ExeDomain   = VTI.ExeDomain in {
11878     defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
11879                 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
11880                 "$src3, $src2", "$src2, $src3",
11881                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
11882                 AVX512FMA3Base, Sched<[sched]>;
11883     defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11884                 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
11885                 "$src3, $src2", "$src2, $src3",
11886                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
11887                         (VTI.VT (VTI.LdFrag addr:$src3))))>,
11888                 AVX512FMA3Base,
11889                 Sched<[sched.Folded, sched.ReadAfterFold]>;
11890   }
11891 }
11893 multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
11894                                X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
11895          : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
11896   let Constraints = "$src1 = $dst",
11897       ExeDomain   = VTI.ExeDomain in
11898   defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11899               (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
11900               "${src3}"##VTI.BroadcastStr##", $src2",
11901               "$src2, ${src3}"##VTI.BroadcastStr,
11902               (OpNode VTI.RC:$src1, VTI.RC:$src2,
11903                (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
11904               AVX512FMA3Base, EVEX_B,
11905               Sched<[sched.Folded, sched.ReadAfterFold]>;
11906 }
11908 multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
11909                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
11910   let Predicates = [HasVBMI2] in
11911   defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
11912                                    EVEX_V512;
11913   let Predicates = [HasVBMI2, HasVLX] in {
11914     defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
11915                                    EVEX_V256;
11916     defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
11917                                    EVEX_V128;
11918   }
11919 }
11921 multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
11922                                       X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
11923   let Predicates = [HasVBMI2] in
11924   defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
11925                                     EVEX_V512;
11926   let Predicates = [HasVBMI2, HasVLX] in {
11927     defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
11928                                     EVEX_V256;
11929     defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
11930                                     EVEX_V128;
11931   }
11932 }
11933 multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
11934                            SDNode OpNode, X86SchedWriteWidths sched> {
11935   defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, sched,
11936              avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
11937   defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, sched,
11938              avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11939   defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, sched,
11940              avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
11941 }
11943 multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
11944                            SDNode OpNode, X86SchedWriteWidths sched> {
11945   defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", sched,
11946              avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
11947              VEX_W, EVEX_CD8<16, CD8VF>;
11948   defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
11949              OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11950   defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
11951              sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
11952 }
11954 // Concat & Shift
11955 defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
11956 defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
11957 defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
11958 defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
11960 // Compress
11961 defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
11962                                          avx512vl_i8_info, HasVBMI2>, EVEX,
11963                                          NotMemoryFoldable;
11964 defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
11965                                           avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
11966                                           NotMemoryFoldable;
11967 // Expand
11968 defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
11969                                       avx512vl_i8_info, HasVBMI2>, EVEX;
11970 defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
11971                                       avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
11973 //===----------------------------------------------------------------------===//
11974 // VNNI
11975 //===----------------------------------------------------------------------===//
11977 let Constraints = "$src1 = $dst" in
11978 multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
11979                     X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
11980                     bit IsCommutable> {
11981   defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
11982                                    (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
11983                                    "$src3, $src2", "$src2, $src3",
11984                                    (VTI.VT (OpNode VTI.RC:$src1,
11985                                             VTI.RC:$src2, VTI.RC:$src3)),
11986                                    IsCommutable, IsCommutable>,
11987                                    EVEX_4V, T8PD, Sched<[sched]>;
11988   defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11989                                    (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
11990                                    "$src3, $src2", "$src2, $src3",
11991                                    (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
11992                                             (VTI.VT (VTI.LdFrag addr:$src3))))>,
11993                                    EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
11994                                    Sched<[sched.Folded, sched.ReadAfterFold]>;
11995   defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11996                                    (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
11997                                    OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
11998                                    "$src2, ${src3}"##VTI.BroadcastStr,
11999                                    (OpNode VTI.RC:$src1, VTI.RC:$src2,
12000                                     (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12001                                    EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
12002                                    T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
12003 }
12005 multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
12006                        X86SchedWriteWidths sched, bit IsCommutable> {
12007   let Predicates = [HasVNNI] in
12008   defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
12009                            IsCommutable>, EVEX_V512;
12010   let Predicates = [HasVNNI, HasVLX] in {
12011     defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
12012                            IsCommutable>, EVEX_V256;
12013     defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
12014                            IsCommutable>, EVEX_V128;
12015   }
12016 }
12018 // FIXME: Is there a better scheduler class for VPDP?
12019 defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
12020 defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
12021 defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
12022 defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
12024 def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
12025                              (X86vpmaddwd node:$lhs, node:$rhs), [{
12026   return N->hasOneUse();
12027 }]>;
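// The one-use check ensures the multiply is only folded into VPDPWSSD below
// when the intermediate VPMADDWD result has no other users.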
12029 // Patterns to match VPDPWSSD from existing instructions/intrinsics.
12030 let Predicates = [HasVNNI] in {
12031   def : Pat<(v16i32 (add VR512:$src1,
12032                          (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
12033             (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
12034   def : Pat<(v16i32 (add VR512:$src1,
12035                          (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
12036             (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
12037 }
12038 let Predicates = [HasVNNI,HasVLX] in {
12039   def : Pat<(v8i32 (add VR256X:$src1,
12040                         (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
12041             (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
12042   def : Pat<(v8i32 (add VR256X:$src1,
12043                         (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
12044             (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
12045   def : Pat<(v4i32 (add VR128X:$src1,
12046                         (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
12047             (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
12048   def : Pat<(v4i32 (add VR128X:$src1,
12049                         (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
12050             (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
12051 }
12053 //===----------------------------------------------------------------------===//
12054 // Bit Algorithms
12055 //===----------------------------------------------------------------------===//
12057 // FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
12058 defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
12059                                    avx512vl_i8_info, HasBITALG>;
12060 defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
12061                                    avx512vl_i16_info, HasBITALG>, VEX_W;
12063 defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
12064 defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
12066 def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
12067                                  (X86Vpshufbitqmb node:$src1, node:$src2), [{
12068   return N->hasOneUse();
12069 }]>;
12071 multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12072   defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
12073                                 (ins VTI.RC:$src1, VTI.RC:$src2),
12074                                 "vpshufbitqmb",
12075                                 "$src2, $src1", "$src1, $src2",
12076                                 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12077                                 (VTI.VT VTI.RC:$src2)),
12078                                 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12079                                 (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
12080                                 Sched<[sched]>;
12081   defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
12082                                 (ins VTI.RC:$src1, VTI.MemOp:$src2),
12083                                 "vpshufbitqmb",
12084                                 "$src2, $src1", "$src1, $src2",
12085                                 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12086                                 (VTI.VT (VTI.LdFrag addr:$src2))),
12087                                 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12088                                 (VTI.VT (VTI.LdFrag addr:$src2)))>,
12089                                 EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
12090                                 Sched<[sched.Folded, sched.ReadAfterFold]>;
12091 }
12093 multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12094   let Predicates = [HasBITALG] in
12095   defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
12096   let Predicates = [HasBITALG, HasVLX] in {
12097     defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
12098     defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
12099   }
12100 }
12102 // FIXME: Is there a better scheduler class for VPSHUFBITQMB?
12103 defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
12105 //===----------------------------------------------------------------------===//
12106 // GFNI
12107 //===----------------------------------------------------------------------===//
multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                   X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
                                EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
                                EVEX_V256;
    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
                                EVEX_V128;
  }
}

defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
                                          SchedWriteVecALU>,
                                          EVEX_CD8<8, CD8VF>, T8PD;

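// GF2P8MULB multiplies the packed bytes of the two sources as elements of
// GF(2^8), reduced by the AES polynomial x^8 + x^4 + x^3 + x + 1. Since
// multiplication in that field is commutative, it is modeled as a simple
// commutative byte binop (the trailing "1" above) with no broadcast form.
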
multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                                      X86VectorVTInfo BcstVTI>
           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
  let ExeDomain = VTI.ExeDomain in
  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
                OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
                "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
                (OpNode (VTI.VT VTI.RC:$src1),
                 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
                 (i8 timm:$src3))>, EVEX_B,
                 Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
                                           v64i8_info, v8i64_info>, EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
                                           v32i8x_info, v4i64x_info>, EVEX_V256;
    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
                                           v16i8x_info, v2i64x_info>, EVEX_V128;
  }
}

defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
                         X86GF2P8affineinvqb, SchedWriteVecIMul>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
                         X86GF2P8affineqb, SchedWriteVecIMul>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;

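// The affine forms apply a per-byte affine transformation over GF(2): an 8x8
// bit-matrix multiply (the matrix is packed into a quadword) followed by an
// XOR with imm8; the "inv" variant first takes the multiplicative inverse of
// the byte in GF(2^8). Broadcast is at quadword granularity, which is why the
// rmbi pattern above uses X86VBroadcastld64 with a v*i64 BcstVTI even though
// the element type is i8.
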
//===----------------------------------------------------------------------===//
// AVX5124FMAPS
//===----------------------------------------------------------------------===//

let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
    Constraints = "$src1 = $dst" in {
defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
                    (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
                    []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                    Sched<[SchedWriteFMA.Scl.Folded]>;

defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
                     []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                     Sched<[SchedWriteFMA.Scl.Folded]>;
}

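// The 4FMAPS forms above are assembler-only ("_in_asm" with empty patterns);
// they are presumably selected from their intrinsics rather than from generic
// DAG patterns. Roughly, $src2 names the first register of a block of four
// sequential registers, and the 128-bit memory operand supplies the four FP32
// scalars used by the four chained multiply-adds.
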
//===----------------------------------------------------------------------===//
// AVX5124VNNIW
//===----------------------------------------------------------------------===//

let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
    Constraints = "$src1 = $dst" in {
defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "vp4dpwssd", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;
}

let hasSideEffects = 0 in {
  let mayStore = 1 in
  def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
  let mayLoad = 1 in
  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
}

//===----------------------------------------------------------------------===//
// VP2INTERSECT
//===----------------------------------------------------------------------===//

multiclass avx512_vp2intersect_modes<X86VectorVTInfo _> {
  def rr : I<0x68, MRMSrcReg,
                  (outs _.KRPC:$dst),
                  (ins _.RC:$src1, _.RC:$src2),
                  !strconcat("vp2intersect", _.Suffix,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.KRPC:$dst, (X86vp2intersect
                            _.RC:$src1, (_.VT _.RC:$src2)))]>,
                  EVEX_4V, T8XD;

  def rm : I<0x68, MRMSrcMem,
                  (outs _.KRPC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2),
                  !strconcat("vp2intersect", _.Suffix,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.KRPC:$dst, (X86vp2intersect
                            _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
                  EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>;

  def rmb : I<0x68, MRMSrcMem,
                  (outs _.KRPC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2),
                  !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
                             ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
                  [(set _.KRPC:$dst, (X86vp2intersect
                             _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
                  EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
}

multiclass avx512_vp2intersect<AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512, HasVP2INTERSECT] in
    defm Z : avx512_vp2intersect_modes<_.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
    defm Z256 : avx512_vp2intersect_modes<_.info256>, EVEX_V256;
    defm Z128 : avx512_vp2intersect_modes<_.info128>, EVEX_V128;
  }
}

defm VP2INTERSECTD : avx512_vp2intersect<avx512vl_i32_info>;
defm VP2INTERSECTQ : avx512_vp2intersect<avx512vl_i64_info>, VEX_W;

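// VP2INTERSECT is unusual in that it defines a *pair* of mask registers (the
// KRPC operand above, e.g. VK16Pair): one mask marks, for each element of the
// first source, whether it also occurs in the second source, and the other
// mask does the same for the second source. The MASKPAIR16STORE/MASKPAIR16LOAD
// pseudos just above exist to spill and reload such pairs.
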
multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo _SrcVTInfo,
                             AVX512VLVectorVTInfo _DstVTInfo,
                             SDNode OpNode, Predicate prd,
                             bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                   _SrcVTInfo.info512, _DstVTInfo.info512,
                                   _SrcVTInfo.info512, IsCommutable>,
                                   EVEX_V512, EVEX_CD8<32, CD8VF>;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      _SrcVTInfo.info256, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      _SrcVTInfo.info128, IsCommutable>,
                                      EVEX_V128, EVEX_CD8<32, CD8VF>;
  }
}

defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
                                        SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
                                        avx512vl_f32_info, avx512vl_i16_info,
                                        X86cvtne2ps2bf16, HasBF16, 0>, T8XD;

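// VCVTNE2PS2BF16 converts *two* packed FP32 sources into a single packed BF16
// result (the two halves of the destination), which is why it is modeled as a
// two-source binop via avx512_binop_all2 with f32 source info and i16
// destination info.
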
// Truncate Float to BFloat16
multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched> {
  let Predicates = [HasBF16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
                            X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasBF16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
                               null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
                               VK4WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
                               X86cvtneps2bf16,
                               sched.YMM, "{1to8}", "{y}">, EVEX_V256;

    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                    VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
                    f128mem:$src), 0, "intel">;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                    VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
                    f256mem:$src), 0, "intel">;
  }
}

defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
                                       SchedWriteCvtPD2PS>, T8XS,
                                       EVEX_CD8<32, CD8VF>;

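// The "{x}"/"{y}" mnemonic suffixes and the InstAliases inside
// avx512_cvtps2bf16 exist because the 128-bit and 256-bit forms both write an
// XMM destination, so the plain "vcvtneps2bf16" spelling with a memory source
// would be ambiguous for the assembler.
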
let Predicates = [HasBF16, HasVLX] in {
  // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;

  def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
            (VCVTNEPS2BF16Z128rm addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
                                     (X86VBroadcastld32 addr:$src)))),
            (VCVTNEPS2BF16Z128rmb addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                              (v8i16 VR128X:$src0), VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                              v8i16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
}

let Constraints = "$src1 = $dst" in {
multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86VectorVTInfo _, X86VectorVTInfo src_v> {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src2, _.RC:$src3),
                           OpcodeStr, "$src3, $src2", "$src2, $src3",
                           (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
                           EVEX_4V;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                               (ins _.RC:$src2, _.MemOp:$src3),
                               OpcodeStr, "$src3, $src2", "$src2, $src3",
                               (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                               (src_v.VT (bitconvert
                               (src_v.LdFrag addr:$src3)))))>, EVEX_4V;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src2, _.ScalarMemOp:$src3),
                  OpcodeStr,
                  !strconcat("${src3}", _.BroadcastStr, ", $src2"),
                  !strconcat("$src2, ${src3}", _.BroadcastStr),
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                  (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
                  EVEX_B, EVEX_4V;
}
} // Constraints = "$src1 = $dst"

multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _,
                                 AVX512VLVectorVTInfo src_v, Predicate prd> {
  let Predicates = [prd] in {
    defm Z    : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info512,
                                   src_v.info512>, EVEX_V512;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info256,
                                   src_v.info256>, EVEX_V256;
    defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info128,
                                   src_v.info128>, EVEX_V128;
  }
}

defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps,
                                       avx512vl_f32_info, avx512vl_i32_info,
                                       HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
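
// VDPBF16PS is the BF16 dot-product accumulate: each FP32 element of the
// destination is updated with the sum of products of the two BF16 pairs in the
// corresponding dword of the sources, hence the split between avx512vl_f32_info
// (destination/accumulator) and avx512vl_i32_info (BF16 pair sources) in the
// instantiation above.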