//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// Group template arguments that can be derived from the vector type (EltNum x
// EltVT).  These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types, in this case numelts is 1.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  RegisterClass RC = rc;
  ValueType EltVT = eltvt;
  int NumElts = numelts;

  // Corresponding mask register class.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Corresponding mask register pair class.
  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));

  // Corresponding write-mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // The mask VT.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // VTName is a string name for vector VT. For vector types it will be
  // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
  // It is a little bit complex for scalar types, where NumElts = 1.
  // In this case we build v4f32 or v2f64
  string VTName = "v" # !if (!eq (NumElts, 1),
                        !if (!eq (EltVT.Size, 32), 4,
                        !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;

  // The vector VT.
  ValueType VT = !cast<ValueType>(VTName);

  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits, e.g. 32 for v16i32.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of RC in bits, e.g. 512 for VR512.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
  // FP scalar memory operand for intrinsics - ssmem/sdmem.
  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));

  // Load patterns
  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);

  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);

  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);

  ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
                                          !cast<ComplexPattern>("sse_load_f32"),
                                    !if (!eq (EltTypeName, "f64"),
                                          !cast<ComplexPattern>("sse_load_f64"),
                                    ?));

  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format.  This is only
  // defined for NumElts <= 8.
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);

  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                     SSEPackedInt));

  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);

  dag ImmAllZerosV = (VT immAllZerosV);

  string ZSuffix = !if (!eq (Size, 128), "Z128",
                   !if (!eq (Size, 256), "Z256", "Z"));
}
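
// For reference, a concrete instantiation such as v16i32_info (defined below)
// works out to roughly: RC = VR512, NumElts = 16, KRC = VK16, KRCWM = VK16WM,
// KVT = v16i1, VTName = "v16i32", VT = v16i32, EltSize = 32,
// TypeVariantName = "i", Size = 512, MemOp = i512mem, LdFrag = loadv16i32,
// BroadcastStr = "{1to16}", ExeDomain = SSEPackedInt, ZSuffix = "Z".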
def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;

// "x" in v32i8x_info means RC = VR256X
def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;

def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;

// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows using the same masking logic.
def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;
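
// For example, with NumElts = 1 and EltVT = f32 the VTName logic above gives
// f32x_info a VTName of "v4f32", so its VT is the full 128-bit v4f32 even
// though only the low element is meaningful.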

class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}

def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
                                             v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
                                             v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
                                             v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
                                             v2i64x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
                                             v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
                                             v2f64x_info>;

class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
                       ValueType _vt> {
  RegisterClass KRC = _krc;
  RegisterClass KRCWM = _krcwm;
  ValueType KVT = _vt;
}

def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;

// This multiclass generates the masking variants from the non-masking
// variant.  It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> {
  let isCommutable = IsCommutable in
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>,
              EVEX_K {
      // In case of the 3src subclass this is overridden with a let.
      string Constraints = MaskingConstraint;
    }

  // Zero-masking does not add any restrictions to the operand-commuting
  // transformation, so it is OK to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                       ZeroMaskingPattern>,
              EVEX_KZ;
}
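
// As a rough illustration, a "defm rr : AVX512_maskable_..." nested inside
// "defm VFOOZ : ..." ends up producing three records: VFOOZrr (unmasked),
// VFOOZrrk (merge-masking, "$dst {${mask}}") and VFOOZrrkz (zero-masking,
// "$dst {${mask}} {z}"). VFOOZ is only a placeholder name here.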

// Common base class of AVX512_maskable and AVX512_maskable_3src.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDNode Select = vselect,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable, IsKZCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag MaskRHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           SDNode Select = vselect> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                              (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                          [(set _.RC:$dst,
                              (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                          "$src0 = $dst", IsCommutable, IsKCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDNode Select = vselect> :
   AVX512_maskable_common<O, F, _, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                          Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                          IsKZCommutable>;
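
// For instance, instantiated with _ = v16i32_info, the merge-masking variant
// above ends up with the pattern
//   (set VR512:$dst, (vselect VK16WM:$mask, RHS, VR512:$src0))
// while the zero-masking variant selects against immAllZerosV instead of $src0.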

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS> :
   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                   RHS, 0, 0, 0, X86selects>;

// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDNode Select = vselect,
                                bit MaskOnly = 0> :
   AVX512_maskable_common<O, F, _, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          !if(MaskOnly, (null_frag), RHS),
                          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                          Select, "", IsCommutable, IsKCommutable>;

// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
   AVX512_maskable_common<O, F, OutVT, Outs,
                          !con((ins InVT.RC:$src1), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                          (vselect InVT.KRCWM:$mask, RHS,
                           (bitconvert InVT.RC:$src1)),
                           vselect, "", IsCommutable>;

multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS,
                                     bit IsCommutable = 0,
                                     bit IsKCommutable = 0,
                                     bit MaskOnly = 0> :
   AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                        IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                        X86selects, MaskOnly>;

multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "$src0 = $dst">;

multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "">;

// Instructions with a mask that put their result in a mask register,
// like "compare" and "vptest".
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  bit IsCommutable = 0> {
    let isCommutable = IsCommutable in {
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>, EVEX_K;
    }
}

multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  bit IsCommutable = 0> :
  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.KRC:$dst, RHS)],
                         [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;

multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag RHS_su, bit IsCommutable = 0> :
   AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (and _.KRCWM:$mask, RHS_su), IsCommutable>;
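
// Note that for these compare-style multiclasses the destination is a mask
// register, so masking is expressed by ANDing the mask with the result
// (the "(and _.KRCWM:$mask, RHS_su)" pattern above) rather than by a vselect.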

// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}

// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all-ones and all-zeros elements. They are defined this way to force
// the same register to be used as the input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                      (v16i32 immAllOnesV),
                                                      (v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                           (v8i64 immAllOnesV),
                                           (v8i64 immAllZerosV)))]>;
}

let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}

// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fpimm0)]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//

// Supports two different pattern operators for masked and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                               (From.VT (From.LdFrag addr:$src2)),
                               (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                               (From.VT (From.LdFrag addr:$src2)),
                               (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
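
// For example, instantiating this with From = v4f32x_info and To = v16f32_info
// builds the "vinsertf32x4" mnemonic ("vinsert" # "f32" # "x" # 4), with the
// register form scheduled on 'sched' and the folded-load form on sched.Folded.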

// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;

multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                       X86VectorVTInfo To, PatFrag vinsert_insert,
                       SDNodeXForm INSERT_get_vinsert_imm, list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vinsert_insert:$ins
                     (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                     To.RC:$src1, From.RC:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;

    def : Pat<(vinsert_insert:$ins
                  (To.VT To.RC:$src1),
                  (From.VT (From.LdFrag addr:$src2)),
                  (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                  To.RC:$src1, addr:$src2,
                  (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}

multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   null_frag, vinsert128_insert, sched>,
                                   VEX_W1X, EVEX_V256;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 VEX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   null_frag, vinsert256_insert, sched>,
                                   EVEX_V512;
  }
}

// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
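
// For reference, these two defms expand (subject to the predicates above) into
// the VINSERTF32x4Z256/Z, VINSERTF64x4Z, VINSERTF64x2Z256/Z and VINSERTF32x8Z
// families, plus the corresponding VINSERTI* integer forms.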

// Codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen patterns with the alternative types: insert VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen patterns with the alternative types: insert VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: insert VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT
                                             (bitconvert
                                              (From.LdFrag addr:$src2))),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT (From.LdFrag addr:$src2)),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}

defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

// vinsertps - insert f32 to XMM
let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          imm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//===----------------------------------------------------------------------===//

// Supports two different pattern operators for masked and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                             addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, To.KRCWM:$mask,
                                        From.RC:$src1, u8imm:$idx),
                     "vextract" # To.EltTypeName # "x" # To.NumElts #
                          "\t{$idx, $src1, $dst {${mask}}|"
                          "$dst {${mask}}, $src1, $idx}", []>,
                    EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
  }
}
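
// E.g. with From = v16f32_info and To = v4f32x_info this builds the
// "vextractf32x4" mnemonic, giving register ("rr"), store ("mr") and masked
// store ("mrk") forms.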

// Passes the same pattern operator for masked and unmasked ops.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;

// Codegen patterns for the alternative types.
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                X86VectorVTInfo To, PatFrag vextract_extract,
                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
     def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
               (To.VT (!cast<Instruction>(InstrStr#"rr")
                          From.RC:$src1,
                          (EXTRACT_get_vextract_imm To.RC:$ext)))>;
     def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                              (iPTR imm))), addr:$dst),
               (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
                (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}

multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   vextract128_extract, SchedRR, SchedMR>,
                                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                                   X86VectorVTInfo< 8, EltVT64, VR512>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   vextract256_extract, SchedRR, SchedMR>,
                                       VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}

// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
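
// For reference, these expand into the VEXTRACTF32x4Z/Z256, VEXTRACTF64x4Z,
// VEXTRACTF64x2Z256/Z and VEXTRACTF32x8Z families, plus the matching
// VEXTRACTI* integer forms.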

// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC256.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: extract VEC256 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [NoVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}
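
// In these patterns the original zmm element index (e.g. iPTR 2 for v2i64)
// corresponds to bits [255:128], so the rewrite first takes the low ymm via
// sub_ymm and then extracts its upper half with immediate 1.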

// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              To.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}
980 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
981                               v4f32x_info, vextract128_extract,
982                               EXTRACT_get_vextract128_imm, [HasVLX]>;
983 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
984                               v2f64x_info, vextract128_extract,
985                               EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
987 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
988                               v4i32x_info, vextract128_extract,
989                               EXTRACT_get_vextract128_imm, [HasVLX]>;
990 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
991                               v4i32x_info, vextract128_extract,
992                               EXTRACT_get_vextract128_imm, [HasVLX]>;
993 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
994                               v4i32x_info, vextract128_extract,
995                               EXTRACT_get_vextract128_imm, [HasVLX]>;
996 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
997                               v2i64x_info, vextract128_extract,
998                               EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
999 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
1000                               v2i64x_info, vextract128_extract,
1001                               EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1002 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
1003                               v2i64x_info, vextract128_extract,
1004                               EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1006 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
1007                               v4f32x_info, vextract128_extract,
1008                               EXTRACT_get_vextract128_imm, [HasAVX512]>;
1009 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
1010                               v2f64x_info, vextract128_extract,
1011                               EXTRACT_get_vextract128_imm, [HasDQI]>;
1013 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
1014                               v4i32x_info, vextract128_extract,
1015                               EXTRACT_get_vextract128_imm, [HasAVX512]>;
1016 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
1017                               v4i32x_info, vextract128_extract,
1018                               EXTRACT_get_vextract128_imm, [HasAVX512]>;
1019 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
1020                               v4i32x_info, vextract128_extract,
1021                               EXTRACT_get_vextract128_imm, [HasAVX512]>;
1022 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
1023                               v2i64x_info, vextract128_extract,
1024                               EXTRACT_get_vextract128_imm, [HasDQI]>;
1025 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
1026                               v2i64x_info, vextract128_extract,
1027                               EXTRACT_get_vextract128_imm, [HasDQI]>;
1028 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
1029                               v2i64x_info, vextract128_extract,
1030                               EXTRACT_get_vextract128_imm, [HasDQI]>;
1032 defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
1033                               v8f32x_info, vextract256_extract,
1034                               EXTRACT_get_vextract256_imm, [HasDQI]>;
1035 defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
1036                               v4f64x_info, vextract256_extract,
1037                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1039 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
1040                               v8i32x_info, vextract256_extract,
1041                               EXTRACT_get_vextract256_imm, [HasDQI]>;
1042 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
1043                               v8i32x_info, vextract256_extract,
1044                               EXTRACT_get_vextract256_imm, [HasDQI]>;
1045 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
1046                               v8i32x_info, vextract256_extract,
1047                               EXTRACT_get_vextract256_imm, [HasDQI]>;
1048 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
1049                               v4i64x_info, vextract256_extract,
1050                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1051 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
1052                               v4i64x_info, vextract256_extract,
1053                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1054 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
1055                               v4i64x_info, vextract256_extract,
1056                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
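// Illustrative C sketch (not part of this file): masked subvector extracts of
// the kind handled above are reachable through intrinsics such as
// _mm512_mask_extractf32x4_ps; the compiler may then select one of the masked
// VEXTRACT*x* patterns. The function name below is made up for the example.
//   #include <immintrin.h>
//   __m128 extract_q1_masked(__m128 src, __mmask8 k, __m512 v) {
//     return _mm512_mask_extractf32x4_ps(src, k, v, 1); // masked-off lanes keep src
//   }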
1058 // vextractps - extract 32 bits from XMM
1059 def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
1060       (ins VR128X:$src1, u8imm:$src2),
1061       "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1062       [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
1063       EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
1065 def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
1066       (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1067       "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1068       [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1069                           addr:$dst)]>,
1070       EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
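// Illustrative C sketch (not part of this file): vextractps is exposed through
// the SSE4.1 intrinsic _mm_extract_ps; when compiling for AVX-512 the compiler
// may pick the EVEX-encoded VEXTRACTPSZ forms above. Function name is made up.
//   #include <immintrin.h>
//   int lane2_bits(__m128 v) {
//     return _mm_extract_ps(v, 2);   // bit pattern of element 2, as an int
//   }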
1072 //===---------------------------------------------------------------------===//
1073 // AVX-512 BROADCAST
1074 //---
1075 // broadcast with a scalar argument.
1076 multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
1077                             string Name,
1078                             X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
1079   def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1080             (!cast<Instruction>(Name#DestInfo.ZSuffix#r)
1081              (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1082   def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
1083                                   (X86VBroadcast SrcInfo.FRC:$src),
1084                                   DestInfo.RC:$src0)),
1085             (!cast<Instruction>(Name#DestInfo.ZSuffix#rk)
1086              DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1087              (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1088   def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
1089                                   (X86VBroadcast SrcInfo.FRC:$src),
1090                                   DestInfo.ImmAllZerosV)),
1091             (!cast<Instruction>(Name#DestInfo.ZSuffix#rkz)
1092              DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1095 // Split version to allow the mask and the broadcast node to be different
1096 // types. This helps support the 32x2 broadcasts.
1097 multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1098                                      string Name,
1099                                      SchedWrite SchedRR, SchedWrite SchedRM,
1100                                      X86VectorVTInfo MaskInfo,
1101                                      X86VectorVTInfo DestInfo,
1102                                      X86VectorVTInfo SrcInfo,
1103                                      SDPatternOperator UnmaskedOp = X86VBroadcast> {
1104   let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in {
1105   defm r : AVX512_maskable_split<opc, MRMSrcReg, MaskInfo,
1106                    (outs MaskInfo.RC:$dst),
1107                    (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
1108                    (MaskInfo.VT
1109                     (bitconvert
1110                      (DestInfo.VT
1111                       (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))),
1112                    (MaskInfo.VT
1113                     (bitconvert
1114                      (DestInfo.VT
1115                       (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))))>,
1116                    T8PD, EVEX, Sched<[SchedRR]>;
1117   let mayLoad = 1 in
1118   defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo,
1119                    (outs MaskInfo.RC:$dst),
1120                    (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
1121                    (MaskInfo.VT
1122                     (bitconvert
1123                      (DestInfo.VT (UnmaskedOp
1124                                    (SrcInfo.ScalarLdFrag addr:$src))))),
1125                    (MaskInfo.VT
1126                     (bitconvert
1127                      (DestInfo.VT (X86VBroadcast
1128                                    (SrcInfo.ScalarLdFrag addr:$src)))))>,
1129                    T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>,
1130                    Sched<[SchedRM]>;
1131   }
1134 // Helper multiclass to force the mask and broadcast result to the same type.
1135 multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
1136                                SchedWrite SchedRR, SchedWrite SchedRM,
1137                                X86VectorVTInfo DestInfo,
1138                                X86VectorVTInfo SrcInfo> :
1139   avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
1140                             DestInfo, DestInfo, SrcInfo>;
1142 multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1143                                                        AVX512VLVectorVTInfo _> {
1144   let Predicates = [HasAVX512] in {
1145     defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1146                                   WriteFShuffle256Ld, _.info512, _.info128>,
1147               avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
1148                                       _.info128>,
1149               EVEX_V512;
1150   }
1152   let Predicates = [HasVLX] in {
1153     defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1154                                      WriteFShuffle256Ld, _.info256, _.info128>,
1155                  avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
1156                                          _.info128>,
1157                  EVEX_V256;
1158   }
1161 multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1162                                                        AVX512VLVectorVTInfo _> {
1163   let Predicates = [HasAVX512] in {
1164     defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1165                                   WriteFShuffle256Ld, _.info512, _.info128>,
1166               avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
1167                                       _.info128>,
1168               EVEX_V512;
1169   }
1171   let Predicates = [HasVLX] in {
1172     defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1173                                      WriteFShuffle256Ld, _.info256, _.info128>,
1174                  avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
1175                                          _.info128>,
1176                  EVEX_V256;
1177     defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1178                                      WriteFShuffle256Ld, _.info128, _.info128>,
1179                  avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
1180                                          _.info128>,
1181                  EVEX_V128;
1182   }
1184 defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1185                                        avx512vl_f32_info>;
1186 defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1187                                        avx512vl_f64_info>, VEX_W1X;
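// Illustrative C sketch (not part of this file): the scalar FP broadcasts above
// are exposed as _mm512_broadcastss_ps / _mm512_broadcastsd_pd (and, from
// memory, via _mm512_set1_ps and friends). Function name is made up.
//   #include <immintrin.h>
//   __m512 splat_lane0(__m128 x) {
//     return _mm512_broadcastss_ps(x);   // element 0 copied to all 16 lanes
//   }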
1189 multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1190                                     X86VectorVTInfo _, SDPatternOperator OpNode,
1191                                     RegisterClass SrcRC> {
1192   let ExeDomain = _.ExeDomain in
1193   defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1194                          (ins SrcRC:$src),
1195                          "vpbroadcast"##_.Suffix, "$src", "$src",
1196                          (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX,
1197                          Sched<[SchedRR]>;
1200 multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1201                                     X86VectorVTInfo _, SDPatternOperator OpNode,
1202                                     RegisterClass SrcRC, SubRegIndex Subreg> {
1203   let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1204   defm r : AVX512_maskable_custom<opc, MRMSrcReg,
1205                         (outs _.RC:$dst), (ins GR32:$src),
1206                         !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1207                         !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1208                         "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
1209                         "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
1211   def : Pat <(_.VT (OpNode SrcRC:$src)),
1212              (!cast<Instruction>(Name#r)
1213               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1215   def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1216              (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
1217               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1219   def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1220              (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
1221               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1224 multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1225                       AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1226                       RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1227   let Predicates = [prd] in
1228     defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1229               OpNode, SrcRC, Subreg>, EVEX_V512;
1230   let Predicates = [prd, HasVLX] in {
1231     defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1232               _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1233     defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1234               _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
1235   }
1238 multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1239                                        SDPatternOperator OpNode,
1240                                        RegisterClass SrcRC, Predicate prd> {
1241   let Predicates = [prd] in
1242     defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1243                                       SrcRC>, EVEX_V512;
1244   let Predicates = [prd, HasVLX] in {
1245     defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1246                                          SrcRC>, EVEX_V256;
1247     defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
1248                                          SrcRC>, EVEX_V128;
1249   }
1252 defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1253                        avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1254 defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1255                        avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1256                        HasBWI>;
1257 defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1258                                                  X86VBroadcast, GR32, HasAVX512>;
1259 defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1260                                                  X86VBroadcast, GR64, HasAVX512>, VEX_W;
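// Illustrative C sketch (not part of this file): the GPR-source broadcasts
// above are typically reached via the set1 intrinsics; whether the register or
// memory form is selected depends on the surrounding code. Name is made up.
//   #include <immintrin.h>
//   __m512i splat_int(int x) {
//     return _mm512_set1_epi32(x);   // may lower to the GPR form of vpbroadcastd
//   }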
1262 multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1263                                         AVX512VLVectorVTInfo _, Predicate prd> {
1264   let Predicates = [prd] in {
1265     defm Z :   avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
1266                                    WriteShuffle256Ld, _.info512, _.info128>,
1267                                   EVEX_V512;
1268   }
1269   let Predicates = [prd, HasVLX] in {
1270     defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
1271                                     WriteShuffle256Ld, _.info256, _.info128>,
1272                                  EVEX_V256;
1273     defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
1274                                     WriteShuffleXLd, _.info128, _.info128>,
1275                                  EVEX_V128;
1276   }
1279 defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1280                                            avx512vl_i8_info, HasBWI>;
1281 defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1282                                            avx512vl_i16_info, HasBWI>;
1283 defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1284                                            avx512vl_i32_info, HasAVX512>;
1285 defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1286                                            avx512vl_i64_info, HasAVX512>, VEX_W1X;
1288 multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1289                           X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1290   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1291                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1292                            (_Dst.VT (X86SubVBroadcast
1293                              (_Src.VT (_Src.LdFrag addr:$src))))>,
1294                            Sched<[SchedWriteShuffle.YMM.Folded]>,
1295                            AVX5128IBase, EVEX;
1298 // This should be used for the AVX512DQ broadcast instructions. It disables
1299 // the unmasked patterns so that the DQ instructions are only used when
1300 // masking is requested.
1301 multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1302                           X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1303   let hasSideEffects = 0, mayLoad = 1 in
1304   defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1305                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1306                            (null_frag),
1307                            (_Dst.VT (X86SubVBroadcast
1308                              (_Src.VT (_Src.LdFrag addr:$src))))>,
1309                            Sched<[SchedWriteShuffle.YMM.Folded]>,
1310                            AVX5128IBase, EVEX;
1313 let Predicates = [HasAVX512] in {
1314   // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
1315   def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
1316             (VPBROADCASTQZm addr:$src)>;
1319 let Predicates = [HasVLX] in {
1320   // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
1321   def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
1322             (VPBROADCASTQZ128m addr:$src)>;
1323   def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
1324             (VPBROADCASTQZ256m addr:$src)>;
1326 let Predicates = [HasVLX, HasBWI] in {
1327   // loadi16 is tricky to fold because isTypeDesirableForOp justifiably
1328   // rejects i16. This means we'll encounter truncated i32 loads; match them here.
1329   def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1330             (VPBROADCASTWZ128m addr:$src)>;
1331   def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1332             (VPBROADCASTWZ256m addr:$src)>;
1333   def : Pat<(v8i16 (X86VBroadcast
1334               (i16 (trunc (i32 (extloadi16 addr:$src)))))),
1335             (VPBROADCASTWZ128m addr:$src)>;
1336   def : Pat<(v8i16 (X86VBroadcast
1337               (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1338             (VPBROADCASTWZ128m addr:$src)>;
1339   def : Pat<(v16i16 (X86VBroadcast
1340               (i16 (trunc (i32 (extloadi16 addr:$src)))))),
1341             (VPBROADCASTWZ256m addr:$src)>;
1342   def : Pat<(v16i16 (X86VBroadcast
1343               (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1344             (VPBROADCASTWZ256m addr:$src)>;
1346 let Predicates = [HasBWI] in {
1347   // loadi16 is tricky to fold because isTypeDesirableForOp justifiably
1348   // rejects i16. This means we'll encounter truncated i32 loads; match them here.
1349   def : Pat<(v32i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1350             (VPBROADCASTWZm addr:$src)>;
1351   def : Pat<(v32i16 (X86VBroadcast
1352               (i16 (trunc (i32 (extloadi16 addr:$src)))))),
1353             (VPBROADCASTWZm addr:$src)>;
1354   def : Pat<(v32i16 (X86VBroadcast
1355               (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1356             (VPBROADCASTWZm addr:$src)>;
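// Illustrative C sketch (not part of this file): the truncated-i32-load
// patterns above let a plain i16 splat from memory fold the load, e.g. via
// _mm512_set1_epi16, which may fold to the VPBROADCASTWZm form when BWI is
// available. Function and parameter names are made up for the example.
//   #include <immintrin.h>
//   __m512i splat_i16(const short *p) {
//     return _mm512_set1_epi16(*p);
//   }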
1359 //===----------------------------------------------------------------------===//
1360 // AVX-512 BROADCAST SUBVECTORS
1363 defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1364                        v16i32_info, v4i32x_info>,
1365                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
1366 defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1367                        v16f32_info, v4f32x_info>,
1368                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
1369 defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1370                        v8i64_info, v4i64x_info>, VEX_W,
1371                        EVEX_V512, EVEX_CD8<64, CD8VT4>;
1372 defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1373                        v8f64_info, v4f64x_info>, VEX_W,
1374                        EVEX_V512, EVEX_CD8<64, CD8VT4>;
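// Illustrative C sketch (not part of this file): the 128/256-bit subvector
// broadcasts above are exposed as _mm512_broadcast_f32x4, _mm512_broadcast_i64x4,
// and related intrinsics. Function name is made up for the example.
//   #include <immintrin.h>
//   __m512 repeat_xmm(__m128 x) {
//     return _mm512_broadcast_f32x4(x);   // x repeated into all four 128-bit lanes
//   }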
1376 let Predicates = [HasAVX512] in {
1377 def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
1378           (VBROADCASTF64X4rm addr:$src)>;
1379 def : Pat<(v16i32 (X86SubVBroadcast (loadv8i32 addr:$src))),
1380           (VBROADCASTI64X4rm addr:$src)>;
1381 def : Pat<(v32i16 (X86SubVBroadcast (loadv16i16 addr:$src))),
1382           (VBROADCASTI64X4rm addr:$src)>;
1383 def : Pat<(v64i8 (X86SubVBroadcast (loadv32i8 addr:$src))),
1384           (VBROADCASTI64X4rm addr:$src)>;
1386 // Provide a fallback in case the load node used in the patterns above has
1387 // additional users, which prevents those patterns from being selected.
1388 def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
1389           (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1390                            (v4f64 VR256X:$src), 1)>;
1391 def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
1392           (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1393                            (v8f32 VR256X:$src), 1)>;
1394 def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
1395           (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1396                            (v4i64 VR256X:$src), 1)>;
1397 def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
1398           (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1399                            (v8i32 VR256X:$src), 1)>;
1400 def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
1401           (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1402                            (v16i16 VR256X:$src), 1)>;
1403 def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
1404           (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1405                            (v32i8 VR256X:$src), 1)>;
1407 def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1408           (VBROADCASTF32X4rm addr:$src)>;
1409 def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1410           (VBROADCASTI32X4rm addr:$src)>;
1411 def : Pat<(v32i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
1412           (VBROADCASTI32X4rm addr:$src)>;
1413 def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
1414           (VBROADCASTI32X4rm addr:$src)>;
1416 // Patterns for selects of bitcasted operations.
1417 def : Pat<(vselect VK16WM:$mask,
1418                    (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1419                    (v16f32 immAllZerosV)),
1420           (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1421 def : Pat<(vselect VK16WM:$mask,
1422                    (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1423                    VR512:$src0),
1424           (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1425 def : Pat<(vselect VK16WM:$mask,
1426                    (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1427                    (v16i32 immAllZerosV)),
1428           (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1429 def : Pat<(vselect VK16WM:$mask,
1430                    (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1431                    VR512:$src0),
1432           (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1434 def : Pat<(vselect VK8WM:$mask,
1435                    (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
1436                    (v8f64 immAllZerosV)),
1437           (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1438 def : Pat<(vselect VK8WM:$mask,
1439                    (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
1440                    VR512:$src0),
1441           (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1442 def : Pat<(vselect VK8WM:$mask,
1443                    (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
1444                    (v8i64 immAllZerosV)),
1445           (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1446 def : Pat<(vselect VK8WM:$mask,
1447                    (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
1448                    VR512:$src0),
1449           (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
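// Illustrative C sketch (not part of this file): the masked selects above
// correspond to the mask/maskz subvector-broadcast intrinsics, e.g.
// _mm512_mask_broadcast_f64x4 or _mm512_maskz_broadcast_f32x4. Name is made up.
//   #include <immintrin.h>
//   __m512d rep_ymm_masked(__m512d src, __mmask8 k, __m256d a) {
//     return _mm512_mask_broadcast_f64x4(src, k, a);  // masked-off lanes keep src
//   }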
1452 let Predicates = [HasVLX] in {
1453 defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1454                            v8i32x_info, v4i32x_info>,
1455                            EVEX_V256, EVEX_CD8<32, CD8VT4>;
1456 defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1457                            v8f32x_info, v4f32x_info>,
1458                            EVEX_V256, EVEX_CD8<32, CD8VT4>;
1460 def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1461           (VBROADCASTF32X4Z256rm addr:$src)>;
1462 def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1463           (VBROADCASTI32X4Z256rm addr:$src)>;
1464 def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
1465           (VBROADCASTI32X4Z256rm addr:$src)>;
1466 def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
1467           (VBROADCASTI32X4Z256rm addr:$src)>;
1469 // Patterns for selects of bitcasted operations.
1470 def : Pat<(vselect VK8WM:$mask,
1471                    (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1472                    (v8f32 immAllZerosV)),
1473           (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1474 def : Pat<(vselect VK8WM:$mask,
1475                    (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1476                    VR256X:$src0),
1477           (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1478 def : Pat<(vselect VK8WM:$mask,
1479                    (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1480                    (v8i32 immAllZerosV)),
1481           (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1482 def : Pat<(vselect VK8WM:$mask,
1483                    (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1484                    VR256X:$src0),
1485           (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1488 // Provide a fallback in case the load node used in the patterns above has
1489 // additional users, which prevents those patterns from being selected.
1490 def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
1491           (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1492                               (v2f64 VR128X:$src), 1)>;
1493 def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
1494           (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1495                               (v4f32 VR128X:$src), 1)>;
1496 def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
1497           (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1498                               (v2i64 VR128X:$src), 1)>;
1499 def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
1500           (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1501                               (v4i32 VR128X:$src), 1)>;
1502 def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
1503           (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1504                               (v8i16 VR128X:$src), 1)>;
1505 def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
1506           (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1507                               (v16i8 VR128X:$src), 1)>;
1510 let Predicates = [HasVLX, HasDQI] in {
1511 defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1512                            v4i64x_info, v2i64x_info>, VEX_W1X,
1513                            EVEX_V256, EVEX_CD8<64, CD8VT2>;
1514 defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1515                            v4f64x_info, v2f64x_info>, VEX_W1X,
1516                            EVEX_V256, EVEX_CD8<64, CD8VT2>;
1518 // Patterns for selects of bitcasted operations.
1519 def : Pat<(vselect VK4WM:$mask,
1520                    (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1521                    (v4f64 immAllZerosV)),
1522           (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1523 def : Pat<(vselect VK4WM:$mask,
1524                    (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1525                    VR256X:$src0),
1526           (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1527 def : Pat<(vselect VK4WM:$mask,
1528                    (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1529                    (v4i64 immAllZerosV)),
1530           (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1531 def : Pat<(vselect VK4WM:$mask,
1532                    (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1533                    VR256X:$src0),
1534           (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1537 let Predicates = [HasDQI] in {
1538 defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1539                        v8i64_info, v2i64x_info>, VEX_W,
1540                        EVEX_V512, EVEX_CD8<64, CD8VT2>;
1541 defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1542                        v16i32_info, v8i32x_info>,
1543                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
1544 defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1545                        v8f64_info, v2f64x_info>, VEX_W,
1546                        EVEX_V512, EVEX_CD8<64, CD8VT2>;
1547 defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1548                        v16f32_info, v8f32x_info>,
1549                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
1551 // Patterns for selects of bitcasted operations.
1552 def : Pat<(vselect VK16WM:$mask,
1553                    (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
1554                    (v16f32 immAllZerosV)),
1555           (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
1556 def : Pat<(vselect VK16WM:$mask,
1557                    (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
1558                    VR512:$src0),
1559           (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1560 def : Pat<(vselect VK16WM:$mask,
1561                    (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
1562                    (v16i32 immAllZerosV)),
1563           (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
1564 def : Pat<(vselect VK16WM:$mask,
1565                    (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
1566                    VR512:$src0),
1567           (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1569 def : Pat<(vselect VK8WM:$mask,
1570                    (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1571                    (v8f64 immAllZerosV)),
1572           (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
1573 def : Pat<(vselect VK8WM:$mask,
1574                    (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1575                    VR512:$src0),
1576           (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1577 def : Pat<(vselect VK8WM:$mask,
1578                    (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1579                    (v8i64 immAllZerosV)),
1580           (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
1581 def : Pat<(vselect VK8WM:$mask,
1582                    (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1583                    VR512:$src0),
1584           (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1587 multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1588                          AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
1589   let Predicates = [HasDQI] in
1590     defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
1591                                           WriteShuffle256Ld, _Dst.info512,
1592                                           _Src.info512, _Src.info128, null_frag>,
1593                                           EVEX_V512;
1594   let Predicates = [HasDQI, HasVLX] in
1595     defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
1596                                           WriteShuffle256Ld, _Dst.info256,
1597                                           _Src.info256, _Src.info128, null_frag>,
1598                                           EVEX_V256;
1601 multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1602                          AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
1603   avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1605   let Predicates = [HasDQI, HasVLX] in
1606     defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
1607                                           WriteShuffleXLd, _Dst.info128,
1608                                           _Src.info128, _Src.info128, null_frag>,
1609                                           EVEX_V128;
1612 defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1613                                           avx512vl_i32_info, avx512vl_i64_info>;
1614 defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1615                                           avx512vl_f32_info, avx512vl_f64_info>;
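// Illustrative C sketch (not part of this file): the 32x2 broadcasts above are
// exposed as the AVX-512DQ intrinsics _mm512_broadcast_i32x2 and
// _mm512_broadcast_f32x2. Function name is made up for the example.
//   #include <immintrin.h>
//   __m512i repeat_pair(__m128i x) {
//     return _mm512_broadcast_i32x2(x);  // low 64 bits repeated across the vector
//   }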
1617 //===----------------------------------------------------------------------===//
1618 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
1619 //---
1620 multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1621                                   X86VectorVTInfo _, RegisterClass KRC> {
1622   def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1623                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1624                   [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1625                   EVEX, Sched<[WriteShuffle]>;
1628 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1629                                  AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1630   let Predicates = [HasCDI] in
1631     defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1632   let Predicates = [HasCDI, HasVLX] in {
1633     defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1634     defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1635   }
1638 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1639                                                avx512vl_i32_info, VK16>;
1640 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1641                                                avx512vl_i64_info, VK8>, VEX_W;
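// Illustrative C sketch (not part of this file): vpbroadcastmw2d copies a
// 16-bit mask register into every dword lane and is exposed as the AVX-512CD
// intrinsic _mm512_broadcastmw_epi32. Function name is made up.
//   #include <immintrin.h>
//   __m512i mask_to_lanes(__mmask16 k) {
//     return _mm512_broadcastmw_epi32(k);  // each i32 lane = zero-extended k
//   }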
1643 //===----------------------------------------------------------------------===//
1644 // -- VPERMI2 - 3 source operands form --
1645 multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1646                          X86FoldableSchedWrite sched,
1647                          X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1648 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1649     hasSideEffects = 0 in {
1650   defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1651           (ins _.RC:$src2, _.RC:$src3),
1652           OpcodeStr, "$src3, $src2", "$src2, $src3",
1653           (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1654           EVEX_4V, AVX5128IBase, Sched<[sched]>;
1656   let mayLoad = 1 in
1657   defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1658             (ins _.RC:$src2, _.MemOp:$src3),
1659             OpcodeStr, "$src3, $src2", "$src2, $src3",
1660             (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1661                    (_.VT (_.LdFrag addr:$src3)))), 1>,
1662             EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1663   }
1666 multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1667                             X86FoldableSchedWrite sched,
1668                             X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1669   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1670       hasSideEffects = 0, mayLoad = 1 in
1671   defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1672               (ins _.RC:$src2, _.ScalarMemOp:$src3),
1673               OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1674               !strconcat("$src2, ${src3}", _.BroadcastStr ),
1675               (_.VT (X86VPermt2 _.RC:$src2,
1676                IdxVT.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
1677               AVX5128IBase, EVEX_4V, EVEX_B,
1678               Sched<[sched.Folded, sched.ReadAfterFold]>;
1681 multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1682                                X86FoldableSchedWrite sched,
1683                                AVX512VLVectorVTInfo VTInfo,
1684                                AVX512VLVectorVTInfo ShuffleMask> {
1685   defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1686                            ShuffleMask.info512>,
1687             avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1688                              ShuffleMask.info512>, EVEX_V512;
1689   let Predicates = [HasVLX] in {
1690   defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1691                                ShuffleMask.info128>,
1692                  avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1693                                   ShuffleMask.info128>, EVEX_V128;
1694   defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1695                                ShuffleMask.info256>,
1696                  avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1697                                   ShuffleMask.info256>, EVEX_V256;
1698   }
1701 multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1702                                   X86FoldableSchedWrite sched,
1703                                   AVX512VLVectorVTInfo VTInfo,
1704                                   AVX512VLVectorVTInfo Idx,
1705                                   Predicate Prd> {
1706   let Predicates = [Prd] in
1707   defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1708                            Idx.info512>, EVEX_V512;
1709   let Predicates = [Prd, HasVLX] in {
1710   defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1711                                Idx.info128>, EVEX_V128;
1712   defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1713                                Idx.info256>,  EVEX_V256;
1714   }
1717 defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1718                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1719 defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1720                   avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1721 defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1722                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1723                   VEX_W, EVEX_CD8<16, CD8VF>;
1724 defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1725                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1726                   EVEX_CD8<8, CD8VF>;
1727 defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1728                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1729 defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1730                   avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
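// Illustrative C sketch (not part of this file): the two-source permutes above
// are exposed as _mm512_permutex2var_ps / _mm512_permutex2var_epi32 and
// friends; indices select across the concatenation of both sources. Names of
// the function and parameters are made up for the example.
//   #include <immintrin.h>
//   __m512 pick_lanes(__m512 a, __m512i idx, __m512 b) {
//     return _mm512_permutex2var_ps(a, idx, b);  // vpermi2ps / vpermt2ps
//   }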
1732 // Extra patterns to handle the additional bitcasts that arise because the
1733 // passthru and index operands have different types in the FP versions.
1734 multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1735                                   X86VectorVTInfo IdxVT,
1736                                   X86VectorVTInfo CastVT> {
1737   def : Pat<(_.VT (vselect _.KRCWM:$mask,
1738                              (X86VPermt2 (_.VT _.RC:$src2),
1739                                          (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
1740                              (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1741             (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1742                                                 _.RC:$src2, _.RC:$src3)>;
1743   def : Pat<(_.VT (vselect _.KRCWM:$mask,
1744                              (X86VPermt2 _.RC:$src2,
1745                                          (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
1746                                          (_.LdFrag addr:$src3)),
1747                              (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1748             (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1749                                                 _.RC:$src2, addr:$src3)>;
1750   def : Pat<(_.VT (vselect _.KRCWM:$mask,
1751                              (X86VPermt2 _.RC:$src2,
1752                                          (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
1753                                          (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
1754                              (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1755             (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1756                                                  _.RC:$src2, addr:$src3)>;
1759 // TODO: Should we add more casts? The vXi64 case is common due to ABI.
1760 defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
1761 defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
1762 defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
1764 // VPERMT2
1765 multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1766                          X86FoldableSchedWrite sched,
1767                          X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1768 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1769   defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1770           (ins IdxVT.RC:$src2, _.RC:$src3),
1771           OpcodeStr, "$src3, $src2", "$src2, $src3",
1772           (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1773           EVEX_4V, AVX5128IBase, Sched<[sched]>;
1775   defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1776             (ins IdxVT.RC:$src2, _.MemOp:$src3),
1777             OpcodeStr, "$src3, $src2", "$src2, $src3",
1778             (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1779                    (_.LdFrag addr:$src3))), 1>,
1780             EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1781   }
1783 multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1784                             X86FoldableSchedWrite sched,
1785                             X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1786   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1787   defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1788               (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1789               OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1790               !strconcat("$src2, ${src3}", _.BroadcastStr ),
1791               (_.VT (X86VPermt2 _.RC:$src1,
1792                IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
1793               AVX5128IBase, EVEX_4V, EVEX_B,
1794               Sched<[sched.Folded, sched.ReadAfterFold]>;
1797 multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1798                                X86FoldableSchedWrite sched,
1799                                AVX512VLVectorVTInfo VTInfo,
1800                                AVX512VLVectorVTInfo ShuffleMask> {
1801   defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1802                               ShuffleMask.info512>,
1803             avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1804                               ShuffleMask.info512>, EVEX_V512;
1805   let Predicates = [HasVLX] in {
1806   defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1807                               ShuffleMask.info128>,
1808                  avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1809                               ShuffleMask.info128>, EVEX_V128;
1810   defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1811                               ShuffleMask.info256>,
1812                  avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1813                               ShuffleMask.info256>, EVEX_V256;
1814   }
1817 multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1818                                   X86FoldableSchedWrite sched,
1819                                   AVX512VLVectorVTInfo VTInfo,
1820                                   AVX512VLVectorVTInfo Idx, Predicate Prd> {
1821   let Predicates = [Prd] in
1822   defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1823                            Idx.info512>, EVEX_V512;
1824   let Predicates = [Prd, HasVLX] in {
1825   defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1826                                Idx.info128>, EVEX_V128;
1827   defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1828                                Idx.info256>, EVEX_V256;
1829   }
1832 defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1833                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1834 defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1835                   avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1836 defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1837                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1838                   VEX_W, EVEX_CD8<16, CD8VF>;
1839 defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1840                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1841                   EVEX_CD8<8, CD8VF>;
1842 defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1843                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1844 defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1845                   avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
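// Illustrative C sketch (not part of this file): VPERMT2* overwrites the first
// data operand rather than the index operand (the "$src1 = $dst" tie above);
// both forms are reached from the same permutex2var intrinsics, and the
// compiler is assumed to pick whichever operand it can clobber. Name made up.
//   #include <immintrin.h>
//   __m512i pick_ints(__m512i a, __m512i idx, __m512i b) {
//     return _mm512_permutex2var_epi32(a, idx, b);  // vpermi2d or vpermt2d
//   }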
1847 //===----------------------------------------------------------------------===//
1848 // AVX-512 - BLEND using mask
1851 multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1852                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1853   let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1854   def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1855              (ins _.RC:$src1, _.RC:$src2),
1856              !strconcat(OpcodeStr,
1857              "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1858              EVEX_4V, Sched<[sched]>;
1859   def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1860              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1861              !strconcat(OpcodeStr,
1862              "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1863              []>, EVEX_4V, EVEX_K, Sched<[sched]>;
1864   def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1865              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1866              !strconcat(OpcodeStr,
1867              "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1868              []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
1869   let mayLoad = 1 in {
1870   def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1871              (ins _.RC:$src1, _.MemOp:$src2),
1872              !strconcat(OpcodeStr,
1873              "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1874              []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
1875              Sched<[sched.Folded, sched.ReadAfterFold]>;
1876   def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1877              (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1878              !strconcat(OpcodeStr,
1879              "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1880              []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1881              Sched<[sched.Folded, sched.ReadAfterFold]>;
1882   def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1883              (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1884              !strconcat(OpcodeStr,
1885              "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1886              []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1887              Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
1888   }
1889   }
1891 multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
1892                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1893   let mayLoad = 1, hasSideEffects = 0 in {
1894   def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1895       (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1896        !strconcat(OpcodeStr,
1897             "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1898             "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1899       EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1900       Sched<[sched.Folded, sched.ReadAfterFold]>;
1902   def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1903       (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1904        !strconcat(OpcodeStr,
1905             "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
1906             "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1907       EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1908       Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
1910   def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1911       (ins _.RC:$src1, _.ScalarMemOp:$src2),
1912        !strconcat(OpcodeStr,
1913             "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
1914             "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1915       EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1916       Sched<[sched.Folded, sched.ReadAfterFold]>;
1917   }
1920 multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
1921                         AVX512VLVectorVTInfo VTInfo> {
1922   defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1923            WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1924                                  EVEX_V512;
1926   let Predicates = [HasVLX] in {
1927     defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1928                 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1929                                       EVEX_V256;
1930     defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
1931                 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
1932                                       EVEX_V128;
1933   }
1936 multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
1937                         AVX512VLVectorVTInfo VTInfo> {
1938   let Predicates = [HasBWI] in
1939     defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1940                                EVEX_V512;
1942   let Predicates = [HasBWI, HasVLX] in {
1943     defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1944                                   EVEX_V256;
1945     defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
1946                                   EVEX_V128;
1947   }
1950 defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
1951                               avx512vl_f32_info>;
1952 defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
1953                               avx512vl_f64_info>, VEX_W;
1954 defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
1955                               avx512vl_i32_info>;
1956 defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
1957                               avx512vl_i64_info>, VEX_W;
1958 defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
1959                               avx512vl_i8_info>;
1960 defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
1961                               avx512vl_i16_info>, VEX_W;
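// Illustrative C sketch (not part of this file): the masked blends above map to
// intrinsics such as _mm512_mask_blend_ps, which select from the second source
// where the mask bit is set. Function name is made up for the example.
//   #include <immintrin.h>
//   __m512 blend16(__mmask16 k, __m512 a, __m512 b) {
//     return _mm512_mask_blend_ps(k, a, b);  // lane i = (k>>i)&1 ? b[i] : a[i]
//   }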
1963 //===----------------------------------------------------------------------===//
1964 // Compare Instructions
1965 //===----------------------------------------------------------------------===//
1967 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
1969 multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
1970                              PatFrag OpNode_su, PatFrag OpNodeSAE_su,
1971                              X86FoldableSchedWrite sched> {
1972   defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1973                       (outs _.KRC:$dst),
1974                       (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1975                       "vcmp"#_.Suffix,
1976                       "$cc, $src2, $src1", "$src1, $src2, $cc",
1977                       (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
1978                       (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1979                                  imm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>;
1980   let mayLoad = 1 in
1981   defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
1982                     (outs _.KRC:$dst),
1983                     (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
1984                     "vcmp"#_.Suffix,
1985                     "$cc, $src2, $src1", "$src1, $src2, $cc",
1986                     (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
1987                         imm:$cc),
1988                     (OpNode_su (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
1989                         imm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
1990                     Sched<[sched.Folded, sched.ReadAfterFold]>;
1992   defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1993                      (outs _.KRC:$dst),
1994                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1995                      "vcmp"#_.Suffix,
1996                      "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
1997                      (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1998                                 imm:$cc),
1999                      (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2000                                    imm:$cc)>,
2001                      EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
2003   let isCodeGenOnly = 1 in {
2004     let isCommutable = 1 in
2005     def rr : AVX512Ii8<0xC2, MRMSrcReg,
2006                 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
2007                 !strconcat("vcmp", _.Suffix,
2008                            "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2009                 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2010                                           _.FRC:$src2,
2011                                           imm:$cc))]>,
2012                 EVEX_4V, VEX_LIG, Sched<[sched]>;
2013     def rm : AVX512Ii8<0xC2, MRMSrcMem,
2014               (outs _.KRC:$dst),
2015               (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2016               !strconcat("vcmp", _.Suffix,
2017                          "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2018               [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2019                                         (_.ScalarLdFrag addr:$src2),
2020                                         imm:$cc))]>,
2021               EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2022               Sched<[sched.Folded, sched.ReadAfterFold]>;
2023   }
2024 }
2026 def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2027                           (X86cmpms node:$src1, node:$src2, node:$cc), [{
2028   return N->hasOneUse();
2029 }]>;
2030 def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2031                           (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
2032   return N->hasOneUse();
2033 }]>;
2035 let Predicates = [HasAVX512] in {
2036   let ExeDomain = SSEPackedSingle in
2037   defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
2038                                    X86cmpms_su, X86cmpmsSAE_su,
2039                                    SchedWriteFCmp.Scl>, AVX512XSIi8Base;
2040   let ExeDomain = SSEPackedDouble in
2041   defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
2042                                    X86cmpms_su, X86cmpmsSAE_su,
2043                                    SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
2044 }
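// Illustration only (not TableGen input for matching): the scalar compare
// definitions above produce mask-destination forms roughly like
//   vcmpss k1, xmm1, xmm2, 1                   ; rr_Int, $cc = 1 (LT)
//   vcmpss k1 {k2}, xmm1, dword ptr [rax], 2   ; rm_Int, folded load + writemask
//   vcmpss k1, xmm1, xmm2, {sae}, 3            ; rrb_Int, exceptions suppressed
// i.e. the result is a single bit in a k-register rather than an XMM value.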
2046 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
2047                               PatFrag OpNode_su, X86FoldableSchedWrite sched,
2048                               X86VectorVTInfo _, bit IsCommutable> {
2049   let isCommutable = IsCommutable in
2050   def rr : AVX512BI<opc, MRMSrcReg,
2051              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2052              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2053              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))]>,
2054              EVEX_4V, Sched<[sched]>;
2055   def rm : AVX512BI<opc, MRMSrcMem,
2056              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2057              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2058              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2059                                        (_.VT (_.LdFrag addr:$src2))))]>,
2060              EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2061   let isCommutable = IsCommutable in
2062   def rrk : AVX512BI<opc, MRMSrcReg,
2063               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2064               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2065                           "$dst {${mask}}, $src1, $src2}"),
2066               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2067                                    (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2))))]>,
2068               EVEX_4V, EVEX_K, Sched<[sched]>;
2069   def rmk : AVX512BI<opc, MRMSrcMem,
2070               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2071               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2072                           "$dst {${mask}}, $src1, $src2}"),
2073               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2074                                    (OpNode_su (_.VT _.RC:$src1),
2075                                        (_.VT (_.LdFrag addr:$src2)))))]>,
2076               EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2077 }
2079 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
2080                                   PatFrag OpNode_su,
2081                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
2082                                   bit IsCommutable> :
2083            avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched, _, IsCommutable> {
2084   def rmb : AVX512BI<opc, MRMSrcMem,
2085               (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2086               !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2087                                     "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2088               [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2089                               (X86VBroadcast (_.ScalarLdFrag addr:$src2))))]>,
2090               EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2091   def rmbk : AVX512BI<opc, MRMSrcMem,
2092                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2093                                        _.ScalarMemOp:$src2),
2094                !strconcat(OpcodeStr,
2095                           "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2096                           "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2097                [(set _.KRC:$dst, (and _.KRCWM:$mask,
2098                                       (OpNode_su (_.VT _.RC:$src1),
2099                                         (X86VBroadcast
2100                                           (_.ScalarLdFrag addr:$src2)))))]>,
2101                EVEX_4V, EVEX_K, EVEX_B,
2102                Sched<[sched.Folded, sched.ReadAfterFold]>;
2103 }
2105 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
2106                                  PatFrag OpNode_su, X86SchedWriteWidths sched,
2107                                  AVX512VLVectorVTInfo VTInfo, Predicate prd,
2108                                  bit IsCommutable = 0> {
2109   let Predicates = [prd] in
2110   defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.ZMM,
2111                               VTInfo.info512, IsCommutable>, EVEX_V512;
2113   let Predicates = [prd, HasVLX] in {
2114     defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.YMM,
2115                                    VTInfo.info256, IsCommutable>, EVEX_V256;
2116     defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.XMM,
2117                                    VTInfo.info128, IsCommutable>, EVEX_V128;
2118   }
2119 }
2121 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2122                                      PatFrag OpNode, PatFrag OpNode_su,
2123                                      X86SchedWriteWidths sched,
2124                                      AVX512VLVectorVTInfo VTInfo,
2125                                      Predicate prd, bit IsCommutable = 0> {
2126   let Predicates = [prd] in
2127   defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.ZMM,
2128                                   VTInfo.info512, IsCommutable>, EVEX_V512;
2130   let Predicates = [prd, HasVLX] in {
2131     defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.YMM,
2132                                        VTInfo.info256, IsCommutable>, EVEX_V256;
2133     defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.XMM,
2134                                        VTInfo.info128, IsCommutable>, EVEX_V128;
2135   }
2136 }
2138 // This fragment treats X86cmpm as commutable to help match loads in both
2139 // operands for PCMPEQ.
2140 def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
2141 def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2),
2142                            (X86setcc_commute node:$src1, node:$src2, SETEQ)>;
2143 def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
2144                          (setcc node:$src1, node:$src2, SETGT)>;
2146 def X86pcmpeqm_c_su : PatFrag<(ops node:$src1, node:$src2),
2147                               (X86pcmpeqm_c node:$src1, node:$src2), [{
2148   return N->hasOneUse();
2149 }]>;
2150 def X86pcmpgtm_su : PatFrag<(ops node:$src1, node:$src2),
2151                             (X86pcmpgtm node:$src1, node:$src2), [{
2152   return N->hasOneUse();
2153 }]>;
2155 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2156 // increase the pattern complexity the way an immediate would.
2157 let AddedComplexity = 2 in {
2158 // FIXME: Is there a better scheduler class for VPCMP?
2159 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c, X86pcmpeqm_c_su,
2160                       SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2161                 EVEX_CD8<8, CD8VF>, VEX_WIG;
2163 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c, X86pcmpeqm_c_su,
2164                       SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2165                 EVEX_CD8<16, CD8VF>, VEX_WIG;
2167 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c, X86pcmpeqm_c_su,
2168                       SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2169                 EVEX_CD8<32, CD8VF>;
2171 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c, X86pcmpeqm_c_su,
2172                       SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2173                 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2175 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm, X86pcmpgtm_su,
2176                       SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2177                 EVEX_CD8<8, CD8VF>, VEX_WIG;
2179 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm, X86pcmpgtm_su,
2180                       SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2181                 EVEX_CD8<16, CD8VF>, VEX_WIG;
2183 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm, X86pcmpgtm_su,
2184                       SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2185                 EVEX_CD8<32, CD8VF>;
2187 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm, X86pcmpgtm_su,
2188                       SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2189                 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2190 }
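// Illustration only: the defs above select plain SETEQ/SETGT vector compares
// into mask-producing instructions, approximately
//   vpcmpeqd k1, zmm0, zmm1                     ; k1[i] = (zmm0[i] == zmm1[i])
//   vpcmpgtq k1 {k2}, zmm0, zmmword ptr [rdi]   ; rmk, writemasked + folded load
//   vpcmpeqd k1, zmm0, dword ptr [rax]{1to16}   ; rmb, broadcast operand
// The *_su fragments limit the masked patterns to single-use compares so the
// unmasked value is not also needed elsewhere.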
2192 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2193                           PatFrag Frag_su, PatFrag CommFrag, PatFrag CommFrag_su,
2194                           X86FoldableSchedWrite sched,
2195                           X86VectorVTInfo _, string Name> {
2196   let isCommutable = 1 in
2197   def rri : AVX512AIi8<opc, MRMSrcReg,
2198              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2199              !strconcat("vpcmp", Suffix,
2200                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2201              [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2202                                                 (_.VT _.RC:$src2),
2203                                                 cond)))]>,
2204              EVEX_4V, Sched<[sched]>;
2205   def rmi : AVX512AIi8<opc, MRMSrcMem,
2206              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2207              !strconcat("vpcmp", Suffix,
2208                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2209              [(set _.KRC:$dst, (_.KVT
2210                                 (Frag:$cc
2211                                  (_.VT _.RC:$src1),
2212                                  (_.VT (_.LdFrag addr:$src2)),
2213                                  cond)))]>,
2214              EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2215   let isCommutable = 1 in
2216   def rrik : AVX512AIi8<opc, MRMSrcReg,
2217               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2218                                       u8imm:$cc),
2219               !strconcat("vpcmp", Suffix,
2220                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
2221                          "$dst {${mask}}, $src1, $src2, $cc}"),
2222               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2223                                      (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
2224                                                          (_.VT _.RC:$src2),
2225                                                          cond))))]>,
2226               EVEX_4V, EVEX_K, Sched<[sched]>;
2227   def rmik : AVX512AIi8<opc, MRMSrcMem,
2228               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2229                                     u8imm:$cc),
2230               !strconcat("vpcmp", Suffix,
2231                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
2232                          "$dst {${mask}}, $src1, $src2, $cc}"),
2233               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2234                                      (_.KVT
2235                                       (Frag_su:$cc
2236                                        (_.VT _.RC:$src1),
2237                                        (_.VT (_.LdFrag addr:$src2)),
2238                                        cond))))]>,
2239               EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2241   def : Pat<(_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
2242                                  (_.VT _.RC:$src1), cond)),
2243             (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2244              _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
2246   def : Pat<(and _.KRCWM:$mask,
2247                  (_.KVT (CommFrag_su:$cc (_.LdFrag addr:$src2),
2248                                       (_.VT _.RC:$src1), cond))),
2249             (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2250              _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2251              (CommFrag.OperandTransform $cc))>;
2252 }
2254 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2255                               PatFrag Frag_su, PatFrag CommFrag,
2256                               PatFrag CommFrag_su, X86FoldableSchedWrite sched,
2257                               X86VectorVTInfo _, string Name> :
2258            avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2259                           sched, _, Name> {
2260   def rmib : AVX512AIi8<opc, MRMSrcMem,
2261              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2262                                      u8imm:$cc),
2263              !strconcat("vpcmp", Suffix,
2264                         "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2265                         "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2266              [(set _.KRC:$dst, (_.KVT (Frag:$cc
2267                                        (_.VT _.RC:$src1),
2268                                        (X86VBroadcast
2269                                         (_.ScalarLdFrag addr:$src2)),
2270                                        cond)))]>,
2271              EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2272   def rmibk : AVX512AIi8<opc, MRMSrcMem,
2273               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2274                                        _.ScalarMemOp:$src2, u8imm:$cc),
2275               !strconcat("vpcmp", Suffix,
2276                   "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2277                   "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2278               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2279                                      (_.KVT (Frag_su:$cc
2280                                              (_.VT _.RC:$src1),
2281                                              (X86VBroadcast
2282                                               (_.ScalarLdFrag addr:$src2)),
2283                                              cond))))]>,
2284               EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2286   def : Pat<(_.KVT (CommFrag:$cc (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2287                     (_.VT _.RC:$src1), cond)),
2288             (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2289              _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
2291   def : Pat<(and _.KRCWM:$mask,
2292                  (_.KVT (CommFrag_su:$cc (X86VBroadcast
2293                                        (_.ScalarLdFrag addr:$src2)),
2294                                       (_.VT _.RC:$src1), cond))),
2295             (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2296              _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2297              (CommFrag.OperandTransform $cc))>;
2298 }
2300 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2301                              PatFrag Frag_su, PatFrag CommFrag,
2302                              PatFrag CommFrag_su, X86SchedWriteWidths sched,
2303                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2304   let Predicates = [prd] in
2305   defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2306                           sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2308   let Predicates = [prd, HasVLX] in {
2309     defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2310                                sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2311     defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2312                                sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2313   }
2314 }
2316 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2317                                  PatFrag Frag_su, PatFrag CommFrag,
2318                                  PatFrag CommFrag_su, X86SchedWriteWidths sched,
2319                                  AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2320   let Predicates = [prd] in
2321   defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2322                               sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2324   let Predicates = [prd, HasVLX] in {
2325     defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2326                                    sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2327     defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2328                                    sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2329   }
2330 }
2332 def X86pcmpm_imm : SDNodeXForm<setcc, [{
2333   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2334   uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2335   return getI8Imm(SSECC, SDLoc(N));
2336 }]>;
2338 // Swapped operand version of the above.
2339 def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
2340   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2341   uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2342   SSECC = X86::getSwappedVPCMPImm(SSECC);
2343   return getI8Imm(SSECC, SDLoc(N));
2344 }]>;
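// For reference (a sketch of the architectural VPCMP predicate encoding that
// X86::getVPCMPImmForCond is expected to produce): 0=EQ, 1=LT, 2=LE, 4=NE,
// 5=NLT (GE), 6=NLE (GT); 3 and 7 are the always-false/always-true encodings.
// Example: SETGT maps to 6, and the commuted transform above would yield
// 1 (LT), since a > b is the same as b < a with the operands swapped.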
2346 def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2347                        (setcc node:$src1, node:$src2, node:$cc), [{
2348   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2349   return !ISD::isUnsignedIntSetCC(CC);
2350 }], X86pcmpm_imm>;
2352 def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2353                           (setcc node:$src1, node:$src2, node:$cc), [{
2354   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2355   return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2356 }], X86pcmpm_imm>;
2358 // Same as above, but commutes the immediate. Used for load folding.
2359 def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2360                                (setcc node:$src1, node:$src2, node:$cc), [{
2361   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2362   return !ISD::isUnsignedIntSetCC(CC);
2363 }], X86pcmpm_imm_commute>;
2365 def X86pcmpm_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2366                                   (setcc node:$src1, node:$src2, node:$cc), [{
2367   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2368   return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2369 }], X86pcmpm_imm_commute>;
2371 def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2372                         (setcc node:$src1, node:$src2, node:$cc), [{
2373   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2374   return ISD::isUnsignedIntSetCC(CC);
2375 }], X86pcmpm_imm>;
2377 def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2378                            (setcc node:$src1, node:$src2, node:$cc), [{
2379   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2380   return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2381 }], X86pcmpm_imm>;
2383 // Same as above, but commutes the immediate. Used for load folding.
2384 def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2385                                 (setcc node:$src1, node:$src2, node:$cc), [{
2386   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2387   return ISD::isUnsignedIntSetCC(CC);
2388 }], X86pcmpm_imm_commute>;
2390 def X86pcmpum_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2391                                    (setcc node:$src1, node:$src2, node:$cc), [{
2392   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2393   return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2394 }], X86pcmpm_imm_commute>;
2396 // FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
2397 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
2398                                 X86pcmpm_commute, X86pcmpm_commute_su,
2399                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2400                                 EVEX_CD8<8, CD8VF>;
2401 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
2402                                  X86pcmpum_commute, X86pcmpum_commute_su,
2403                                  SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2404                                  EVEX_CD8<8, CD8VF>;
2406 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
2407                                 X86pcmpm_commute, X86pcmpm_commute_su,
2408                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2409                                 VEX_W, EVEX_CD8<16, CD8VF>;
2410 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
2411                                  X86pcmpum_commute, X86pcmpum_commute_su,
2412                                  SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2413                                  VEX_W, EVEX_CD8<16, CD8VF>;
2415 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
2416                                     X86pcmpm_commute, X86pcmpm_commute_su,
2417                                     SchedWriteVecALU, avx512vl_i32_info,
2418                                     HasAVX512>, EVEX_CD8<32, CD8VF>;
2419 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
2420                                      X86pcmpum_commute, X86pcmpum_commute_su,
2421                                      SchedWriteVecALU, avx512vl_i32_info,
2422                                      HasAVX512>, EVEX_CD8<32, CD8VF>;
2424 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
2425                                     X86pcmpm_commute, X86pcmpm_commute_su,
2426                                     SchedWriteVecALU, avx512vl_i64_info,
2427                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2428 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
2429                                      X86pcmpum_commute, X86pcmpum_commute_su,
2430                                      SchedWriteVecALU, avx512vl_i64_info,
2431                                      HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
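// Illustration only (assuming the usual <immintrin.h> intrinsic names): at the
// source level these correspond to compares such as
//   __mmask16 m = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LE);   // vpcmpd ..., 2
//   __mmask16 u = _mm512_cmp_epu32_mask(a, b, _MM_CMPINT_NLT);  // vpcmpud ..., 5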
2433 def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2434                          (X86cmpm node:$src1, node:$src2, node:$cc), [{
2435   return N->hasOneUse();
2436 }]>;
2437 def X86cmpmSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2438                             (X86cmpmSAE node:$src1, node:$src2, node:$cc), [{
2439   return N->hasOneUse();
2440 }]>;
2442 multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2443                               string Name> {
2444   defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2445                    (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
2446                    "vcmp"#_.Suffix,
2447                    "$cc, $src2, $src1", "$src1, $src2, $cc",
2448                    (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
2449                    (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
2450                    1>, Sched<[sched]>;
2452   defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2453                 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2454                 "vcmp"#_.Suffix,
2455                 "$cc, $src2, $src1", "$src1, $src2, $cc",
2456                 (X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2457                          imm:$cc),
2458                 (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2459                             imm:$cc)>,
2460                 Sched<[sched.Folded, sched.ReadAfterFold]>;
2462   defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2463                 (outs _.KRC:$dst),
2464                 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2465                 "vcmp"#_.Suffix,
2466                 "$cc, ${src2}"#_.BroadcastStr#", $src1",
2467                 "$src1, ${src2}"#_.BroadcastStr#", $cc",
2468                 (X86cmpm (_.VT _.RC:$src1),
2469                         (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
2470                         imm:$cc),
2471                 (X86cmpm_su (_.VT _.RC:$src1),
2472                             (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
2473                             imm:$cc)>,
2474                 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2476   // Patterns for selecting these compares when the load is in the other operand.
2477   def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2478                      CommutableCMPCC:$cc),
2479             (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2480                                                       imm:$cc)>;
2482   def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
2483                                             (_.VT _.RC:$src1),
2484                                             CommutableCMPCC:$cc)),
2485             (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2486                                                        _.RC:$src1, addr:$src2,
2487                                                        imm:$cc)>;
2489   def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2490                      (_.VT _.RC:$src1), CommutableCMPCC:$cc),
2491             (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2492                                                        imm:$cc)>;
2494   def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (X86VBroadcast
2495                                              (_.ScalarLdFrag addr:$src2)),
2496                                             (_.VT _.RC:$src1),
2497                                             CommutableCMPCC:$cc)),
2498             (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2499                                                         _.RC:$src1, addr:$src2,
2500                                                         imm:$cc)>;
2501 }
2503 multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2504   // Comparison code form (VCMP[EQ/LT/LE/...]).
2505   defm  rrib  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2506                      (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2507                      "vcmp"#_.Suffix,
2508                      "$cc, {sae}, $src2, $src1",
2509                      "$src1, $src2, {sae}, $cc",
2510                      (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
2511                      (X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2512                                     imm:$cc)>,
2513                      EVEX_B, Sched<[sched]>;
2514 }
2516 multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
2517   let Predicates = [HasAVX512] in {
2518     defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2519                 avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2521   }
2522   let Predicates = [HasAVX512,HasVLX] in {
2523    defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2524    defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
2525   }
2526 }
2528 defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2529                           AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
2530 defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2531                           AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
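// Illustration only (approximate assembler for the packed FP compares above):
//   vcmpps k1, zmm0, zmm1, 2                         ; rri, $cc = 2 (LE)
//   vcmppd k1 {k2}, zmm0, qword ptr [rax]{1to8}, 1   ; rmbi, broadcast + mask
//   vcmpps k1, zmm0, zmm1, {sae}, 0                  ; rrib, 512-bit only
// The CommutableCMPCC patterns fold a load in the first operand when the
// condition code is symmetric (e.g. EQ/NE/ORD/UNORD).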
2533 // Patterns to select FP compares with a load as the first operand.
2534 let Predicates = [HasAVX512] in {
2535   def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
2536                             CommutableCMPCC:$cc)),
2537             (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>;
2539   def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
2540                             CommutableCMPCC:$cc)),
2541             (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>;
2542 }
2544 // ----------------------------------------------------------------
2545 // FPClass
2547 def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
2548                               (X86Vfpclasss node:$src1, node:$src2), [{
2549   return N->hasOneUse();
2550 }]>;
2552 def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
2553                              (X86Vfpclass node:$src1, node:$src2), [{
2554   return N->hasOneUse();
2555 }]>;
2557 // Handle the scalar fpclass instruction:  mask = op(reg_scalar, imm)
2558 //                                                 op(mem_scalar, imm)
2559 multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
2560                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2561                                  Predicate prd> {
2562   let Predicates = [prd], ExeDomain = _.ExeDomain in {
2563       def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2564                       (ins _.RC:$src1, i32u8imm:$src2),
2565                       OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2566                       [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
2567                               (i32 imm:$src2)))]>,
2568                       Sched<[sched]>;
2569       def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2570                       (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2571                       OpcodeStr##_.Suffix#
2572                       "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2573                       [(set _.KRC:$dst,(and _.KRCWM:$mask,
2574                                       (X86Vfpclasss_su (_.VT _.RC:$src1),
2575                                       (i32 imm:$src2))))]>,
2576                       EVEX_K, Sched<[sched]>;
2577     def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2578                     (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2579                     OpcodeStr##_.Suffix##
2580                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2581                     [(set _.KRC:$dst,
2582                           (X86Vfpclasss _.ScalarIntMemCPat:$src1,
2583                                        (i32 imm:$src2)))]>,
2584                     Sched<[sched.Folded, sched.ReadAfterFold]>;
2585     def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2586                     (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2587                     OpcodeStr##_.Suffix##
2588                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2589                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
2590                         (X86Vfpclasss_su _.ScalarIntMemCPat:$src1,
2591                             (i32 imm:$src2))))]>,
2592                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2593   }
2594 }
2596 // Handle the vector fpclass instruction:  mask = fpclass(reg_vec, imm)
2597 //                                                fpclass(mem_vec, imm)
2598 //                                                fpclass(broadcast(eltVT), imm)
2599 multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
2600                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2601                                  string mem>{
2602   let ExeDomain = _.ExeDomain in {
2603   def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2604                       (ins _.RC:$src1, i32u8imm:$src2),
2605                       OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2606                       [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
2607                                        (i32 imm:$src2)))]>,
2608                       Sched<[sched]>;
2609   def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2610                       (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2611                       OpcodeStr##_.Suffix#
2612                       "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2613                       [(set _.KRC:$dst,(and _.KRCWM:$mask,
2614                                        (X86Vfpclass_su (_.VT _.RC:$src1),
2615                                        (i32 imm:$src2))))]>,
2616                       EVEX_K, Sched<[sched]>;
2617   def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2618                     (ins _.MemOp:$src1, i32u8imm:$src2),
2619                     OpcodeStr##_.Suffix#"{"#mem#"}"#
2620                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2621                     [(set _.KRC:$dst,(X86Vfpclass
2622                                      (_.VT (_.LdFrag addr:$src1)),
2623                                      (i32 imm:$src2)))]>,
2624                     Sched<[sched.Folded, sched.ReadAfterFold]>;
2625   def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2626                     (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2627                     OpcodeStr##_.Suffix#"{"#mem#"}"#
2628                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2629                     [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
2630                                   (_.VT (_.LdFrag addr:$src1)),
2631                                   (i32 imm:$src2))))]>,
2632                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2633   def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2634                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2635                     OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
2636                                       _.BroadcastStr##", $dst|$dst, ${src1}"
2637                                                   ##_.BroadcastStr##", $src2}",
2638                     [(set _.KRC:$dst,(X86Vfpclass
2639                                      (_.VT (X86VBroadcast
2640                                            (_.ScalarLdFrag addr:$src1))),
2641                                      (i32 imm:$src2)))]>,
2642                     EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2643   def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2644                     (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2645                     OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
2646                           _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
2647                                                    _.BroadcastStr##", $src2}",
2648                     [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
2649                                      (_.VT (X86VBroadcast
2650                                            (_.ScalarLdFrag addr:$src1))),
2651                                      (i32 imm:$src2))))]>,
2652                     EVEX_B, EVEX_K,  Sched<[sched.Folded, sched.ReadAfterFold]>;
2653   }
2655   // Also allow the register and broadcast forms to be written with the x, y, z
2656   // suffix that we use to disambiguate the memory form.
2657   def : InstAlias<OpcodeStr#_.Suffix#mem#
2658                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2659                   (!cast<Instruction>(NAME#"rr")
2660                    _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2661   def : InstAlias<OpcodeStr#_.Suffix#mem#
2662                   "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2663                   (!cast<Instruction>(NAME#"rrk")
2664                    _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2665   def : InstAlias<OpcodeStr#_.Suffix#mem#
2666                   "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
2667                   _.BroadcastStr#", $src2}",
2668                   (!cast<Instruction>(NAME#"rmb")
2669                    _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2670   def : InstAlias<OpcodeStr#_.Suffix#mem#
2671                   "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
2672                   "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
2673                   (!cast<Instruction>(NAME#"rmbk")
2674                    _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2675 }
2677 multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2678                                      bits<8> opc, X86SchedWriteWidths sched,
2679                                      Predicate prd>{
2680   let Predicates = [prd] in {
2681     defm Z    : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
2682                                       _.info512, "z">, EVEX_V512;
2683   }
2684   let Predicates = [prd, HasVLX] in {
2685     defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
2686                                       _.info128, "x">, EVEX_V128;
2687     defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
2688                                       _.info256, "y">, EVEX_V256;
2689   }
2690 }
2692 multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2693                                  bits<8> opcScalar, X86SchedWriteWidths sched,
2694                                  Predicate prd> {
2695   defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
2696                                       sched, prd>,
2697                                       EVEX_CD8<32, CD8VF>;
2698   defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
2699                                       sched, prd>,
2700                                       EVEX_CD8<64, CD8VF> , VEX_W;
2701   defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2702                                    sched.Scl, f32x_info, prd>, VEX_LIG,
2703                                    EVEX_CD8<32, CD8VT1>;
2704   defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2705                                    sched.Scl, f64x_info, prd>, VEX_LIG,
2706                                    EVEX_CD8<64, CD8VT1>, VEX_W;
2707 }
2709 defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp,
2710                                       HasDQI>, AVX512AIi8Base, EVEX;
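// Illustration only: vfpclass tests each element against the categories
// selected by the immediate (per the ISA: bit 0 QNaN, 1 +0, 2 -0, 3 +Inf,
// 4 -Inf, 5 denormal, 6 finite negative, 7 SNaN), e.g. roughly
//   vfpclassps k1, zmm0, 0x81                        ; any NaN (QNaN | SNaN)
//   __mmask16 m = _mm512_fpclass_ps_mask(v, 0x18);   // +/-infinity (sketch)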
2712 //-----------------------------------------------------------------
2713 // Mask register copy, including
2714 // - copy between mask registers
2715 // - load/store mask registers
2716 // - copy from GPR to mask register and vice versa
2718 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2719                          string OpcodeStr, RegisterClass KRC,
2720                          ValueType vvt, X86MemOperand x86memop> {
2721   let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
2722   def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2723              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2724              Sched<[WriteMove]>;
2725   def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2726              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2727              [(set KRC:$dst, (vvt (load addr:$src)))]>,
2728              Sched<[WriteLoad]>;
2729   def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2730              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2731              [(store KRC:$src, addr:$dst)]>,
2732              Sched<[WriteStore]>;
2733 }
2735 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2736                              string OpcodeStr,
2737                              RegisterClass KRC, RegisterClass GRC> {
2738   let hasSideEffects = 0 in {
2739     def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2740                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2741                Sched<[WriteMove]>;
2742     def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2743                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2744                Sched<[WriteMove]>;
2745   }
2746 }
2748 let Predicates = [HasDQI] in
2749   defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2750                avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2751                VEX, PD;
2753 let Predicates = [HasAVX512] in
2754   defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2755                avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2756                VEX, PS;
2758 let Predicates = [HasBWI] in {
2759   defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2760                VEX, PD, VEX_W;
2761   defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2762                VEX, XD;
2763   defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2764                VEX, PS, VEX_W;
2765   defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2766                VEX, XD, VEX_W;
2767 }
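// Illustration only (approximate assembler for the mask moves defined above):
//   kmovw k1, k2                 ; kk: mask-to-mask
//   kmovb k1, byte ptr [rdi]     ; km: load
//   kmovq qword ptr [rsp], k3    ; mk: store
//   kmovd k1, eax                ; kr: GPR to mask
//   kmovd eax, k1                ; rk: mask to GPR
// Note that KMOVB/W/D go through 32-bit GPRs while KMOVQ uses 64-bit GPRs.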
2769 // GR from/to mask register
2770 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2771           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2772 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2773           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2775 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2776           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2777 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2778           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
2780 def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2781           (KMOVWrk VK16:$src)>;
2782 def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2783           (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
2784 def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2785           (COPY_TO_REGCLASS VK16:$src, GR32)>;
2786 def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2787           (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
2789 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2790           (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2791 def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2792           (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
2793 def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2794           (COPY_TO_REGCLASS VK8:$src, GR32)>;
2795 def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2796           (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
2798 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2799           (COPY_TO_REGCLASS GR32:$src, VK32)>;
2800 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2801           (COPY_TO_REGCLASS VK32:$src, GR32)>;
2802 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2803           (COPY_TO_REGCLASS GR64:$src, VK64)>;
2804 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2805           (COPY_TO_REGCLASS VK64:$src, GR64)>;
2807 // Load/store kreg
2808 let Predicates = [HasDQI] in {
2809   def : Pat<(store VK1:$src, addr:$dst),
2810             (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
2812   def : Pat<(v1i1 (load addr:$src)),
2813             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2814   def : Pat<(v2i1 (load addr:$src)),
2815             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2816   def : Pat<(v4i1 (load addr:$src)),
2817             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2818 }
2820 let Predicates = [HasAVX512] in {
2821   def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2822             (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2823   def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2824             (KMOVWkm addr:$src)>;
2825 }
2827 def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2828                          SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2829                                               SDTCVecEltisVT<1, i1>,
2830                                               SDTCisPtrTy<2>]>>;
2832 let Predicates = [HasAVX512] in {
2833   multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2834     def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2835               (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2837     def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2838               (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2840     def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2841               (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2843     def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2844               (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
2845   }
2847   defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
2848   defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
2849   defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
2850   defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
2851   defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
2852   defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
2853   defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;
2855   def : Pat<(insert_subvector (v16i1 immAllZerosV),
2856                               (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2857             (COPY_TO_REGCLASS
2858              (KMOVWkr (AND32ri8
2859                        (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2860                        (i32 1))), VK16)>;
2861 }
2863 // Mask unary operation
2864 // - KNOT
2865 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2866                             RegisterClass KRC, SDPatternOperator OpNode,
2867                             X86FoldableSchedWrite sched, Predicate prd> {
2868   let Predicates = [prd] in
2869     def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2870                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2871                [(set KRC:$dst, (OpNode KRC:$src))]>,
2872                Sched<[sched]>;
2873 }
2875 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2876                                 SDPatternOperator OpNode,
2877                                 X86FoldableSchedWrite sched> {
2878   defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2879                             sched, HasDQI>, VEX, PD;
2880   defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2881                             sched, HasAVX512>, VEX, PS;
2882   defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2883                             sched, HasBWI>, VEX, PD, VEX_W;
2884   defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2885                             sched, HasBWI>, VEX, PS, VEX_W;
2886 }
2888 // TODO - do we need an X86SchedWriteWidths::KMASK type?
2889 defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
2891 // KNL does not support KMOVB, so an 8-bit mask is promoted to a 16-bit mask.
2892 let Predicates = [HasAVX512, NoDQI] in
2893 def : Pat<(vnot VK8:$src),
2894           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
2896 def : Pat<(vnot VK4:$src),
2897           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2898 def : Pat<(vnot VK2:$src),
2899           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
2901 // Mask binary operation
2902 // - KAND, KANDN, KOR, KXNOR, KXOR
2903 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2904                            RegisterClass KRC, SDPatternOperator OpNode,
2905                            X86FoldableSchedWrite sched, Predicate prd,
2906                            bit IsCommutable> {
2907   let Predicates = [prd], isCommutable = IsCommutable in
2908     def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2909                !strconcat(OpcodeStr,
2910                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2911                [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
2912                Sched<[sched]>;
2913 }
2915 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2916                                  SDPatternOperator OpNode,
2917                                  X86FoldableSchedWrite sched, bit IsCommutable,
2918                                  Predicate prdW = HasAVX512> {
2919   defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2920                              sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
2921   defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2922                              sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
2923   defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2924                              sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
2925   defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2926                              sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
2927 }
2929 def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
2930 def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
2931 // These nodes use 'vnot' instead of 'not' to support vectors.
2932 def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
2933 def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
2935 // TODO - do we need an X86SchedWriteWidths::KMASK type?
2936 defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
2937 defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
2938 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
2939 defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
2940 defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
2941 defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
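// Illustration only:
//   kandw  k1, k2, k3        ; k1 = k2 & k3
//   kandnw k1, k2, k3        ; k1 = ~k2 & k3 (the 'vandn' fragment)
//   kxnorw k1, k1, k1        ; common idiom: set a mask to all ones
// KADDB/W are gated on DQI and KADDD/Q on BWI, and they are selected via the
// X86kadd node rather than plain 'add'.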
2943 multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
2944                             Instruction Inst> {
2945   // With AVX512F an 8-bit mask is promoted to a 16-bit mask; with the DQI
2946   // extension the type is legal and the KxxxB instructions are used directly.
2947   let Predicates = [NoDQI] in
2948   def : Pat<(VOpNode VK8:$src1, VK8:$src2),
2949             (COPY_TO_REGCLASS
2950               (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
2951                     (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
2953   // All types smaller than 8 bits require conversion anyway
2954   def : Pat<(OpNode VK1:$src1, VK1:$src2),
2955         (COPY_TO_REGCLASS (Inst
2956                            (COPY_TO_REGCLASS VK1:$src1, VK16),
2957                            (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
2958   def : Pat<(VOpNode VK2:$src1, VK2:$src2),
2959         (COPY_TO_REGCLASS (Inst
2960                            (COPY_TO_REGCLASS VK2:$src1, VK16),
2961                            (COPY_TO_REGCLASS VK2:$src2, VK16)), VK1)>;
2962   def : Pat<(VOpNode VK4:$src1, VK4:$src2),
2963         (COPY_TO_REGCLASS (Inst
2964                            (COPY_TO_REGCLASS VK4:$src1, VK16),
2965                            (COPY_TO_REGCLASS VK4:$src2, VK16)), VK1)>;
2966 }
2968 defm : avx512_binop_pat<and,   and,  KANDWrr>;
2969 defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
2970 defm : avx512_binop_pat<or,    or,   KORWrr>;
2971 defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
2972 defm : avx512_binop_pat<xor,   xor,  KXORWrr>;
2974 // Mask unpacking
2975 multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
2976                              X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
2977                              Predicate prd> {
2978   let Predicates = [prd] in {
2979     let hasSideEffects = 0 in
2980     def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
2981                (ins Src.KRC:$src1, Src.KRC:$src2),
2982                "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
2983                VEX_4V, VEX_L, Sched<[sched]>;
2985     def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
2986               (!cast<Instruction>(NAME##rr) Src.KRC:$src2, Src.KRC:$src1)>;
2987   }
2988 }
2990 defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, PD;
2991 defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
2992 defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;
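// Illustration only: kunpckbw k1, k2, k3 places the low 8 bits of k3 in the
// low half of k1 and the low 8 bits of k2 in the high half, which is why the
// concat_vectors pattern above passes $src2 as the instruction's first source.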
2994 // Mask bit testing
2995 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2996                               SDNode OpNode, X86FoldableSchedWrite sched,
2997                               Predicate prd> {
2998   let Predicates = [prd], Defs = [EFLAGS] in
2999     def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
3000                !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
3001                [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
3002                Sched<[sched]>;
3003 }
3005 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
3006                                 X86FoldableSchedWrite sched,
3007                                 Predicate prdW = HasAVX512> {
3008   defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
3009                                                                 VEX, PD;
3010   defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
3011                                                                 VEX, PS;
3012   defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
3013                                                                 VEX, PS, VEX_W;
3014   defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
3015                                                                 VEX, PD, VEX_W;
3016 }
3018 // TODO - do we need an X86SchedWriteWidths::KMASK type?
3019 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
3020 defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
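// Illustration only: kortest ORs its operands and sets ZF when the result is
// all zeros and CF when it is all ones, so a typical use is
//   kortestw k1, k1
//   jz       .Lall_lanes_false    ; label name is a sketch
// KTEST (DQI) sets the flags from AND/ANDN combinations of its operands.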
3022 // Mask shift
3023 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3024                                SDNode OpNode, X86FoldableSchedWrite sched> {
3025   let Predicates = [HasAVX512] in
3026     def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
3027                  !strconcat(OpcodeStr,
3028                             "\t{$imm, $src, $dst|$dst, $src, $imm}"),
3029                             [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>,
3030                  Sched<[sched]>;
3031 }
3033 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
3034                                  SDNode OpNode, X86FoldableSchedWrite sched> {
3035   defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3036                                sched>, VEX, TAPD, VEX_W;
3037   let Predicates = [HasDQI] in
3038   defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3039                                sched>, VEX, TAPD;
3040   let Predicates = [HasBWI] in {
3041   defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3042                                sched>, VEX, TAPD, VEX_W;
3043   defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3044                                sched>, VEX, TAPD;
3045   }
3046 }
3048 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
3049 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
3051 // Patterns for comparing 128/256-bit integer vectors using a 512-bit instruction.
3052 multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3053                                               string InstStr,
3054                                               X86VectorVTInfo Narrow,
3055                                               X86VectorVTInfo Wide> {
3056   def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
3057                               (Narrow.VT Narrow.RC:$src2))),
3058           (COPY_TO_REGCLASS
3059            (!cast<Instruction>(InstStr#"Zrr")
3060             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3061             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
3062            Narrow.KRC)>;
3064   def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3065                              (Frag_su (Narrow.VT Narrow.RC:$src1),
3066                                       (Narrow.VT Narrow.RC:$src2)))),
3067           (COPY_TO_REGCLASS
3068            (!cast<Instruction>(InstStr#"Zrrk")
3069             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3070             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3071             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
3072            Narrow.KRC)>;
3073 }
3075 // Patterns for comparing 128/256-bit integer vectors using a 512-bit instruction.
3076 multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3077                                                  string InstStr,
3078                                                  X86VectorVTInfo Narrow,
3079                                                  X86VectorVTInfo Wide> {
3080 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3081                                 (Narrow.VT Narrow.RC:$src2), cond)),
3082           (COPY_TO_REGCLASS
3083            (!cast<Instruction>(InstStr##Zrri)
3084             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3085             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3086             (Frag.OperandTransform $cc)), Narrow.KRC)>;
3088 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3089                            (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3090                                                     (Narrow.VT Narrow.RC:$src2),
3091                                                     cond)))),
3092           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
3093            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3094            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3095            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3096            (Frag.OperandTransform $cc)), Narrow.KRC)>;
3097 }
3099 // Same as above, but for FP types, which don't use PatFrags.
3100 multiclass axv512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, PatFrag OpNode_su,
3101                                                 string InstStr,
3102                                                 X86VectorVTInfo Narrow,
3103                                                 X86VectorVTInfo Wide> {
3104 def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
3105                               (Narrow.VT Narrow.RC:$src2), imm:$cc)),
3106           (COPY_TO_REGCLASS
3107            (!cast<Instruction>(InstStr##Zrri)
3108             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3109             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3110             imm:$cc), Narrow.KRC)>;
3112 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3113                            (OpNode_su (Narrow.VT Narrow.RC:$src1),
3114                                       (Narrow.VT Narrow.RC:$src2), imm:$cc))),
3115           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
3116            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3117            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3118            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3119            imm:$cc), Narrow.KRC)>;
3120 }
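// Instantiations: widen the 128/256-bit operands to 512 bits, run the 512-bit
// compare, and copy the resulting mask back to the narrow mask register class.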
3122 let Predicates = [HasAVX512, NoVLX] in {
3123   // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
3124   // increase the pattern complexity the way an immediate would.
3125   let AddedComplexity = 2 in {
3126   defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v8i32x_info, v16i32_info>;
3127   defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v8i32x_info, v16i32_info>;
3129   defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v4i32x_info, v16i32_info>;
3130   defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v4i32x_info, v16i32_info>;
3132   defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v4i64x_info, v8i64_info>;
3133   defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v4i64x_info, v8i64_info>;
3135   defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v2i64x_info, v8i64_info>;
3136   defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v2i64x_info, v8i64_info>;
3137   }
3139   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3140   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3142   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3143   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3145   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3146   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3148   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3149   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3151   defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPS", v8f32x_info, v16f32_info>;
3152   defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPS", v4f32x_info, v16f32_info>;
3153   defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPD", v4f64x_info, v8f64_info>;
3154   defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPD", v2f64x_info, v8f64_info>;
3155 }
3157 let Predicates = [HasBWI, NoVLX] in {
3158   // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
3159   // increase the pattern complexity the way an immediate would.
3160   let AddedComplexity = 2 in {
3161   defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTB", v32i8x_info, v64i8_info>;
3162   defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQB", v32i8x_info, v64i8_info>;
3164   defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTB", v16i8x_info, v64i8_info>;
3165   defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQB", v16i8x_info, v64i8_info>;
3167   defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTW", v16i16x_info, v32i16_info>;
3168   defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQW", v16i16x_info, v32i16_info>;
3170   defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTW", v8i16x_info, v32i16_info>;
3171   defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQW", v8i16x_info, v32i16_info>;
3172   }
3174   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3175   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3177   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3178   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3180   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3181   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3183   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3184   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
3185 }
3187 // Mask setting all 0s or 1s
3188 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
3189   let Predicates = [HasAVX512] in
3190     let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3191         SchedRW = [WriteZero] in
3192       def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3193                      [(set KRC:$dst, (VT Val))]>;
3194 }
3196 multiclass avx512_mask_setop_w<PatFrag Val> {
3197   defm W : avx512_mask_setop<VK16, v16i1, Val>;
3198   defm D : avx512_mask_setop<VK32,  v32i1, Val>;
3199   defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3200 }
3202 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3203 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3205 // With AVX-512 only, an 8-bit mask is promoted to a 16-bit mask.
3206 let Predicates = [HasAVX512] in {
3207   def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3208   def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3209   def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3210   def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3211   def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
3212   def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
3213   def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
3214   def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
3215 }
3217 // Patterns for kmask insert_subvector/extract_subvector to/from index=0
3218 multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3219                                              RegisterClass RC, ValueType VT> {
3220   def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3221             (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3223   def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3224             (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
3225 }
3226 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
3227 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
3228 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
3229 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
3230 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
3231 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;
3233 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
3234 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
3235 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
3236 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
3237 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;
3239 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
3240 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
3241 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
3242 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;
3244 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
3245 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
3246 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;
3248 defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3249 defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3251 defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3253 //===----------------------------------------------------------------------===//
3254 // AVX-512 - Aligned and unaligned load and store
3255 //===----------------------------------------------------------------------===//
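// avx512_load defines the register and memory forms of a vector move, their
// merge- and zero-masked variants, and patterns selecting the k-masked memory
// forms for masked_load nodes.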
3257 multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3258                        X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3259                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3260                        bit NoRMPattern = 0,
3261                        SDPatternOperator SelectOprr = vselect> {
3262   let hasSideEffects = 0 in {
3263   let isMoveReg = 1 in
3264   def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3265                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3266                     _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
3267                     EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
3268   def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3269                       (ins _.KRCWM:$mask,  _.RC:$src),
3270                       !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3271                        "${dst} {${mask}} {z}, $src}"),
3272                        [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3273                                            (_.VT _.RC:$src),
3274                                            _.ImmAllZerosV)))], _.ExeDomain>,
3275                        EVEX, EVEX_KZ, Sched<[Sched.RR]>;
3277   let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3278   def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3279                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3280                     !if(NoRMPattern, [],
3281                         [(set _.RC:$dst,
3282                           (_.VT (ld_frag addr:$src)))]),
3283                     _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
3284                     EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
3286   let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3287     def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3288                       (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3289                       !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3290                       "${dst} {${mask}}, $src1}"),
3291                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3292                                           (_.VT _.RC:$src1),
3293                                           (_.VT _.RC:$src0))))], _.ExeDomain>,
3294                        EVEX, EVEX_K, Sched<[Sched.RR]>;
3295     def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3296                      (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3297                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3298                       "${dst} {${mask}}, $src1}"),
3299                      [(set _.RC:$dst, (_.VT
3300                          (vselect _.KRCWM:$mask,
3301                           (_.VT (ld_frag addr:$src1)),
3302                            (_.VT _.RC:$src0))))], _.ExeDomain>,
3303                      EVEX, EVEX_K, Sched<[Sched.RM]>;
3304   }
3305   def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3306                   (ins _.KRCWM:$mask, _.MemOp:$src),
3307                   OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3308                                 "${dst} {${mask}} {z}, $src}",
3309                   [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
3310                     (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3311                   _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
3312   }
3313   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3314             (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3316   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3317             (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3319   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3320             (!cast<Instruction>(Name#_.ZSuffix##rmk) _.RC:$src0,
3321              _.KRCWM:$mask, addr:$ptr)>;
3322 }
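// The *_vl multiclasses instantiate the 512-bit form unconditionally and the
// 128/256-bit forms only when VLX is also available.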
3324 multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3325                                  AVX512VLVectorVTInfo _, Predicate prd,
3326                                  X86SchedWriteMoveLSWidths Sched,
3327                                  string EVEX2VEXOvrd, bit NoRMPattern = 0> {
3328   let Predicates = [prd] in
3329   defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3330                        _.info512.AlignedLdFrag, masked_load_aligned,
3331                        Sched.ZMM, "", NoRMPattern>, EVEX_V512;
3333   let Predicates = [prd, HasVLX] in {
3334   defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3335                           _.info256.AlignedLdFrag, masked_load_aligned,
3336                           Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
3337   defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3338                           _.info128.AlignedLdFrag, masked_load_aligned,
3339                           Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
3340   }
3341 }
3343 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3344                           AVX512VLVectorVTInfo _, Predicate prd,
3345                           X86SchedWriteMoveLSWidths Sched,
3346                           string EVEX2VEXOvrd, bit NoRMPattern = 0,
3347                           SDPatternOperator SelectOprr = vselect> {
3348   let Predicates = [prd] in
3349   defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3350                        masked_load, Sched.ZMM, "",
3351                        NoRMPattern, SelectOprr>, EVEX_V512;
3353   let Predicates = [prd, HasVLX] in {
3354   defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3355                          masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
3356                          NoRMPattern, SelectOprr>, EVEX_V256;
3357   defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3358                          masked_load, Sched.XMM, EVEX2VEXOvrd,
3359                          NoRMPattern, SelectOprr>, EVEX_V128;
3360   }
3361 }
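// avx512_store defines the memory-destination forms plus register-register
// "_REV" variants (destination encoded in the ModRM r/m field) and ".s"
// aliases that select that alternate encoding in assembly.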
3363 multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3364                         X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3365                         X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3366                         bit NoMRPattern = 0> {
3367   let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3368   let isMoveReg = 1 in
3369   def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3370                          OpcodeStr # "\t{$src, $dst|$dst, $src}",
3371                          [], _.ExeDomain>, EVEX,
3372                          FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
3373                          EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
3374   def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3375                          (ins _.KRCWM:$mask, _.RC:$src),
3376                          OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3377                          "${dst} {${mask}}, $src}",
3378                          [], _.ExeDomain>,  EVEX, EVEX_K,
3379                          FoldGenData<BaseName#_.ZSuffix#rrk>,
3380                          Sched<[Sched.RR]>;
3381   def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3382                           (ins _.KRCWM:$mask, _.RC:$src),
3383                           OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3384                           "${dst} {${mask}} {z}, $src}",
3385                           [], _.ExeDomain>, EVEX, EVEX_KZ,
3386                           FoldGenData<BaseName#_.ZSuffix#rrkz>,
3387                           Sched<[Sched.RR]>;
3388   }
3390   let hasSideEffects = 0, mayStore = 1 in
3391   def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3392                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3393                     !if(NoMRPattern, [],
3394                         [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3395                     _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
3396                     EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
3397   def mrk : AVX512PI<opc, MRMDestMem, (outs),
3398                      (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3399               OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3400                [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
3401                NotMemoryFoldable;
3403   def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3404            (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3405                                                         _.KRCWM:$mask, _.RC:$src)>;
3407   def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3408                   (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3409                    _.RC:$dst, _.RC:$src), 0>;
3410   def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3411                   (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3412                    _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3413   def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3414                   (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3415                    _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3416 }
3418 multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3419                             AVX512VLVectorVTInfo _, Predicate prd,
3420                             X86SchedWriteMoveLSWidths Sched,
3421                             string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3422   let Predicates = [prd] in
3423   defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3424                         masked_store, Sched.ZMM, "",
3425                         NoMRPattern>, EVEX_V512;
3426   let Predicates = [prd, HasVLX] in {
3427     defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3428                              masked_store, Sched.YMM,
3429                              EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3430     defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3431                              masked_store, Sched.XMM, EVEX2VEXOvrd,
3432                              NoMRPattern>, EVEX_V128;
3433   }
3434 }
3436 multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3437                                   AVX512VLVectorVTInfo _, Predicate prd,
3438                                   X86SchedWriteMoveLSWidths Sched,
3439                                   string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3440   let Predicates = [prd] in
3441   defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3442                         masked_store_aligned, Sched.ZMM, "",
3443                         NoMRPattern>, EVEX_V512;
3445   let Predicates = [prd, HasVLX] in {
3446     defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3447                              masked_store_aligned, Sched.YMM,
3448                              EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3449     defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3450                              masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
3451                              NoMRPattern>, EVEX_V128;
3452   }
3453 }
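// Instantiate the load/store multiclasses for the FP and integer moves;
// aligned forms use the aligned load/store fragments, unaligned forms the
// plain ones.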
3455 defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3456                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3457                avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3458                                       HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3459                PS, EVEX_CD8<32, CD8VF>;
3461 defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3462                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3463                avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3464                                       HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3465                PD, VEX_W, EVEX_CD8<64, CD8VF>;
3467 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3468                               SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
3469                avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3470                                SchedWriteFMoveLS, "VMOVUPS">,
3471                                PS, EVEX_CD8<32, CD8VF>;
3473 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3474                               SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
3475                avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3476                                SchedWriteFMoveLS, "VMOVUPD">,
3477                PD, VEX_W, EVEX_CD8<64, CD8VF>;
3479 defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3480                                        HasAVX512, SchedWriteVecMoveLS,
3481                                        "VMOVDQA", 1>,
3482                  avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3483                                         HasAVX512, SchedWriteVecMoveLS,
3484                                         "VMOVDQA", 1>,
3485                  PD, EVEX_CD8<32, CD8VF>;
3487 defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3488                                        HasAVX512, SchedWriteVecMoveLS,
3489                                        "VMOVDQA">,
3490                  avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3491                                         HasAVX512, SchedWriteVecMoveLS,
3492                                         "VMOVDQA">,
3493                  PD, VEX_W, EVEX_CD8<64, CD8VF>;
3495 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3496                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
3497                 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3498                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3499                 XD, EVEX_CD8<8, CD8VF>;
3501 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3502                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3503                  avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3504                                  SchedWriteVecMoveLS, "VMOVDQU", 1>,
3505                  XD, VEX_W, EVEX_CD8<16, CD8VF>;
3507 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3508                                 SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
3509                  avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3510                                  SchedWriteVecMoveLS, "VMOVDQU", 1>,
3511                  XS, EVEX_CD8<32, CD8VF>;
3513 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3514                                 SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
3515                  avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3516                                  SchedWriteVecMoveLS, "VMOVDQU">,
3517                  XS, VEX_W, EVEX_CD8<64, CD8VF>;
3519 // Special instructions to help with spilling when we don't have VLX. We need
3520 // to load into or store from a ZMM register instead. These are converted in
3521 // expandPostRAPseudos.
3522 let isReMaterializable = 1, canFoldAsLoad = 1,
3523     isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3524 def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3525                             "", []>, Sched<[WriteFLoadX]>;
3526 def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3527                             "", []>, Sched<[WriteFLoadY]>;
3528 def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3529                             "", []>, Sched<[WriteFLoadX]>;
3530 def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3531                             "", []>, Sched<[WriteFLoadY]>;
3532 }
3534 let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3535 def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3536                             "", []>, Sched<[WriteFStoreX]>;
3537 def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3538                             "", []>, Sched<[WriteFStoreY]>;
3539 def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3540                             "", []>, Sched<[WriteFStoreX]>;
3541 def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3542                             "", []>, Sched<[WriteFStoreY]>;
3543 }
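// Fold a zeroing vselect with inverted polarity (zeros selected where the mask
// is set) into a zero-masking move with the mask complemented by KNOT.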
3545 def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3546                           (v8i64 VR512:$src))),
3547    (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3548                                               VK8), VR512:$src)>;
3550 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3551                            (v16i32 VR512:$src))),
3552                   (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3554 // These patterns exist to prevent the above patterns from introducing a second
3555 // mask inversion when one already exists.
3556 def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
3557                           (v8i64 immAllZerosV),
3558                           (v8i64 VR512:$src))),
3559                  (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3560 def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
3561                            (v16i32 immAllZerosV),
3562                            (v16i32 VR512:$src))),
3563                   (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
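// Lower a masked select on a narrow (128/256-bit) vector without VLX by
// widening both operands to 512 bits, using the wide masked move, and
// extracting the low subvector of the result.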
3565 multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3566                               X86VectorVTInfo Wide> {
3567  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3568                                Narrow.RC:$src1, Narrow.RC:$src0)),
3569            (EXTRACT_SUBREG
3570             (Wide.VT
3571              (!cast<Instruction>(InstrStr#"rrk")
3572               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3573               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3574               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3575             Narrow.SubRegIdx)>;
3577  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3578                                Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3579            (EXTRACT_SUBREG
3580             (Wide.VT
3581              (!cast<Instruction>(InstrStr#"rrkz")
3582               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3583               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3584             Narrow.SubRegIdx)>;
3585 }
3587 // Patterns for handling selects of 128/256-bit vectors when VLX isn't
3588 // available. Use a 512-bit operation and extract.
3589 let Predicates = [HasAVX512, NoVLX] in {
3590   defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3591   defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3592   defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3593   defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3595   defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3596   defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3597   defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3598   defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
3599 }
3601 let Predicates = [HasBWI, NoVLX] in {
3602   defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3603   defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3605   defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3606   defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
3607 }
3609 let Predicates = [HasAVX512] in {
3610   // 512-bit load.
3611   def : Pat<(alignedloadv16i32 addr:$src),
3612             (VMOVDQA64Zrm addr:$src)>;
3613   def : Pat<(alignedloadv32i16 addr:$src),
3614             (VMOVDQA64Zrm addr:$src)>;
3615   def : Pat<(alignedloadv64i8 addr:$src),
3616             (VMOVDQA64Zrm addr:$src)>;
3617   def : Pat<(loadv16i32 addr:$src),
3618             (VMOVDQU64Zrm addr:$src)>;
3619   def : Pat<(loadv32i16 addr:$src),
3620             (VMOVDQU64Zrm addr:$src)>;
3621   def : Pat<(loadv64i8 addr:$src),
3622             (VMOVDQU64Zrm addr:$src)>;
3624   // 512-bit store.
3625   def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3626             (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3627   def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3628             (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3629   def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3630             (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3631   def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3632             (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3633   def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3634             (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3635   def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3636             (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3637 }
3639 let Predicates = [HasVLX] in {
3640   // 128-bit load.
3641   def : Pat<(alignedloadv4i32 addr:$src),
3642             (VMOVDQA64Z128rm addr:$src)>;
3643   def : Pat<(alignedloadv8i16 addr:$src),
3644             (VMOVDQA64Z128rm addr:$src)>;
3645   def : Pat<(alignedloadv16i8 addr:$src),
3646             (VMOVDQA64Z128rm addr:$src)>;
3647   def : Pat<(loadv4i32 addr:$src),
3648             (VMOVDQU64Z128rm addr:$src)>;
3649   def : Pat<(loadv8i16 addr:$src),
3650             (VMOVDQU64Z128rm addr:$src)>;
3651   def : Pat<(loadv16i8 addr:$src),
3652             (VMOVDQU64Z128rm addr:$src)>;
3654   // 128-bit store.
3655   def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3656             (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3657   def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3658             (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3659   def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3660             (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3661   def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3662             (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3663   def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3664             (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3665   def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3666             (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3668   // 256-bit load.
3669   def : Pat<(alignedloadv8i32 addr:$src),
3670             (VMOVDQA64Z256rm addr:$src)>;
3671   def : Pat<(alignedloadv16i16 addr:$src),
3672             (VMOVDQA64Z256rm addr:$src)>;
3673   def : Pat<(alignedloadv32i8 addr:$src),
3674             (VMOVDQA64Z256rm addr:$src)>;
3675   def : Pat<(loadv8i32 addr:$src),
3676             (VMOVDQU64Z256rm addr:$src)>;
3677   def : Pat<(loadv16i16 addr:$src),
3678             (VMOVDQU64Z256rm addr:$src)>;
3679   def : Pat<(loadv32i8 addr:$src),
3680             (VMOVDQU64Z256rm addr:$src)>;
3682   // 256-bit store.
3683   def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3684             (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3685   def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3686             (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3687   def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3688             (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3689   def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3690             (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3691   def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3692             (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3693   def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3694             (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3695 }
3697 // Move Int Doubleword to Packed Double Int
3699 let ExeDomain = SSEPackedInt in {
3700 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3701                       "vmovd\t{$src, $dst|$dst, $src}",
3702                       [(set VR128X:$dst,
3703                         (v4i32 (scalar_to_vector GR32:$src)))]>,
3704                         EVEX, Sched<[WriteVecMoveFromGpr]>;
3705 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3706                       "vmovd\t{$src, $dst|$dst, $src}",
3707                       [(set VR128X:$dst,
3708                         (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3709                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
3710 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3711                       "vmovq\t{$src, $dst|$dst, $src}",
3712                         [(set VR128X:$dst,
3713                           (v2i64 (scalar_to_vector GR64:$src)))]>,
3714                       EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3715 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3716 def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3717                       (ins i64mem:$src),
3718                       "vmovq\t{$src, $dst|$dst, $src}", []>,
3719                       EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
3720 let isCodeGenOnly = 1 in {
3721 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3722                        "vmovq\t{$src, $dst|$dst, $src}",
3723                        [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3724                        EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3725 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3726                          "vmovq\t{$src, $dst|$dst, $src}",
3727                          [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3728                          EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3729 }
3730 } // ExeDomain = SSEPackedInt
3732 // Move Int Doubleword to Single Scalar
3734 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3735 def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3736                       "vmovd\t{$src, $dst|$dst, $src}",
3737                       [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3738                       EVEX, Sched<[WriteVecMoveFromGpr]>;
3739 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3741 // Move doubleword from xmm register to r/m32
3743 let ExeDomain = SSEPackedInt in {
3744 def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3745                        "vmovd\t{$src, $dst|$dst, $src}",
3746                        [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3747                                         (iPTR 0)))]>,
3748                        EVEX, Sched<[WriteVecMoveToGpr]>;
3749 def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
3750                        (ins i32mem:$dst, VR128X:$src),
3751                        "vmovd\t{$src, $dst|$dst, $src}",
3752                        [(store (i32 (extractelt (v4i32 VR128X:$src),
3753                                      (iPTR 0))), addr:$dst)]>,
3754                        EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
3755 } // ExeDomain = SSEPackedInt
3757 // Move quadword from xmm1 register to r/m64
3759 let ExeDomain = SSEPackedInt in {
3760 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3761                       "vmovq\t{$src, $dst|$dst, $src}",
3762                       [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3763                                                    (iPTR 0)))]>,
3764                       PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
3765                       Requires<[HasAVX512]>;
3767 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
3768 def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3769                       "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
3770                       EVEX, VEX_W, Sched<[WriteVecStore]>,
3771                       Requires<[HasAVX512, In64BitMode]>;
3773 def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3774                       (ins i64mem:$dst, VR128X:$src),
3775                       "vmovq\t{$src, $dst|$dst, $src}",
3776                       [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3777                               addr:$dst)]>,
3778                       EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
3779                       Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
3781 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
3782 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3783                              (ins VR128X:$src),
3784                              "vmovq\t{$src, $dst|$dst, $src}", []>,
3785                              EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
3786 } // ExeDomain = SSEPackedInt
3788 def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
3789                 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
3791 let Predicates = [HasAVX512] in {
3792   def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
3793             (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
3794 }
3796 // Move Scalar Single to Double Int
3798 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3799 def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3800                       (ins FR32X:$src),
3801                       "vmovd\t{$src, $dst|$dst, $src}",
3802                       [(set GR32:$dst, (bitconvert FR32X:$src))]>,
3803                       EVEX, Sched<[WriteVecMoveToGpr]>;
3804 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3806 // Move Quadword Int to Packed Quadword Int
3808 let ExeDomain = SSEPackedInt in {
3809 def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3810                       (ins i64mem:$src),
3811                       "vmovq\t{$src, $dst|$dst, $src}",
3812                       [(set VR128X:$dst,
3813                         (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3814                       EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
3815 } // ExeDomain = SSEPackedInt
3817 // Allow "vmovd" but print "vmovq".
3818 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3819                 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
3820 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3821                 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3823 //===----------------------------------------------------------------------===//
3824 // AVX-512  MOVSS, MOVSD
3825 //===----------------------------------------------------------------------===//
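// avx512_move_scalar defines the register merge forms (with k-masked
// variants), the zero-extending scalar load, and the scalar store for
// VMOVSS/VMOVSD.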
3827 multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
3828                               X86VectorVTInfo _> {
3829   let Predicates = [HasAVX512, OptForSize] in
3830   def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3831              (ins _.RC:$src1, _.RC:$src2),
3832              !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3833              [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3834              _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
3835   def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3836               (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3837               !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3838               "$dst {${mask}} {z}, $src1, $src2}"),
3839               [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3840                                       (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3841                                       _.ImmAllZerosV)))],
3842               _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
3843   let Constraints = "$src0 = $dst"  in
3844   def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3845              (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3846              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3847              "$dst {${mask}}, $src1, $src2}"),
3848              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3849                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3850                                      (_.VT _.RC:$src0))))],
3851              _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
3852   let canFoldAsLoad = 1, isReMaterializable = 1 in {
3853   def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
3854              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3855              [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
3856              _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3857   // _alt version uses FR32/FR64 register class.
3858   let isCodeGenOnly = 1 in
3859   def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3860                  !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3861                  [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3862                  _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3863   }
3864   let mayLoad = 1, hasSideEffects = 0 in {
3865     let Constraints = "$src0 = $dst" in
3866     def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3867                (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3868                !strconcat(asm, "\t{$src, $dst {${mask}}|",
3869                "$dst {${mask}}, $src}"),
3870                [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
3871     def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3872                (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3873                !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3874                "$dst {${mask}} {z}, $src}"),
3875                [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
3876   }
3877   def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
3878              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3879              [(store _.FRC:$src, addr:$dst)],  _.ExeDomain>,
3880              EVEX, Sched<[WriteFStore]>;
3881   let mayStore = 1, hasSideEffects = 0 in
3882   def mrk: AVX512PI<0x11, MRMDestMem, (outs),
3883               (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
3884               !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
3885               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
3886               NotMemoryFoldable;
3887 }
3889 defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
3890                                   VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
3892 defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
3893                                   VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
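// Lower scalar selects feeding a movss/movsd-style insert onto the masked
// register forms defined above.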
3896 multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
3897                                        PatLeaf ZeroFP, X86VectorVTInfo _> {
3899 def : Pat<(_.VT (OpNode _.RC:$src0,
3900                         (_.VT (scalar_to_vector
3901                                   (_.EltVT (X86selects VK1WM:$mask,
3902                                                        (_.EltVT _.FRC:$src1),
3903                                                        (_.EltVT _.FRC:$src2))))))),
3904           (!cast<Instruction>(InstrStr#rrk)
3905                         (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
3906                         VK1WM:$mask,
3907                         (_.VT _.RC:$src0),
3908                         (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
3910 def : Pat<(_.VT (OpNode _.RC:$src0,
3911                         (_.VT (scalar_to_vector
3912                                   (_.EltVT (X86selects VK1WM:$mask,
3913                                                        (_.EltVT _.FRC:$src1),
3914                                                        (_.EltVT ZeroFP))))))),
3915           (!cast<Instruction>(InstrStr#rrkz)
3916                         VK1WM:$mask,
3917                         (_.VT _.RC:$src0),
3918                         (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
3919 }
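// Map masked stores of a single scalar element (widened to 512 bits by type
// legalization) onto the k-masked scalar store.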
3921 multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
3922                                         dag Mask, RegisterClass MaskRC> {
3924 def : Pat<(masked_store
3925              (_.info512.VT (insert_subvector undef,
3926                                (_.info128.VT _.info128.RC:$src),
3927                                (iPTR 0))), addr:$dst, Mask),
3928           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
3929                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
3930                       _.info128.RC:$src)>;
3931 }
3934 multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
3935                                                AVX512VLVectorVTInfo _,
3936                                                dag Mask, RegisterClass MaskRC,
3937                                                SubRegIndex subreg> {
3939 def : Pat<(masked_store
3940              (_.info512.VT (insert_subvector undef,
3941                                (_.info128.VT _.info128.RC:$src),
3942                                (iPTR 0))), addr:$dst, Mask),
3943           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
3944                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
3945                       _.info128.RC:$src)>;
3946 }
3949 // This matches the more recent codegen from clang that avoids emitting a
3950 // 512-bit masked store directly. Codegen will widen a 128-bit masked store
3951 // to 512 bits on AVX512F-only targets.
3952 multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
3953                                                AVX512VLVectorVTInfo _,
3954                                                dag Mask512, dag Mask128,
3955                                                RegisterClass MaskRC,
3956                                                SubRegIndex subreg> {
3958 // AVX512F pattern.
3959 def : Pat<(masked_store
3960              (_.info512.VT (insert_subvector undef,
3961                                (_.info128.VT _.info128.RC:$src),
3962                                (iPTR 0))), addr:$dst, Mask512),
3963           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
3964                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
3965                       _.info128.RC:$src)>;
3967 // AVX512VL pattern.
3968 def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
3969           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
3970                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
3971                       _.info128.RC:$src)>;
3972 }
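// Map masked loads of a single scalar element (widened to 512 bits by type
// legalization) onto the k-masked scalar load, for both the zeroing and the
// merge (passthru) cases.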
3974 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
3975                                        dag Mask, RegisterClass MaskRC> {
3977 def : Pat<(_.info128.VT (extract_subvector
3978                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
3979                                         _.info512.ImmAllZerosV)),
3980                            (iPTR 0))),
3981           (!cast<Instruction>(InstrStr#rmkz)
3982                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
3983                       addr:$srcAddr)>;
3985 def : Pat<(_.info128.VT (extract_subvector
3986                 (_.info512.VT (masked_load addr:$srcAddr, Mask,
3987                       (_.info512.VT (insert_subvector undef,
3988                             (_.info128.VT (X86vzmovl _.info128.RC:$src)),
3989                             (iPTR 0))))),
3990                 (iPTR 0))),
3991           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
3992                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
3993                       addr:$srcAddr)>;
3994 }
3997 multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
3998                                               AVX512VLVectorVTInfo _,
3999                                               dag Mask, RegisterClass MaskRC,
4000                                               SubRegIndex subreg> {
4002 def : Pat<(_.info128.VT (extract_subvector
4003                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
4004                                         _.info512.ImmAllZerosV)),
4005                            (iPTR 0))),
4006           (!cast<Instruction>(InstrStr#rmkz)
4007                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4008                       addr:$srcAddr)>;
4010 def : Pat<(_.info128.VT (extract_subvector
4011                 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4012                       (_.info512.VT (insert_subvector undef,
4013                             (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4014                             (iPTR 0))))),
4015                 (iPTR 0))),
4016           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4017                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4018                       addr:$srcAddr)>;
4019 }
4022 // This matches the more recent codegen from clang that avoids emitting a
4023 // 512-bit masked load directly. Codegen will widen a 128-bit masked load
4024 // to 512 bits on AVX512F-only targets.
4025 multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4026                                               AVX512VLVectorVTInfo _,
4027                                               dag Mask512, dag Mask128,
4028                                               RegisterClass MaskRC,
4029                                               SubRegIndex subreg> {
4030 // AVX512F patterns.
4031 def : Pat<(_.info128.VT (extract_subvector
4032                          (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4033                                         _.info512.ImmAllZerosV)),
4034                            (iPTR 0))),
4035           (!cast<Instruction>(InstrStr#rmkz)
4036                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4037                       addr:$srcAddr)>;
4039 def : Pat<(_.info128.VT (extract_subvector
4040                 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4041                       (_.info512.VT (insert_subvector undef,
4042                             (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4043                             (iPTR 0))))),
4044                 (iPTR 0))),
4045           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4046                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4047                       addr:$srcAddr)>;
4049 // AVX512VL patterns.
4050 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4051                          _.info128.ImmAllZerosV)),
4052           (!cast<Instruction>(InstrStr#rmkz)
4053                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4054                       addr:$srcAddr)>;
4056 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4057                          (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4058           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4059                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4060                       addr:$srcAddr)>;
4063 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4064 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4066 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4067                    (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4068 defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4069                    (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4070 defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4071                    (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4073 defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4074                    (v16i1 (insert_subvector
4075                            (v16i1 immAllZerosV),
4076                            (v4i1 (extract_subvector
4077                                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4078                                   (iPTR 0))),
4079                            (iPTR 0))),
4080                    (v4i1 (extract_subvector
4081                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4082                           (iPTR 0))), GR8, sub_8bit>;
4083 defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4084                    (v8i1
4085                     (extract_subvector
4086                      (v16i1
4087                       (insert_subvector
4088                        (v16i1 immAllZerosV),
4089                        (v2i1 (extract_subvector
4090                               (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4091                               (iPTR 0))),
4092                        (iPTR 0))),
4093                      (iPTR 0))),
4094                    (v2i1 (extract_subvector
4095                           (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4096                           (iPTR 0))), GR8, sub_8bit>;
4098 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4099                    (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4100 defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4101                    (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4102 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4103                    (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4105 defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4106                    (v16i1 (insert_subvector
4107                            (v16i1 immAllZerosV),
4108                            (v4i1 (extract_subvector
4109                                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4110                                   (iPTR 0))),
4111                            (iPTR 0))),
4112                    (v4i1 (extract_subvector
4113                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4114                           (iPTR 0))), GR8, sub_8bit>;
4115 defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4116                    (v8i1
4117                     (extract_subvector
4118                      (v16i1
4119                       (insert_subvector
4120                        (v16i1 immAllZerosV),
4121                        (v2i1 (extract_subvector
4122                               (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4123                               (iPTR 0))),
4124                        (iPTR 0))),
4125                      (iPTR 0))),
4126                    (v2i1 (extract_subvector
4127                           (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4128                           (iPTR 0))), GR8, sub_8bit>;
4130 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4131           (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4132            (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4133            VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4134            (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4136 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4137           (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4138            (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4140 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4141           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4142            (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4143            VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4144            (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4146 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fpimm0)),
4147           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4148            (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
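// The four patterns above fold a scalar f32/f64 select under a single mask bit
// into the masked register forms of VMOVSS/VMOVSD. The scalar operands are
// bounced through VR128X with COPY_TO_REGCLASS because the masked move only
// exists as a vector instruction.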
4150 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
4151   def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4152                            (ins VR128X:$src1, VR128X:$src2),
4153                            "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4154                            []>, XS, EVEX_4V, VEX_LIG,
4155                            FoldGenData<"VMOVSSZrr">,
4156                            Sched<[SchedWriteFShuffle.XMM]>;
4158   let Constraints = "$src0 = $dst" in
4159   def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4160                              (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4161                                                    VR128X:$src1, VR128X:$src2),
4162                              "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4163                                         "$dst {${mask}}, $src1, $src2}",
4164                              []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
4165                              FoldGenData<"VMOVSSZrrk">,
4166                              Sched<[SchedWriteFShuffle.XMM]>;
4168   def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4169                          (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4170                          "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4171                                     "$dst {${mask}} {z}, $src1, $src2}",
4172                          []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
4173                          FoldGenData<"VMOVSSZrrkz">,
4174                          Sched<[SchedWriteFShuffle.XMM]>;
4176   def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4177                            (ins VR128X:$src1, VR128X:$src2),
4178                            "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4179                            []>, XD, EVEX_4V, VEX_LIG, VEX_W,
4180                            FoldGenData<"VMOVSDZrr">,
4181                            Sched<[SchedWriteFShuffle.XMM]>;
4183   let Constraints = "$src0 = $dst" in
4184   def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4185                              (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4186                                                    VR128X:$src1, VR128X:$src2),
4187                              "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4188                                         "$dst {${mask}}, $src1, $src2}",
4189                              []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
4190                              VEX_W, FoldGenData<"VMOVSDZrrk">,
4191                              Sched<[SchedWriteFShuffle.XMM]>;
4193   def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4194                               (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4195                                                           VR128X:$src2),
4196                               "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4197                                          "$dst {${mask}} {z}, $src1, $src2}",
4198                               []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
4199                               VEX_W, FoldGenData<"VMOVSDZrrkz">,
4200                               Sched<[SchedWriteFShuffle.XMM]>;
4203 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4204                 (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4205 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4206                              "$dst {${mask}}, $src1, $src2}",
4207                 (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4208                                 VR128X:$src1, VR128X:$src2), 0>;
4209 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4210                              "$dst {${mask}} {z}, $src1, $src2}",
4211                 (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4212                                  VR128X:$src1, VR128X:$src2), 0>;
4213 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4214                 (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4215 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4216                              "$dst {${mask}}, $src1, $src2}",
4217                 (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4218                                 VR128X:$src1, VR128X:$src2), 0>;
4219 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4220                              "$dst {${mask}} {z}, $src1, $src2}",
4221                 (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4222                                  VR128X:$src1, VR128X:$src2), 0>;
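// The "vmovss.s"/"vmovsd.s" aliases above map onto the store-form (MRMDestReg)
// _REV encodings defined earlier, so the alternate register-register encoding
// can be written in assembly and still appears in disassembly (the _REV defs
// are isCodeGenOnly with ForceDisassemble set).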
4224 let Predicates = [HasAVX512, OptForSize] in {
4225   def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4226             (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4227   def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4228             (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4230   // Move low f32 and clear high bits.
4231   def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4232             (SUBREG_TO_REG (i32 0),
4233              (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4234               (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4235   def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4236             (SUBREG_TO_REG (i32 0),
4237              (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4238               (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4240   def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4241             (SUBREG_TO_REG (i32 0),
4242              (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4243               (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4244   def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4245             (SUBREG_TO_REG (i32 0),
4246              (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4247               (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4250 // Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4251 // VMOVSS/SD. Unfortunately, this loses XMM16-31, which VEX-encoded blends cannot address.
4252 let Predicates = [HasAVX512, OptForSpeed] in {
4253   def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4254             (SUBREG_TO_REG (i32 0),
4255              (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4256                           (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4257                           (i8 1))), sub_xmm)>;
4258   def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4259             (SUBREG_TO_REG (i32 0),
4260              (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4261                           (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4262                           (i8 3))), sub_xmm)>;
4265 let Predicates = [HasAVX512] in {
4266   def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4267             (VMOVSSZrm addr:$src)>;
4268   def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4269             (VMOVSDZrm addr:$src)>;
4271   // Represent the same patterns above but in the form they appear for
4272   // 256-bit types
4273   def : Pat<(v8f32 (X86vzload32 addr:$src)),
4274             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4275   def : Pat<(v4f64 (X86vzload64 addr:$src)),
4276             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4278   // Represent the same patterns above but in the form they appear for
4279   // 512-bit types
4280   def : Pat<(v16f32 (X86vzload32 addr:$src)),
4281             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4282   def : Pat<(v8f64 (X86vzload64 addr:$src)),
4283             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4286 let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4287 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4288                                 (ins VR128X:$src),
4289                                 "vmovq\t{$src, $dst|$dst, $src}",
4290                                 [(set VR128X:$dst, (v2i64 (X86vzmovl
4291                                                    (v2i64 VR128X:$src))))]>,
4292                                 EVEX, VEX_W;
4295 let Predicates = [HasAVX512] in {
4296   def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4297             (VMOVDI2PDIZrr GR32:$src)>;
4299   def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4300             (VMOV64toPQIZrr GR64:$src)>;
4302   // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
4303   def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
4304             (VMOVDI2PDIZrm addr:$src)>;
4305   def : Pat<(v4i32 (X86vzload32 addr:$src)),
4306             (VMOVDI2PDIZrm addr:$src)>;
4307   def : Pat<(v8i32 (X86vzload32 addr:$src)),
4308             (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4309   def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4310             (VMOVZPQILo2PQIZrr VR128X:$src)>;
4311   def : Pat<(v2i64 (X86vzload64 addr:$src)),
4312             (VMOVQI2PQIZrm addr:$src)>;
4313   def : Pat<(v4i64 (X86vzload64 addr:$src)),
4314             (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4316   // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4317   def : Pat<(v16i32 (X86vzload32 addr:$src)),
4318             (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4319   def : Pat<(v8i64 (X86vzload64 addr:$src)),
4320             (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
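  // The vzmovl patterns below clear everything above element 0 of a YMM/ZMM
  // register by running VMOVZPQILo2PQIZrr on the low XMM subregister and
  // reinserting the result with SUBREG_TO_REG.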
4322   def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4323             (SUBREG_TO_REG (i32 0),
4324              (v2f64 (VMOVZPQILo2PQIZrr
4325                      (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4326              sub_xmm)>;
4327   def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4328             (SUBREG_TO_REG (i32 0),
4329              (v2i64 (VMOVZPQILo2PQIZrr
4330                      (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4331              sub_xmm)>;
4333   def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4334             (SUBREG_TO_REG (i32 0),
4335              (v2f64 (VMOVZPQILo2PQIZrr
4336                      (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4337              sub_xmm)>;
4338   def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4339             (SUBREG_TO_REG (i32 0),
4340              (v2i64 (VMOVZPQILo2PQIZrr
4341                      (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4342              sub_xmm)>;
4345 //===----------------------------------------------------------------------===//
4346 // AVX-512 - Non-temporals
4347 //===----------------------------------------------------------------------===//
4349 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4350                       (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4351                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4352                       EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4354 let Predicates = [HasVLX] in {
4355   def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4356                        (ins i256mem:$src),
4357                        "vmovntdqa\t{$src, $dst|$dst, $src}",
4358                        [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4359                        EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4361   def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4362                       (ins i128mem:$src),
4363                       "vmovntdqa\t{$src, $dst|$dst, $src}",
4364                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4365                       EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
4368 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4369                         X86SchedWriteMoveLS Sched,
4370                         PatFrag st_frag = alignednontemporalstore> {
4371   let SchedRW = [Sched.MR], AddedComplexity = 400 in
4372   def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4373                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4374                     [(st_frag (_.VT _.RC:$src), addr:$dst)],
4375                     _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
4378 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4379                            AVX512VLVectorVTInfo VTInfo,
4380                            X86SchedWriteMoveLSWidths Sched> {
4381   let Predicates = [HasAVX512] in
4382     defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4384   let Predicates = [HasAVX512, HasVLX] in {
4385     defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4386     defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
4387   }
4390 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4391                                 SchedWriteVecMoveLSNT>, PD;
4392 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4393                                 SchedWriteFMoveLSNT>, PD, VEX_W;
4394 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4395                                 SchedWriteFMoveLSNT>, PS;
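// A hedged illustration (not taken from this file): the streaming intrinsics
// from <immintrin.h> select the non-temporal instructions above, e.g. the
// hypothetical helper below, which assumes 64-byte-aligned pointers.
//
//   void copy_nt64(void *dst, const void *src) {
//     _mm512_stream_si512((__m512i *)dst,                        // -> vmovntdq
//                         _mm512_stream_load_si512((void *)src)); // -> vmovntdqa
//   }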
4397 let Predicates = [HasAVX512], AddedComplexity = 400 in {
4398   def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4399             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4400   def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4401             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4402   def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4403             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4405   def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4406             (VMOVNTDQAZrm addr:$src)>;
4407   def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4408             (VMOVNTDQAZrm addr:$src)>;
4409   def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4410             (VMOVNTDQAZrm addr:$src)>;
4411   def : Pat<(v16i32 (alignednontemporalload addr:$src)),
4412             (VMOVNTDQAZrm addr:$src)>;
4413   def : Pat<(v32i16 (alignednontemporalload addr:$src)),
4414             (VMOVNTDQAZrm addr:$src)>;
4415   def : Pat<(v64i8 (alignednontemporalload addr:$src)),
4416             (VMOVNTDQAZrm addr:$src)>;
4419 let Predicates = [HasVLX], AddedComplexity = 400 in {
4420   def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4421             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4422   def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4423             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4424   def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4425             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4427   def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4428             (VMOVNTDQAZ256rm addr:$src)>;
4429   def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4430             (VMOVNTDQAZ256rm addr:$src)>;
4431   def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4432             (VMOVNTDQAZ256rm addr:$src)>;
4433   def : Pat<(v8i32 (alignednontemporalload addr:$src)),
4434             (VMOVNTDQAZ256rm addr:$src)>;
4435   def : Pat<(v16i16 (alignednontemporalload addr:$src)),
4436             (VMOVNTDQAZ256rm addr:$src)>;
4437   def : Pat<(v32i8 (alignednontemporalload addr:$src)),
4438             (VMOVNTDQAZ256rm addr:$src)>;
4440   def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4441             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4442   def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4443             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4444   def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4445             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4447   def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4448             (VMOVNTDQAZ128rm addr:$src)>;
4449   def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4450             (VMOVNTDQAZ128rm addr:$src)>;
4451   def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4452             (VMOVNTDQAZ128rm addr:$src)>;
4453   def : Pat<(v4i32 (alignednontemporalload addr:$src)),
4454             (VMOVNTDQAZ128rm addr:$src)>;
4455   def : Pat<(v8i16 (alignednontemporalload addr:$src)),
4456             (VMOVNTDQAZ128rm addr:$src)>;
4457   def : Pat<(v16i8 (alignednontemporalload addr:$src)),
4458             (VMOVNTDQAZ128rm addr:$src)>;
4461 //===----------------------------------------------------------------------===//
4462 // AVX-512 - Integer arithmetic
4464 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4465                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
4466                            bit IsCommutable = 0> {
4467   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4468                     (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4469                     "$src2, $src1", "$src1, $src2",
4470                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4471                     IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
4472                     Sched<[sched]>;
4474   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4475                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4476                   "$src2, $src1", "$src1, $src2",
4477                   (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
4478                   AVX512BIBase, EVEX_4V,
4479                   Sched<[sched.Folded, sched.ReadAfterFold]>;
4482 multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4483                             X86VectorVTInfo _, X86FoldableSchedWrite sched,
4484                             bit IsCommutable = 0> :
4485            avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4486   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4487                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4488                   "${src2}"##_.BroadcastStr##", $src1",
4489                   "$src1, ${src2}"##_.BroadcastStr,
4490                   (_.VT (OpNode _.RC:$src1,
4491                                 (X86VBroadcast
4492                                     (_.ScalarLdFrag addr:$src2))))>,
4493                   AVX512BIBase, EVEX_4V, EVEX_B,
4494                   Sched<[sched.Folded, sched.ReadAfterFold]>;
4497 multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4498                               AVX512VLVectorVTInfo VTInfo,
4499                               X86SchedWriteWidths sched, Predicate prd,
4500                               bit IsCommutable = 0> {
4501   let Predicates = [prd] in
4502     defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4503                              IsCommutable>, EVEX_V512;
4505   let Predicates = [prd, HasVLX] in {
4506     defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4507                                 sched.YMM, IsCommutable>, EVEX_V256;
4508     defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4509                                 sched.XMM, IsCommutable>, EVEX_V128;
4510   }
4513 multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4514                                AVX512VLVectorVTInfo VTInfo,
4515                                X86SchedWriteWidths sched, Predicate prd,
4516                                bit IsCommutable = 0> {
4517   let Predicates = [prd] in
4518     defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4519                              IsCommutable>, EVEX_V512;
4521   let Predicates = [prd, HasVLX] in {
4522     defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4523                                  sched.YMM, IsCommutable>, EVEX_V256;
4524     defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4525                                  sched.XMM, IsCommutable>, EVEX_V128;
4526   }
4529 multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4530                                 X86SchedWriteWidths sched, Predicate prd,
4531                                 bit IsCommutable = 0> {
4532   defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4533                                   sched, prd, IsCommutable>,
4534                                   VEX_W, EVEX_CD8<64, CD8VF>;
4537 multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4538                                 X86SchedWriteWidths sched, Predicate prd,
4539                                 bit IsCommutable = 0> {
4540   defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4541                                   sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
4544 multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4545                                 X86SchedWriteWidths sched, Predicate prd,
4546                                 bit IsCommutable = 0> {
4547   defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4548                                  sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4549                                  VEX_WIG;
4552 multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4553                                 X86SchedWriteWidths sched, Predicate prd,
4554                                 bit IsCommutable = 0> {
4555   defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4556                                  sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
4557                                  VEX_WIG;
4560 multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4561                                  SDNode OpNode, X86SchedWriteWidths sched,
4562                                  Predicate prd, bit IsCommutable = 0> {
4563   defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
4564                                    IsCommutable>;
4566   defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
4567                                    IsCommutable>;
4570 multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4571                                  SDNode OpNode, X86SchedWriteWidths sched,
4572                                  Predicate prd, bit IsCommutable = 0> {
4573   defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
4574                                    IsCommutable>;
4576   defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
4577                                    IsCommutable>;
4580 multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4581                                   bits<8> opc_d, bits<8> opc_q,
4582                                   string OpcodeStr, SDNode OpNode,
4583                                   X86SchedWriteWidths sched,
4584                                   bit IsCommutable = 0> {
4585   defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4586                                     sched, HasAVX512, IsCommutable>,
4587               avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4588                                     sched, HasBWI, IsCommutable>;
4591 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
4592                             X86FoldableSchedWrite sched,
4593                             SDNode OpNode,X86VectorVTInfo _Src,
4594                             X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4595                             bit IsCommutable = 0> {
4596   defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4597                             (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4598                             "$src2, $src1","$src1, $src2",
4599                             (_Dst.VT (OpNode
4600                                          (_Src.VT _Src.RC:$src1),
4601                                          (_Src.VT _Src.RC:$src2))),
4602                             IsCommutable>,
4603                             AVX512BIBase, EVEX_4V, Sched<[sched]>;
4604   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4605                         (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4606                         "$src2, $src1", "$src1, $src2",
4607                         (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4608                                       (_Src.LdFrag addr:$src2)))>,
4609                         AVX512BIBase, EVEX_4V,
4610                         Sched<[sched.Folded, sched.ReadAfterFold]>;
4612   defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4613                     (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
4614                     OpcodeStr,
4615                     "${src2}"##_Brdct.BroadcastStr##", $src1",
4616                      "$src1, ${src2}"##_Brdct.BroadcastStr,
4617                     (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4618                                  (_Brdct.VT (X86VBroadcast
4619                                           (_Brdct.ScalarLdFrag addr:$src2))))))>,
4620                     AVX512BIBase, EVEX_4V, EVEX_B,
4621                     Sched<[sched.Folded, sched.ReadAfterFold]>;
4624 defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
4625                                     SchedWriteVecALU, 1>;
4626 defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
4627                                     SchedWriteVecALU, 0>;
4628 defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
4629                                     SchedWriteVecALU, HasBWI, 1>;
4630 defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
4631                                     SchedWriteVecALU, HasBWI, 0>;
4632 defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
4633                                      SchedWriteVecALU, HasBWI, 1>;
4634 defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
4635                                      SchedWriteVecALU, HasBWI, 0>;
4636 defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
4637                                     SchedWritePMULLD, HasAVX512, 1>, T8PD;
4638 defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
4639                                     SchedWriteVecIMul, HasBWI, 1>;
4640 defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
4641                                     SchedWriteVecIMul, HasDQI, 1>, T8PD,
4642                                     NotEVEX2VEXConvertible;
4643 defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
4644                                     HasBWI, 1>;
4645 defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
4646                                      HasBWI, 1>;
4647 defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
4648                                       SchedWriteVecIMul, HasBWI, 1>, T8PD;
4649 defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
4650                                    SchedWriteVecALU, HasBWI, 1>;
4651 defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
4652                                     SchedWriteVecIMul, HasAVX512, 1>, T8PD;
4653 defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
4654                                      SchedWriteVecIMul, HasAVX512, 1>;
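// A hedged illustration (not taken from this file): plain vector arithmetic on
// __m512i values selects the unmasked forms defined above; the hypothetical
// helper below selects the VPADDD flavor of VPADD, while the masked and
// broadcast-memory variants come from the AVX512_maskable and rmb multiclasses.
//
//   __m512i add32(__m512i a, __m512i b) { return _mm512_add_epi32(a, b); }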
4656 multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
4657                             X86SchedWriteWidths sched,
4658                             AVX512VLVectorVTInfo _SrcVTInfo,
4659                             AVX512VLVectorVTInfo _DstVTInfo,
4660                             SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
4661   let Predicates = [prd] in
4662     defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
4663                                  _SrcVTInfo.info512, _DstVTInfo.info512,
4664                                  v8i64_info, IsCommutable>,
4665                                   EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
4666   let Predicates = [HasVLX, prd] in {
4667     defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
4668                                       _SrcVTInfo.info256, _DstVTInfo.info256,
4669                                       v4i64x_info, IsCommutable>,
4670                                       EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
4671     defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
4672                                       _SrcVTInfo.info128, _DstVTInfo.info128,
4673                                       v2i64x_info, IsCommutable>,
4674                                      EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
4675   }
4678 defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
4679                                 avx512vl_i8_info, avx512vl_i8_info,
4680                                 X86multishift, HasVBMI, 0>, T8PD;
4682 multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4683                             X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
4684                             X86FoldableSchedWrite sched> {
4685   defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4686                     (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
4687                     OpcodeStr,
4688                     "${src2}"##_Src.BroadcastStr##", $src1",
4689                      "$src1, ${src2}"##_Src.BroadcastStr,
4690                     (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4691                                  (_Src.VT (X86VBroadcast
4692                                           (_Src.ScalarLdFrag addr:$src2))))))>,
4693                     EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
4694                     Sched<[sched.Folded, sched.ReadAfterFold]>;
4697 multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
4698                             SDNode OpNode,X86VectorVTInfo _Src,
4699                             X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
4700                             bit IsCommutable = 0> {
4701   defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4702                             (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4703                             "$src2, $src1","$src1, $src2",
4704                             (_Dst.VT (OpNode
4705                                          (_Src.VT _Src.RC:$src1),
4706                                          (_Src.VT _Src.RC:$src2))),
4707                             IsCommutable, IsCommutable>,
4708                             EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
4709   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4710                         (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4711                         "$src2, $src1", "$src1, $src2",
4712                         (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4713                                       (_Src.LdFrag addr:$src2)))>,
4714                          EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
4715                          Sched<[sched.Folded, sched.ReadAfterFold]>;
4718 multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
4719                                     SDNode OpNode> {
4720   let Predicates = [HasBWI] in
4721   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
4722                                  v32i16_info, SchedWriteShuffle.ZMM>,
4723                 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
4724                                  v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
4725   let Predicates = [HasBWI, HasVLX] in {
4726     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
4727                                      v16i16x_info, SchedWriteShuffle.YMM>,
4728                      avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
4729                                       v16i16x_info, SchedWriteShuffle.YMM>,
4730                                       EVEX_V256;
4731     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
4732                                      v8i16x_info, SchedWriteShuffle.XMM>,
4733                      avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
4734                                       v8i16x_info, SchedWriteShuffle.XMM>,
4735                                       EVEX_V128;
4736   }
4738 multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
4739                             SDNode OpNode> {
4740   let Predicates = [HasBWI] in
4741   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
4742                                 SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
4743   let Predicates = [HasBWI, HasVLX] in {
4744     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
4745                                      v32i8x_info, SchedWriteShuffle.YMM>,
4746                                      EVEX_V256, VEX_WIG;
4747     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
4748                                      v16i8x_info, SchedWriteShuffle.XMM>,
4749                                      EVEX_V128, VEX_WIG;
4750   }
4753 multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
4754                             SDNode OpNode, AVX512VLVectorVTInfo _Src,
4755                             AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
4756   let Predicates = [HasBWI] in
4757   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
4758                                 _Dst.info512, SchedWriteVecIMul.ZMM,
4759                                 IsCommutable>, EVEX_V512;
4760   let Predicates = [HasBWI, HasVLX] in {
4761     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
4762                                      _Dst.info256, SchedWriteVecIMul.YMM,
4763                                      IsCommutable>, EVEX_V256;
4764     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
4765                                      _Dst.info128, SchedWriteVecIMul.XMM,
4766                                      IsCommutable>, EVEX_V128;
4767   }
4770 defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
4771 defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
4772 defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
4773 defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
4775 defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
4776                      avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
4777 defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
4778                      avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
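// Only VPMADDWD is marked commutable above: it multiplies adjacent signed i16
// pairs, so swapping the operands is harmless, whereas VPMADDUBSW mixes
// unsigned bytes from one operand with signed bytes from the other and
// therefore is not.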
4780 defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
4781                                     SchedWriteVecALU, HasBWI, 1>, T8PD;
4782 defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
4783                                     SchedWriteVecALU, HasBWI, 1>;
4784 defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
4785                                     SchedWriteVecALU, HasAVX512, 1>, T8PD;
4786 defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
4787                                     SchedWriteVecALU, HasAVX512, 1>, T8PD,
4788                                     NotEVEX2VEXConvertible;
4790 defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
4791                                     SchedWriteVecALU, HasBWI, 1>;
4792 defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
4793                                     SchedWriteVecALU, HasBWI, 1>, T8PD;
4794 defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
4795                                     SchedWriteVecALU, HasAVX512, 1>, T8PD;
4796 defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
4797                                     SchedWriteVecALU, HasAVX512, 1>, T8PD,
4798                                     NotEVEX2VEXConvertible;
4800 defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
4801                                     SchedWriteVecALU, HasBWI, 1>, T8PD;
4802 defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
4803                                     SchedWriteVecALU, HasBWI, 1>;
4804 defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
4805                                     SchedWriteVecALU, HasAVX512, 1>, T8PD;
4806 defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
4807                                     SchedWriteVecALU, HasAVX512, 1>, T8PD,
4808                                     NotEVEX2VEXConvertible;
4810 defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
4811                                     SchedWriteVecALU, HasBWI, 1>;
4812 defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
4813                                     SchedWriteVecALU, HasBWI, 1>, T8PD;
4814 defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
4815                                     SchedWriteVecALU, HasAVX512, 1>, T8PD;
4816 defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
4817                                     SchedWriteVecALU, HasAVX512, 1>, T8PD,
4818                                     NotEVEX2VEXConvertible;
4820 // PMULLQ: Use the 512-bit version for the 128/256-bit forms when VLX is unavailable.
4821 let Predicates = [HasDQI, NoVLX] in {
4822   def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
4823             (EXTRACT_SUBREG
4824                 (VPMULLQZrr
4825                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4826                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4827              sub_ymm)>;
4829   def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
4830             (EXTRACT_SUBREG
4831                 (VPMULLQZrr
4832                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4833                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
4834              sub_xmm)>;
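// The same widening trick is used below for 64-bit element min/max when VLX is
// unavailable: widen the operands to ZMM with INSERT_SUBREG, run the 512-bit
// instruction, and take the low subvector back out with EXTRACT_SUBREG.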
4854 multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
4855   def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
4856             (EXTRACT_SUBREG
4857                 (Instr
4858                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4859                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4860              sub_ymm)>;
4862   def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
4863             (EXTRACT_SUBREG
4864                 (Instr
4865                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4866                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
4867              sub_xmm)>;
4870 let Predicates = [HasAVX512, NoVLX] in {
4871   defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
4872   defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
4873   defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;
4874   defm : avx512_min_max_lowering<VPMINSQZrr, smin>;
4877 //===----------------------------------------------------------------------===//
4878 // AVX-512 - Logical Instructions
4879 //===----------------------------------------------------------------------===//
4881 defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
4882                                    SchedWriteVecLogic, HasAVX512, 1>;
4883 defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
4884                                   SchedWriteVecLogic, HasAVX512, 1>;
4885 defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
4886                                    SchedWriteVecLogic, HasAVX512, 1>;
4887 defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
4888                                     SchedWriteVecLogic, HasAVX512>;
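// There are no byte/word forms of the logical instructions (bitwise operations
// are element-size agnostic), so the byte and word vector types below are
// simply mapped onto the existing D/Q-sized VPAND/VPOR/VPXOR/VPANDN forms.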
4890 let Predicates = [HasVLX] in {
4891   def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
4892             (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
4893   def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
4894             (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
4896   def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
4897             (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
4898   def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
4899             (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
4901   def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
4902             (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
4903   def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
4904             (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
4906   def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
4907             (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
4908   def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
4909             (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
4911   def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
4912             (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
4913   def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
4914             (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
4916   def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
4917             (VPORQZ128rm VR128X:$src1, addr:$src2)>;
4918   def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
4919             (VPORQZ128rm VR128X:$src1, addr:$src2)>;
4921   def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
4922             (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
4923   def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
4924             (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
4926   def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
4927             (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
4928   def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
4929             (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
4931   def : Pat<(and VR128X:$src1,
4932                  (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
4933             (VPANDDZ128rmb VR128X:$src1, addr:$src2)>;
4934   def : Pat<(or VR128X:$src1,
4935                 (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
4936             (VPORDZ128rmb VR128X:$src1, addr:$src2)>;
4937   def : Pat<(xor VR128X:$src1,
4938                  (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
4939             (VPXORDZ128rmb VR128X:$src1, addr:$src2)>;
4940   def : Pat<(X86andnp VR128X:$src1,
4941                       (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
4942             (VPANDNDZ128rmb VR128X:$src1, addr:$src2)>;
4944   def : Pat<(and VR128X:$src1,
4945                  (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
4946             (VPANDQZ128rmb VR128X:$src1, addr:$src2)>;
4947   def : Pat<(or VR128X:$src1,
4948                 (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
4949             (VPORQZ128rmb VR128X:$src1, addr:$src2)>;
4950   def : Pat<(xor VR128X:$src1,
4951                  (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
4952             (VPXORQZ128rmb VR128X:$src1, addr:$src2)>;
4953   def : Pat<(X86andnp VR128X:$src1,
4954                       (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
4955             (VPANDNQZ128rmb VR128X:$src1, addr:$src2)>;
4957   def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
4958             (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
4959   def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
4960             (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
4962   def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
4963             (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
4964   def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
4965             (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
4967   def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
4968             (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
4969   def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
4970             (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
4972   def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
4973             (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
4974   def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
4975             (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
4977   def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
4978             (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
4979   def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
4980             (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
4982   def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
4983             (VPORQZ256rm VR256X:$src1, addr:$src2)>;
4984   def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
4985             (VPORQZ256rm VR256X:$src1, addr:$src2)>;
4987   def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
4988             (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
4989   def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
4990             (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
4992   def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
4993             (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
4994   def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
4995             (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
4997   def : Pat<(and VR256X:$src1,
4998                  (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
4999             (VPANDDZ256rmb VR256X:$src1, addr:$src2)>;
5000   def : Pat<(or VR256X:$src1,
5001                 (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
5002             (VPORDZ256rmb VR256X:$src1, addr:$src2)>;
5003   def : Pat<(xor VR256X:$src1,
5004                  (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
5005             (VPXORDZ256rmb VR256X:$src1, addr:$src2)>;
5006   def : Pat<(X86andnp VR256X:$src1,
5007                       (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
5008             (VPANDNDZ256rmb VR256X:$src1, addr:$src2)>;
5010   def : Pat<(and VR256X:$src1,
5011                  (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
5012             (VPANDQZ256rmb VR256X:$src1, addr:$src2)>;
5013   def : Pat<(or VR256X:$src1,
5014                 (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
5015             (VPORQZ256rmb VR256X:$src1, addr:$src2)>;
5016   def : Pat<(xor VR256X:$src1,
5017                  (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
5018             (VPXORQZ256rmb VR256X:$src1, addr:$src2)>;
5019   def : Pat<(X86andnp VR256X:$src1,
5020                       (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
5021             (VPANDNQZ256rmb VR256X:$src1, addr:$src2)>;
5024 let Predicates = [HasAVX512] in {
5025   def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5026             (VPANDQZrr VR512:$src1, VR512:$src2)>;
5027   def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5028             (VPANDQZrr VR512:$src1, VR512:$src2)>;
5030   def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5031             (VPORQZrr VR512:$src1, VR512:$src2)>;
5032   def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5033             (VPORQZrr VR512:$src1, VR512:$src2)>;
5035   def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5036             (VPXORQZrr VR512:$src1, VR512:$src2)>;
5037   def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5038             (VPXORQZrr VR512:$src1, VR512:$src2)>;
5040   def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5041             (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5042   def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5043             (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5045   def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
5046             (VPANDQZrm VR512:$src1, addr:$src2)>;
5047   def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
5048             (VPANDQZrm VR512:$src1, addr:$src2)>;
5050   def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
5051             (VPORQZrm VR512:$src1, addr:$src2)>;
5052   def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
5053             (VPORQZrm VR512:$src1, addr:$src2)>;
5055   def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
5056             (VPXORQZrm VR512:$src1, addr:$src2)>;
5057   def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
5058             (VPXORQZrm VR512:$src1, addr:$src2)>;
5060   def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
5061             (VPANDNQZrm VR512:$src1, addr:$src2)>;
5062   def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
5063             (VPANDNQZrm VR512:$src1, addr:$src2)>;
5065   def : Pat<(and VR512:$src1,
5066                  (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
5067             (VPANDDZrmb VR512:$src1, addr:$src2)>;
5068   def : Pat<(or VR512:$src1,
5069                 (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
5070             (VPORDZrmb VR512:$src1, addr:$src2)>;
5071   def : Pat<(xor VR512:$src1,
5072                  (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
5073             (VPXORDZrmb VR512:$src1, addr:$src2)>;
5074   def : Pat<(X86andnp VR512:$src1,
5075                       (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
5076             (VPANDNDZrmb VR512:$src1, addr:$src2)>;
5078   def : Pat<(and VR512:$src1,
5079                  (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
5080             (VPANDQZrmb VR512:$src1, addr:$src2)>;
5081   def : Pat<(or VR512:$src1,
5082                 (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
5083             (VPORQZrmb VR512:$src1, addr:$src2)>;
5084   def : Pat<(xor VR512:$src1,
5085                  (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
5086             (VPXORQZrmb VR512:$src1, addr:$src2)>;
5087   def : Pat<(X86andnp VR512:$src1,
5088                       (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
5089             (VPANDNQZrmb VR512:$src1, addr:$src2)>;
5092 // Patterns to catch vselect with a different type than the logic op.
5093 multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
5094                                     X86VectorVTInfo _,
5095                                     X86VectorVTInfo IntInfo> {
5096   // Masked register-register logical operations.
5097   def : Pat<(_.VT (vselect _.KRCWM:$mask,
5098                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5099                    _.RC:$src0)),
5100             (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5101              _.RC:$src1, _.RC:$src2)>;
5103   def : Pat<(_.VT (vselect _.KRCWM:$mask,
5104                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5105                    _.ImmAllZerosV)),
5106             (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5107              _.RC:$src2)>;
5109   // Masked register-memory logical operations.
5110   def : Pat<(_.VT (vselect _.KRCWM:$mask,
5111                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5112                                             (load addr:$src2)))),
5113                    _.RC:$src0)),
5114             (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5115              _.RC:$src1, addr:$src2)>;
5116   def : Pat<(_.VT (vselect _.KRCWM:$mask,
5117                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5118                                             (load addr:$src2)))),
5119                    _.ImmAllZerosV)),
5120             (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
5121              addr:$src2)>;
5122 }
5124 multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
5125                                          X86VectorVTInfo _,
5126                                          X86VectorVTInfo IntInfo> {
5127   // Register-broadcast logical operations.
5128   def : Pat<(IntInfo.VT (OpNode _.RC:$src1,
5129                          (bitconvert (_.VT (X86VBroadcast
5130                                             (_.ScalarLdFrag addr:$src2)))))),
5131             (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
5132   def : Pat<(_.VT (vselect _.KRCWM:$mask,
5133                    (bitconvert
5134                     (IntInfo.VT (OpNode _.RC:$src1,
5135                                  (bitconvert (_.VT
5136                                               (X86VBroadcast
5137                                                (_.ScalarLdFrag addr:$src2))))))),
5138                    _.RC:$src0)),
5139             (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5140              _.RC:$src1, addr:$src2)>;
5141   def : Pat<(_.VT (vselect _.KRCWM:$mask,
5142                    (bitconvert
5143                     (IntInfo.VT (OpNode _.RC:$src1,
5144                                  (bitconvert (_.VT
5145                                               (X86VBroadcast
5146                                                (_.ScalarLdFrag addr:$src2))))))),
5147                    _.ImmAllZerosV)),
5148             (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
5149              _.RC:$src1, addr:$src2)>;
5150 }
5152 multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
5153                                          AVX512VLVectorVTInfo SelectInfo,
5154                                          AVX512VLVectorVTInfo IntInfo> {
5155 let Predicates = [HasVLX] in {
5156   defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
5157                                  IntInfo.info128>;
5158   defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
5159                                  IntInfo.info256>;
5160 }
5161 let Predicates = [HasAVX512] in {
5162   defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
5163                                  IntInfo.info512>;
5164 }
5165 }
5167 multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
5168                                                AVX512VLVectorVTInfo SelectInfo,
5169                                                AVX512VLVectorVTInfo IntInfo> {
5170 let Predicates = [HasVLX] in {
5171   defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
5172                                        SelectInfo.info128, IntInfo.info128>;
5173   defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
5174                                        SelectInfo.info256, IntInfo.info256>;
5175 }
5176 let Predicates = [HasAVX512] in {
5177   defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
5178                                        SelectInfo.info512, IntInfo.info512>;
5179 }
5180 }
5182 multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
5183   // i64 vselect with i32/i16/i8 logic op
5184   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5185                                        avx512vl_i32_info>;
5186   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5187                                        avx512vl_i16_info>;
5188   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5189                                        avx512vl_i8_info>;
5191   // i32 vselect with i64/i16/i8 logic op
5192   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5193                                        avx512vl_i64_info>;
5194   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5195                                        avx512vl_i16_info>;
5196   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5197                                        avx512vl_i8_info>;
5199   // f32 vselect with i64/i32/i16/i8 logic op
5200   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5201                                        avx512vl_i64_info>;
5202   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5203                                        avx512vl_i32_info>;
5204   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5205                                        avx512vl_i16_info>;
5206   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5207                                        avx512vl_i8_info>;
5209   // f64 vselect with i64/i32/i16/i8 logic op
5210   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5211                                        avx512vl_i64_info>;
5212   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5213                                        avx512vl_i32_info>;
5214   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5215                                        avx512vl_i16_info>;
5216   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5217                                        avx512vl_i8_info>;
5219   defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
5220                                              avx512vl_f32_info,
5221                                              avx512vl_i32_info>;
5222   defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
5223                                              avx512vl_f64_info,
5224                                              avx512vl_i64_info>;
5225 }
5227 defm : avx512_logical_lowering_types<"VPAND", and>;
5228 defm : avx512_logical_lowering_types<"VPOR",  or>;
5229 defm : avx512_logical_lowering_types<"VPXOR", xor>;
5230 defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
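// Illustrative sketch of what the instantiations above are expected to catch
// (not an additional pattern definition): an i32-typed logic op wrapped in an
// i64-typed masked vselect, e.g.
//   (v8i64 (vselect VK8WM:$mask,
//                   (bitconvert (v16i32 (and VR512:$src1, VR512:$src2))),
//                   VR512:$src0))
// should select to VPANDQZrrk without a hand-written bitcast pattern for
// every instruction/type combination.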
5232 //===----------------------------------------------------------------------===//
5233 // AVX-512  FP arithmetic
5234 //===----------------------------------------------------------------------===//
5236 multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5237                             SDNode OpNode, SDNode VecNode,
5238                             X86FoldableSchedWrite sched, bit IsCommutable> {
5239   let ExeDomain = _.ExeDomain in {
5240   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5241                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5242                            "$src2, $src1", "$src1, $src2",
5243                            (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5244                            Sched<[sched]>;
5246   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5247                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5248                          "$src2, $src1", "$src1, $src2",
5249                          (_.VT (VecNode _.RC:$src1,
5250                                         _.ScalarIntMemCPat:$src2))>,
5251                          Sched<[sched.Folded, sched.ReadAfterFold]>;
5252   let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5253   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5254                          (ins _.FRC:$src1, _.FRC:$src2),
5255                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5256                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5257                           Sched<[sched]> {
5258     let isCommutable = IsCommutable;
5259   }
5260   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5261                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5262                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5263                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5264                          (_.ScalarLdFrag addr:$src2)))]>,
5265                          Sched<[sched.Folded, sched.ReadAfterFold]>;
5266   }
5267   }
5268 }
5270 multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5271                                   SDNode VecNode, X86FoldableSchedWrite sched,
5272                                   bit IsCommutable = 0> {
5273   let ExeDomain = _.ExeDomain in
5274   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5275                           (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5276                           "$rc, $src2, $src1", "$src1, $src2, $rc",
5277                           (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5278                           (i32 timm:$rc))>,
5279                           EVEX_B, EVEX_RC, Sched<[sched]>;
5280 }
5281 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5282                                 SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5283                                 X86FoldableSchedWrite sched, bit IsCommutable> {
5284   let ExeDomain = _.ExeDomain in {
5285   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5286                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5287                            "$src2, $src1", "$src1, $src2",
5288                            (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5289                            Sched<[sched]>;
5291   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5292                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5293                          "$src2, $src1", "$src1, $src2",
5294                          (_.VT (VecNode _.RC:$src1,
5295                                         _.ScalarIntMemCPat:$src2))>,
5296                          Sched<[sched.Folded, sched.ReadAfterFold]>;
5298   let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5299   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5300                          (ins _.FRC:$src1, _.FRC:$src2),
5301                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5302                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5303                           Sched<[sched]> {
5304     let isCommutable = IsCommutable;
5305   }
5306   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5307                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5308                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5309                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5310                          (_.ScalarLdFrag addr:$src2)))]>,
5311                          Sched<[sched.Folded, sched.ReadAfterFold]>;
5312   }
5314   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5315                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5316                             "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5317                             (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5318                             EVEX_B, Sched<[sched]>;
5319   }
5320 }
5322 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
5323                                 SDNode VecNode, SDNode RndNode,
5324                                 X86SchedWriteSizes sched, bit IsCommutable> {
5325   defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5326                               sched.PS.Scl, IsCommutable>,
5327              avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5328                               sched.PS.Scl, IsCommutable>,
5329                               XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5330   defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5331                               sched.PD.Scl, IsCommutable>,
5332              avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5333                               sched.PD.Scl, IsCommutable>,
5334                               XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5335 }
5337 multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5338                               SDNode VecNode, SDNode SaeNode,
5339                               X86SchedWriteSizes sched, bit IsCommutable> {
5340   defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5341                               VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
5342                               XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5343   defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5344                               VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
5345                               XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5346 }
5347 defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86fadds, X86faddRnds,
5348                                  SchedWriteFAddSizes, 1>;
5349 defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmuls, X86fmulRnds,
5350                                  SchedWriteFMulSizes, 1>;
5351 defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubs, X86fsubRnds,
5352                                  SchedWriteFAddSizes, 0>;
5353 defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivs, X86fdivRnds,
5354                                  SchedWriteFDivSizes, 0>;
5355 defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5356                                SchedWriteFCmpSizes, 0>;
5357 defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5358                                SchedWriteFCmpSizes, 0>;
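// For reference (the assembly below is illustrative only): the rrb_Int forms
// defined above expose the static rounding mode / SAE marker as an extra
// operand, so in AT&T syntax the sources follow "$rc, $src2, $src1", roughly
//   vaddss {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1}
//   vminss {sae}, %xmm2, %xmm1, %xmm0
// while Intel syntax uses the "$src1, $src2, $rc" order.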
5360 // MIN/MAX nodes are commutable under "unsafe-fp-math". In that case we use
5361 // X86fminc and X86fmaxc instead of X86fmin and X86fmax.
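// (Illustrative consequence: the commutable forms defined below may swap
// their operands, e.g. to fold a load into either source, whereas plain
// vminss/vmaxss must not, since x86 min/max are not commutative in the
// presence of NaNs and signed zeros.)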
5362 multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5363                                     X86VectorVTInfo _, SDNode OpNode,
5364                                     X86FoldableSchedWrite sched> {
5365   let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5366   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5367                          (ins _.FRC:$src1, _.FRC:$src2),
5368                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5369                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5370                           Sched<[sched]> {
5371     let isCommutable = 1;
5372   }
5373   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5374                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5375                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5376                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5377                          (_.ScalarLdFrag addr:$src2)))]>,
5378                          Sched<[sched.Folded, sched.ReadAfterFold]>;
5379   }
5380 }
5381 defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5382                                          SchedWriteFCmp.Scl>, XS, EVEX_4V,
5383                                          VEX_LIG, EVEX_CD8<32, CD8VT1>;
5385 defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5386                                          SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
5387                                          VEX_LIG, EVEX_CD8<64, CD8VT1>;
5389 defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5390                                          SchedWriteFCmp.Scl>, XS, EVEX_4V,
5391                                          VEX_LIG, EVEX_CD8<32, CD8VT1>;
5393 defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5394                                          SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
5395                                          VEX_LIG, EVEX_CD8<64, CD8VT1>;
5397 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5398                             X86VectorVTInfo _, X86FoldableSchedWrite sched,
5399                             bit IsCommutable,
5400                             bit IsKCommutable = IsCommutable> {
5401   let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
5402   defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5403                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5404                   "$src2, $src1", "$src1, $src2",
5405                   (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
5406                   IsKCommutable, IsKCommutable>,
5407                   EVEX_4V, Sched<[sched]>;
5408   let mayLoad = 1 in {
5409     defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5410                     (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
5411                     "$src2, $src1", "$src1, $src2",
5412                     (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5413                     EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5414     defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5415                      (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
5416                      "${src2}"##_.BroadcastStr##", $src1",
5417                      "$src1, ${src2}"##_.BroadcastStr,
5418                      (OpNode  _.RC:$src1, (_.VT (X86VBroadcast
5419                                                 (_.ScalarLdFrag addr:$src2))))>,
5420                      EVEX_4V, EVEX_B,
5421                      Sched<[sched.Folded, sched.ReadAfterFold]>;
5422     }
5423   }
5424 }
5426 multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5427                                   SDPatternOperator OpNodeRnd,
5428                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5429   let ExeDomain = _.ExeDomain in
5430   defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5431                   (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
5432                   "$rc, $src2, $src1", "$src1, $src2, $rc",
5433                   (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>,
5434                   EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
5435 }
5437 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5438                                 SDPatternOperator OpNodeSAE,
5439                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5440   let ExeDomain = _.ExeDomain in
5441   defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5442                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5443                   "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5444                   (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5445                   EVEX_4V, EVEX_B, Sched<[sched]>;
5446 }
5448 multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5449                              Predicate prd, X86SchedWriteSizes sched,
5450                              bit IsCommutable = 0,
5451                              bit IsPD128Commutable = IsCommutable> {
5452   let Predicates = [prd] in {
5453   defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
5454                               sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
5455                               EVEX_CD8<32, CD8VF>;
5456   defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
5457                               sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
5458                               EVEX_CD8<64, CD8VF>;
5459   }
5461     // Define only if AVX512VL feature is present.
5462   let Predicates = [prd, HasVLX] in {
5463     defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
5464                                    sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
5465                                    EVEX_CD8<32, CD8VF>;
5466     defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
5467                                    sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
5468                                    EVEX_CD8<32, CD8VF>;
5469     defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
5470                                    sched.PD.XMM, IsPD128Commutable,
5471                                    IsCommutable>, EVEX_V128, PD, VEX_W,
5472                                    EVEX_CD8<64, CD8VF>;
5473     defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
5474                                    sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
5475                                    EVEX_CD8<64, CD8VF>;
5476   }
5477 }
5479 multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5480                                    X86SchedWriteSizes sched> {
5481   defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5482                                     v16f32_info>,
5483                                     EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5484   defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5485                                     v8f64_info>,
5486                                     EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
5487 }
5489 multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5490                                  X86SchedWriteSizes sched> {
5491   defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5492                                   v16f32_info>,
5493                                   EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5494   defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5495                                   v8f64_info>,
5496                                   EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
5497 }
5499 defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
5500                               SchedWriteFAddSizes, 1>,
5501             avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5502 defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
5503                               SchedWriteFMulSizes, 1>,
5504             avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5505 defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
5506                               SchedWriteFAddSizes>,
5507             avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5508 defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
5509                               SchedWriteFDivSizes>,
5510             avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
5511 defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
5512                               SchedWriteFCmpSizes, 0>,
5513             avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5514 defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
5515                               SchedWriteFCmpSizes, 0>,
5516             avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
5517 let isCodeGenOnly = 1 in {
5518   defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
5519                                  SchedWriteFCmpSizes, 1>;
5520   defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
5521                                  SchedWriteFCmpSizes, 1>;
5522 }
5523 defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
5524                                SchedWriteFLogicSizes, 1>;
5525 defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
5526                                SchedWriteFLogicSizes, 0>;
5527 defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
5528                                SchedWriteFLogicSizes, 1>;
5529 defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
5530                                SchedWriteFLogicSizes, 1>;
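// Note: the four DQI FP logic families above are declared with null_frag,
// i.e. no selection patterns are attached at this point; only the encodings,
// masking variants and scheduling information are defined here.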
5532 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5533                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5534   let ExeDomain = _.ExeDomain in {
5535   defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5536                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5537                   "$src2, $src1", "$src1, $src2",
5538                   (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5539                   EVEX_4V, Sched<[sched]>;
5540   defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5541                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
5542                   "$src2, $src1", "$src1, $src2",
5543                   (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5544                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5545   defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5546                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
5547                    "${src2}"##_.BroadcastStr##", $src1",
5548                    "$src1, ${src2}"##_.BroadcastStr,
5549                    (OpNode  _.RC:$src1, (_.VT (X86VBroadcast
5550                                               (_.ScalarLdFrag addr:$src2))))>,
5551                    EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5552   }
5553 }
5555 multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5556                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5557   let ExeDomain = _.ExeDomain in {
5558   defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5559                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5560                   "$src2, $src1", "$src1, $src2",
5561                   (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5562                   Sched<[sched]>;
5563   defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5564                   (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix,
5565                   "$src2, $src1", "$src1, $src2",
5566                   (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2)>,
5567                   Sched<[sched.Folded, sched.ReadAfterFold]>;
5568   }
5569 }
5571 multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5572                                 X86SchedWriteWidths sched> {
5573   defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
5574              avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
5575                               EVEX_V512, EVEX_CD8<32, CD8VF>;
5576   defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
5577              avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
5578                               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
5579   defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
5580              avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info,
5581                                     X86scalefsRnd, sched.Scl>,
5582                                     EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5583   defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
5584              avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info,
5585                                     X86scalefsRnd, sched.Scl>,
5586                                     EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W;
5588   // Define only if AVX512VL feature is present.
5589   let Predicates = [HasVLX] in {
5590     defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
5591                                    EVEX_V128, EVEX_CD8<32, CD8VF>;
5592     defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
5593                                    EVEX_V256, EVEX_CD8<32, CD8VF>;
5594     defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
5595                                    EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
5596     defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
5597                                    EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
5598   }
5599 }
5600 defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
5601                                     SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;
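// Descriptive note: vscalef computes roughly src1 * 2^floor(src2) per
// element; the rounded (rrb) and scalar forms above follow the same
// rounding-operand convention as the other FP binops in this file.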
5603 //===----------------------------------------------------------------------===//
5604 // AVX-512  VPTESTM instructions
5605 //===----------------------------------------------------------------------===//
5607 multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
5608                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
5609                          string Name> {
5610   // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
5611   // There are just too many permutations due to commutability and bitcasts.
5612   let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
5613   defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5614                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5615                       "$src2, $src1", "$src1, $src2",
5616                    (null_frag), (null_frag), 1>,
5617                    EVEX_4V, Sched<[sched]>;
5618   let mayLoad = 1 in
5619   defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5620                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5621                        "$src2, $src1", "$src1, $src2",
5622                    (null_frag), (null_frag)>,
5623                    EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5624                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5625   }
5626 }
5628 multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
5629                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5630   let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
5631   defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5632                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5633                     "${src2}"##_.BroadcastStr##", $src1",
5634                     "$src1, ${src2}"##_.BroadcastStr,
5635                     (null_frag), (null_frag)>,
5636                     EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5637                     Sched<[sched.Folded, sched.ReadAfterFold]>;
5638 }
5640 multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
5641                                   X86SchedWriteWidths sched,
5642                                   AVX512VLVectorVTInfo _> {
5643   let Predicates  = [HasAVX512] in
5644   defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512, NAME>,
5645            avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
5647   let Predicates = [HasAVX512, HasVLX] in {
5648   defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256, NAME>,
5649               avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
5650   defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128, NAME>,
5651               avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
5652   }
5653 }
5655 multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
5656                             X86SchedWriteWidths sched> {
5657   defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
5658                                  avx512vl_i32_info>;
5659   defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
5660                                  avx512vl_i64_info>, VEX_W;
5661 }
5663 multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5664                             X86SchedWriteWidths sched> {
5665   let Predicates = [HasBWI] in {
5666   defm WZ:    avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
5667                             v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
5668   defm BZ:    avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
5669                             v64i8_info, NAME#"B">, EVEX_V512;
5670   }
5671   let Predicates = [HasVLX, HasBWI] in {
5673   defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
5674                             v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
5675   defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
5676                             v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
5677   defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
5678                             v32i8x_info, NAME#"B">, EVEX_V256;
5679   defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
5680                             v16i8x_info, NAME#"B">, EVEX_V128;
5681   }
5682 }
5684 multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5685                                    X86SchedWriteWidths sched> :
5686   avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
5687   avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
5689 defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
5690                                          SchedWriteVecLogic>, T8PD;
5691 defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
5692                                          SchedWriteVecLogic>, T8XS;
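// Descriptive note: vptestm sets mask bit i when (src1[i] AND src2[i]) is
// non-zero, and vptestnm sets it when that AND is zero; as the NOTE above
// says, the actual selection is done manually in X86ISelDAGToDAG.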
5694 //===----------------------------------------------------------------------===//
5695 // AVX-512  Shift instructions
5696 //===----------------------------------------------------------------------===//
5698 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5699                             string OpcodeStr, SDNode OpNode,
5700                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5701   let ExeDomain = _.ExeDomain in {
5702   defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5703                    (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5704                       "$src2, $src1", "$src1, $src2",
5705                    (_.VT (OpNode _.RC:$src1, (i8 imm:$src2)))>,
5706                    Sched<[sched]>;
5707   defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5708                    (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5709                        "$src2, $src1", "$src1, $src2",
5710                    (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
5711                           (i8 imm:$src2)))>,
5712                    Sched<[sched.Folded]>;
5713   }
5714 }
5716 multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5717                              string OpcodeStr, SDNode OpNode,
5718                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5719   let ExeDomain = _.ExeDomain in
5720   defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5721                    (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5722       "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
5723      (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2)))>,
5724      EVEX_B, Sched<[sched.Folded]>;
5725 }
5727 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5728                             X86FoldableSchedWrite sched, ValueType SrcVT,
5729                             X86VectorVTInfo _> {
5730    // src2 is always 128-bit
5731   let ExeDomain = _.ExeDomain in {
5732   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5733                    (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5734                       "$src2, $src1", "$src1, $src2",
5735                    (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
5736                    AVX512BIBase, EVEX_4V, Sched<[sched]>;
5737   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5738                    (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5739                        "$src2, $src1", "$src1, $src2",
5740                    (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
5741                    AVX512BIBase,
5742                    EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5743   }
5744 }
5746 multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5747                               X86SchedWriteWidths sched, ValueType SrcVT,
5748                               AVX512VLVectorVTInfo VTInfo,
5749                               Predicate prd> {
5750   let Predicates = [prd] in
5751   defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
5752                                VTInfo.info512>, EVEX_V512,
5753                                EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5754   let Predicates = [prd, HasVLX] in {
5755   defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
5756                                VTInfo.info256>, EVEX_V256,
5757                                EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5758   defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
5759                                VTInfo.info128>, EVEX_V128,
5760                                EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
5761   }
5762 }
5764 multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5765                               string OpcodeStr, SDNode OpNode,
5766                               X86SchedWriteWidths sched,
5767                               bit NotEVEX2VEXConvertibleQ = 0> {
5768   defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
5769                               avx512vl_i32_info, HasAVX512>;
5770   let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5771   defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
5772                               avx512vl_i64_info, HasAVX512>, VEX_W;
5773   defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
5774                               avx512vl_i16_info, HasBWI>;
5775 }
5777 multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5778                                   string OpcodeStr, SDNode OpNode,
5779                                   X86SchedWriteWidths sched,
5780                                   AVX512VLVectorVTInfo VTInfo> {
5781   let Predicates = [HasAVX512] in
5782   defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5783                               sched.ZMM, VTInfo.info512>,
5784              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
5785                                VTInfo.info512>, EVEX_V512;
5786   let Predicates = [HasAVX512, HasVLX] in {
5787   defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5788                               sched.YMM, VTInfo.info256>,
5789              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
5790                                VTInfo.info256>, EVEX_V256;
5791   defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5792                               sched.XMM, VTInfo.info128>,
5793              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
5794                                VTInfo.info128>, EVEX_V128;
5795   }
5796 }
5798 multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
5799                               string OpcodeStr, SDNode OpNode,
5800                               X86SchedWriteWidths sched> {
5801   let Predicates = [HasBWI] in
5802   defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5803                                sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
5804   let Predicates = [HasVLX, HasBWI] in {
5805   defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5806                                sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
5807   defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5808                                sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
5809   }
5810 }
5812 multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
5813                                Format ImmFormR, Format ImmFormM,
5814                                string OpcodeStr, SDNode OpNode,
5815                                X86SchedWriteWidths sched,
5816                                bit NotEVEX2VEXConvertibleQ = 0> {
5817   defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
5818                                  sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
5819   let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5820   defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
5821                                  sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
5822 }
5824 defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
5825                                  SchedWriteVecShiftImm>,
5826              avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
5827                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5829 defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
5830                                  SchedWriteVecShiftImm>,
5831              avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
5832                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5834 defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
5835                                  SchedWriteVecShiftImm, 1>,
5836              avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
5837                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5839 defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
5840                                  SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5841 defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
5842                                  SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5844 defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
5845                                 SchedWriteVecShift>;
5846 defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
5847                                 SchedWriteVecShift, 1>;
5848 defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
5849                                 SchedWriteVecShift>;
5851 // Use the 512-bit VPSRA/VPSRAI versions to implement v2i64/v4i64 when VLX is not available (NoVLX).
5852 let Predicates = [HasAVX512, NoVLX] in {
5853   def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
5854             (EXTRACT_SUBREG (v8i64
5855               (VPSRAQZrr
5856                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5857                  VR128X:$src2)), sub_ymm)>;
5859   def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5860             (EXTRACT_SUBREG (v8i64
5861               (VPSRAQZrr
5862                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5863                  VR128X:$src2)), sub_xmm)>;
5865   def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
5866             (EXTRACT_SUBREG (v8i64
5867               (VPSRAQZri
5868                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5869                  imm:$src2)), sub_ymm)>;
5871   def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
5872             (EXTRACT_SUBREG (v8i64
5873               (VPSRAQZri
5874                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5875                  imm:$src2)), sub_xmm)>;
5876 }
5878 //===-------------------------------------------------------------------===//
5879 // Variable Bit Shifts
5880 //===-------------------------------------------------------------------===//
5882 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
5883                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5884   let ExeDomain = _.ExeDomain in {
5885   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5886                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5887                       "$src2, $src1", "$src1, $src2",
5888                    (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
5889                    AVX5128IBase, EVEX_4V, Sched<[sched]>;
5890   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5891                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5892                        "$src2, $src1", "$src1, $src2",
5893                    (_.VT (OpNode _.RC:$src1,
5894                    (_.VT (_.LdFrag addr:$src2))))>,
5895                    AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5896                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5897   }
5898 }
5900 multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5901                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5902   let ExeDomain = _.ExeDomain in
5903   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5904                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5905                     "${src2}"##_.BroadcastStr##", $src1",
5906                     "$src1, ${src2}"##_.BroadcastStr,
5907                     (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
5908                                                 (_.ScalarLdFrag addr:$src2)))))>,
5909                     AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5910                     Sched<[sched.Folded, sched.ReadAfterFold]>;
5911 }
5913 multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5914                                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
5915   let Predicates  = [HasAVX512] in
5916   defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
5917            avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
5919   let Predicates = [HasAVX512, HasVLX] in {
5920   defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
5921               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
5922   defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
5923               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
5924   }
5925 }
5927 multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
5928                                   SDNode OpNode, X86SchedWriteWidths sched> {
5929   defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
5930                                  avx512vl_i32_info>;
5931   defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
5932                                  avx512vl_i64_info>, VEX_W;
5933 }
5935 // Use the 512-bit version to implement the 128/256-bit forms when VLX is not available (NoVLX).
5936 multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
5937                                      SDNode OpNode, list<Predicate> p> {
5938   let Predicates = p in {
5939   def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
5940                                   (_.info256.VT _.info256.RC:$src2))),
5941             (EXTRACT_SUBREG
5942                 (!cast<Instruction>(OpcodeStr#"Zrr")
5943                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5944                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5945              sub_ymm)>;
5947   def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
5948                                   (_.info128.VT _.info128.RC:$src2))),
5949             (EXTRACT_SUBREG
5950                 (!cast<Instruction>(OpcodeStr#"Zrr")
5951                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5952                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5953              sub_xmm)>;
5954   }
5955 }
5956 multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
5957                               SDNode OpNode, X86SchedWriteWidths sched> {
5958   let Predicates = [HasBWI] in
5959   defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
5960               EVEX_V512, VEX_W;
5961   let Predicates = [HasVLX, HasBWI] in {
5963   defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
5964               EVEX_V256, VEX_W;
5965   defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
5966               EVEX_V128, VEX_W;
5967   }
5968 }
5970 defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
5971               avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
5973 defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
5974               avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
5976 defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
5977               avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
5979 defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
5980 defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
5982 defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
5983 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
5984 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
5985 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
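// The lowerings above follow the usual NoVLX strategy: widen the 128/256-bit
// operands into a 512-bit register with INSERT_SUBREG, issue the Z-suffixed
// instruction, and take the low subregister back out with EXTRACT_SUBREG.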
5988 // Use the 512-bit VPROL/VPROLI versions to implement v2i64/v4i64 and v4i32/v8i32 when VLX is not available (NoVLX).
5989 let Predicates = [HasAVX512, NoVLX] in {
5990   def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5991             (EXTRACT_SUBREG (v8i64
5992               (VPROLVQZrr
5993                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5994                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
5995                        sub_xmm)>;
5996   def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5997             (EXTRACT_SUBREG (v8i64
5998               (VPROLVQZrr
5999                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6000                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6001                        sub_ymm)>;
6003   def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6004             (EXTRACT_SUBREG (v16i32
6005               (VPROLVDZrr
6006                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6007                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6008                         sub_xmm)>;
6009   def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6010             (EXTRACT_SUBREG (v16i32
6011               (VPROLVDZrr
6012                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6013                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6014                         sub_ymm)>;
6016   def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
6017             (EXTRACT_SUBREG (v8i64
6018               (VPROLQZri
6019                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6020                         imm:$src2)), sub_xmm)>;
6021   def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
6022             (EXTRACT_SUBREG (v8i64
6023               (VPROLQZri
6024                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6025                        imm:$src2)), sub_ymm)>;
6027   def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
6028             (EXTRACT_SUBREG (v16i32
6029               (VPROLDZri
6030                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6031                         imm:$src2)), sub_xmm)>;
6032   def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
6033             (EXTRACT_SUBREG (v16i32
6034               (VPROLDZri
6035                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6036                         imm:$src2)), sub_ymm)>;
6037 }
6039 // Use the 512-bit VPROR/VPRORI versions to implement v2i64/v4i64 and v4i32/v8i32 when VLX is not available (NoVLX).
6040 let Predicates = [HasAVX512, NoVLX] in {
6041   def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6042             (EXTRACT_SUBREG (v8i64
6043               (VPRORVQZrr
6044                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6045                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6046                        sub_xmm)>;
6047   def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6048             (EXTRACT_SUBREG (v8i64
6049               (VPRORVQZrr
6050                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6051                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6052                        sub_ymm)>;
6054   def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6055             (EXTRACT_SUBREG (v16i32
6056               (VPRORVDZrr
6057                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6058                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6059                         sub_xmm)>;
6060   def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6061             (EXTRACT_SUBREG (v16i32
6062               (VPRORVDZrr
6063                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6064                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6065                         sub_ymm)>;
6067   def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
6068             (EXTRACT_SUBREG (v8i64
6069               (VPRORQZri
6070                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6071                         imm:$src2)), sub_xmm)>;
6072   def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
6073             (EXTRACT_SUBREG (v8i64
6074               (VPRORQZri
6075                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6076                        imm:$src2)), sub_ymm)>;
6078   def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
6079             (EXTRACT_SUBREG (v16i32
6080               (VPRORDZri
6081                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6082                         imm:$src2)), sub_xmm)>;
6083   def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
6084             (EXTRACT_SUBREG (v16i32
6085               (VPRORDZri
6086                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6087                         imm:$src2)), sub_ymm)>;
6088 }
6090 //===-------------------------------------------------------------------===//
6091 // 1-src variable permutation VPERMW/D/Q
6092 //===-------------------------------------------------------------------===//
6094 multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6095                                  X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6096   let Predicates  = [HasAVX512] in
6097   defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6098            avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6100   let Predicates = [HasAVX512, HasVLX] in
6101   defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6102               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
6105 multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6106                                  string OpcodeStr, SDNode OpNode,
6107                                  X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6108   let Predicates = [HasAVX512] in
6109   defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6110                               sched, VTInfo.info512>,
6111              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6112                                sched, VTInfo.info512>, EVEX_V512;
6113   let Predicates = [HasAVX512, HasVLX] in
6114   defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6115                               sched, VTInfo.info256>,
6116              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6117                                sched, VTInfo.info256>, EVEX_V256;
6120 multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6121                               Predicate prd, SDNode OpNode,
6122                               X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6123   let Predicates = [prd] in
6124   defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6125               EVEX_V512 ;
6126   let Predicates = [HasVLX, prd] in {
6127   defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6128               EVEX_V256 ;
6129   defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
6130               EVEX_V128 ;
6131   }
6134 defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6135                                WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
6136 defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6137                                WriteVarShuffle256, avx512vl_i8_info>;
6139 defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6140                                     WriteVarShuffle256, avx512vl_i32_info>;
6141 defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6142                                     WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
6143 defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6144                                      WriteFVarShuffle256, avx512vl_f32_info>;
6145 defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6146                                      WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;
6148 defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6149                              X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6150                              EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6151 defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6152                              X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6153                              EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6155 //===----------------------------------------------------------------------===//
6156 // AVX-512 - VPERMIL
6157 //===----------------------------------------------------------------------===//
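// VPERMILPS/PD come in two flavours: a variable form whose control is an
// integer vector in a second source register (avx512_permil_vec below) and an
// immediate-control form that reuses the shift_rmi multiclasses.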
6159 multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6160                              X86FoldableSchedWrite sched, X86VectorVTInfo _,
6161                              X86VectorVTInfo Ctrl> {
6162   defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6163                   (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6164                   "$src2, $src1", "$src1, $src2",
6165                   (_.VT (OpNode _.RC:$src1,
6166                                (Ctrl.VT Ctrl.RC:$src2)))>,
6167                   T8PD, EVEX_4V, Sched<[sched]>;
6168   defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6169                   (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6170                   "$src2, $src1", "$src1, $src2",
6171                   (_.VT (OpNode
6172                            _.RC:$src1,
6173                            (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
6174                   T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6175                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6176   defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6177                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6178                    "${src2}"##_.BroadcastStr##", $src1",
6179                    "$src1, ${src2}"##_.BroadcastStr,
6180                    (_.VT (OpNode
6181                             _.RC:$src1,
6182                             (Ctrl.VT (X86VBroadcast
6183                                        (Ctrl.ScalarLdFrag addr:$src2)))))>,
6184                    T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6185                    Sched<[sched.Folded, sched.ReadAfterFold]>;
6188 multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6189                                     X86SchedWriteWidths sched,
6190                                     AVX512VLVectorVTInfo _,
6191                                     AVX512VLVectorVTInfo Ctrl> {
6192   let Predicates = [HasAVX512] in {
6193     defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6194                                   _.info512, Ctrl.info512>, EVEX_V512;
6195   }
6196   let Predicates = [HasAVX512, HasVLX] in {
6197     defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6198                                   _.info128, Ctrl.info128>, EVEX_V128;
6199     defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6200                                   _.info256, Ctrl.info256>, EVEX_V256;
6201   }
6204 multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6205                          AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6206   defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6207                                       _, Ctrl>;
6208   defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6209                                     X86VPermilpi, SchedWriteFShuffle, _>,
6210                     EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
6213 let ExeDomain = SSEPackedSingle in
6214 defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6215                                avx512vl_i32_info>;
6216 let ExeDomain = SSEPackedDouble in
6217 defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6218                                avx512vl_i64_info>, VEX_W1X;
6220 //===----------------------------------------------------------------------===//
6221 // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6222 //===----------------------------------------------------------------------===//
6224 defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6225                              X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6226                              EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6227 defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6228                                   X86PShufhw, SchedWriteShuffle>,
6229                                   EVEX, AVX512XSIi8Base;
6230 defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6231                                   X86PShuflw, SchedWriteShuffle>,
6232                                   EVEX, AVX512XDIi8Base;
6234 //===----------------------------------------------------------------------===//
6235 // AVX-512 - VPSHUFB
6236 //===----------------------------------------------------------------------===//
6238 multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6239                                X86SchedWriteWidths sched> {
6240   let Predicates = [HasBWI] in
6241   defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6242                               EVEX_V512;
6244   let Predicates = [HasVLX, HasBWI] in {
6245   defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6246                               EVEX_V256;
6247   defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
6248                               EVEX_V128;
6249   }
6252 defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6253                                   SchedWriteVarShuffle>, VEX_WIG;
6255 //===----------------------------------------------------------------------===//
6256 // Move Low to High and High to Low packed FP Instructions
6257 //===----------------------------------------------------------------------===//
6259 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6260           (ins VR128X:$src1, VR128X:$src2),
6261           "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6262           [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6263           Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
6264 let isCommutable = 1 in
6265 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6266           (ins VR128X:$src1, VR128X:$src2),
6267           "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6268           [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6269           Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
6271 //===----------------------------------------------------------------------===//
6272 // VMOVHPS/PD and VMOVLPS/PD Instructions
6273 // All patterns were taken from the SSE implementation.
6274 //===----------------------------------------------------------------------===//
6276 multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6277                                   SDPatternOperator OpNode,
6278                                   X86VectorVTInfo _> {
6279   let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6280   def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6281                   (ins _.RC:$src1, f64mem:$src2),
6282                   !strconcat(OpcodeStr,
6283                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6284                   [(set _.RC:$dst,
6285                      (OpNode _.RC:$src1,
6286                        (_.VT (bitconvert
6287                          (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6288                   Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
6291 // No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
6292 // SSE1, and the MOVLPS pattern is even more complex.
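// Consequently the PS variants below use null_frag (no ISel patterns); only
// the PD variants get load patterns, here and in the HasAVX512 block that
// follows.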
6293 defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6294                                   v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6295 defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6296                                   v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6297 defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6298                                   v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6299 defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6300                                   v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6302 let Predicates = [HasAVX512] in {
6303   // VMOVHPD patterns
6304   def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
6305                     (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
6306            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6307   def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
6308             (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6310   // VMOVLPD patterns
6311   def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
6312             (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
6315 let SchedRW = [WriteFStore] in {
6316 let mayStore = 1, hasSideEffects = 0 in
6317 def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6318                        (ins f64mem:$dst, VR128X:$src),
6319                        "vmovhps\t{$src, $dst|$dst, $src}",
6320                        []>, EVEX, EVEX_CD8<32, CD8VT2>;
6321 def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6322                        (ins f64mem:$dst, VR128X:$src),
6323                        "vmovhpd\t{$src, $dst|$dst, $src}",
6324                        [(store (f64 (extractelt
6325                                      (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6326                                      (iPTR 0))), addr:$dst)]>,
6327                        EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6328 let mayStore = 1, hasSideEffects = 0 in
6329 def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6330                        (ins f64mem:$dst, VR128X:$src),
6331                        "vmovlps\t{$src, $dst|$dst, $src}",
6332                        []>, EVEX, EVEX_CD8<32, CD8VT2>;
6333 def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6334                        (ins f64mem:$dst, VR128X:$src),
6335                        "vmovlpd\t{$src, $dst|$dst, $src}",
6336                        [(store (f64 (extractelt (v2f64 VR128X:$src),
6337                                      (iPTR 0))), addr:$dst)]>,
6338                        EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6339 } // SchedRW
6341 let Predicates = [HasAVX512] in {
6342   // VMOVHPD patterns
6343   def : Pat<(store (f64 (extractelt
6344                            (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6345                            (iPTR 0))), addr:$dst),
6346            (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6348 //===----------------------------------------------------------------------===//
6349 // FMA - Fused Multiply Operations
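// A reminder of the three packed forms (with $dst tied to $src1):
//   213: $dst = $src2 * $src1 + $src3
//   231: $dst = $src2 * $src3 + $src1
//   132: $dst = $src1 * $src3 + $src2
// In each form the memory/broadcast operand is $src3. FMSUB/FNMADD/FNMSUB
// negate the product and/or the addend accordingly.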
6352 multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6353                                X86FoldableSchedWrite sched,
6354                                X86VectorVTInfo _, string Suff> {
6355   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6356   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6357           (ins _.RC:$src2, _.RC:$src3),
6358           OpcodeStr, "$src3, $src2", "$src2, $src3",
6359           (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6360           AVX512FMA3Base, Sched<[sched]>;
6362   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6363           (ins _.RC:$src2, _.MemOp:$src3),
6364           OpcodeStr, "$src3, $src2", "$src2, $src3",
6365           (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6366           AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6368   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6369             (ins _.RC:$src2, _.ScalarMemOp:$src3),
6370             OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
6371             !strconcat("$src2, ${src3}", _.BroadcastStr ),
6372             (OpNode _.RC:$src2,
6373              _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>,
6374              AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6375   }
6378 multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6379                                  X86FoldableSchedWrite sched,
6380                                  X86VectorVTInfo _, string Suff> {
6381   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6382   defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6383           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6384           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6385           (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
6386           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6389 multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6390                                    SDNode OpNodeRnd, X86SchedWriteWidths sched,
6391                                    AVX512VLVectorVTInfo _, string Suff> {
6392   let Predicates = [HasAVX512] in {
6393     defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.ZMM,
6394                                       _.info512, Suff>,
6395                   avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6396                                         _.info512, Suff>,
6397                               EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6398   }
6399   let Predicates = [HasVLX, HasAVX512] in {
6400     defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.YMM,
6401                                     _.info256, Suff>,
6402                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6403     defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.XMM,
6404                                     _.info128, Suff>,
6405                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6406   }
6409 multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6410                               SDNode OpNodeRnd> {
6411     defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6412                                       SchedWriteFMA, avx512vl_f32_info, "PS">;
6413     defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6414                                       SchedWriteFMA, avx512vl_f64_info, "PD">,
6415                                       VEX_W;
6418 defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
6419 defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
6420 defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
6421 defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
6422 defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
6423 defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
6426 multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6427                                X86FoldableSchedWrite sched,
6428                                X86VectorVTInfo _, string Suff> {
6429   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6430   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6431           (ins _.RC:$src2, _.RC:$src3),
6432           OpcodeStr, "$src3, $src2", "$src2, $src3",
6433           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1,
6434           vselect, 1>, AVX512FMA3Base, Sched<[sched]>;
6436   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6437           (ins _.RC:$src2, _.MemOp:$src3),
6438           OpcodeStr, "$src3, $src2", "$src2, $src3",
6439           (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6440           AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6442   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6443          (ins _.RC:$src2, _.ScalarMemOp:$src3),
6444          OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6445          "$src2, ${src3}"##_.BroadcastStr,
6446          (_.VT (OpNode _.RC:$src2,
6447                       (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
6448                       _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
6449          Sched<[sched.Folded, sched.ReadAfterFold]>;
6450   }
6453 multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6454                                  X86FoldableSchedWrite sched,
6455                                  X86VectorVTInfo _, string Suff> {
6456   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6457   defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6458           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6459           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6460           (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
6461           1, 1, vselect, 1>,
6462           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6465 multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6466                                    SDNode OpNodeRnd, X86SchedWriteWidths sched,
6467                                    AVX512VLVectorVTInfo _, string Suff> {
6468   let Predicates = [HasAVX512] in {
6469     defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.ZMM,
6470                                       _.info512, Suff>,
6471                   avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6472                                         _.info512, Suff>,
6473                               EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6474   }
6475   let Predicates = [HasVLX, HasAVX512] in {
6476     defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.YMM,
6477                                     _.info256, Suff>,
6478                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6479     defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.XMM,
6480                                     _.info128, Suff>,
6481                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6482   }
6485 multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6486                               SDNode OpNodeRnd > {
6487     defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6488                                       SchedWriteFMA, avx512vl_f32_info, "PS">;
6489     defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6490                                       SchedWriteFMA, avx512vl_f64_info, "PD">,
6491                                       VEX_W;
6494 defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
6495 defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
6496 defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
6497 defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
6498 defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
6499 defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
6501 multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6502                                X86FoldableSchedWrite sched,
6503                                X86VectorVTInfo _, string Suff> {
6504   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6505   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6506           (ins _.RC:$src2, _.RC:$src3),
6507           OpcodeStr, "$src3, $src2", "$src2, $src3",
6508           (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1, vselect, 1>,
6509           AVX512FMA3Base, Sched<[sched]>;
6511   // The pattern is in 312 order so that the load is in a different place from the
6512   // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
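  // For example, VFMADD132's memory form computes $src1 * mem + $src2; writing
  // the multiply as (mem * $src1) is equivalent because the multiply commutes.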
6513   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6514           (ins _.RC:$src2, _.MemOp:$src3),
6515           OpcodeStr, "$src3, $src2", "$src2, $src3",
6516           (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
6517           AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6519   // The pattern is in 312 order so that the load is in a different place from the
6520   // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6521   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6522          (ins _.RC:$src2, _.ScalarMemOp:$src3),
6523          OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6524          "$src2, ${src3}"##_.BroadcastStr,
6525          (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
6526                        _.RC:$src1, _.RC:$src2)), 1, 0>,
6527          AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6528   }
6531 multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6532                                  X86FoldableSchedWrite sched,
6533                                  X86VectorVTInfo _, string Suff> {
6534   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6535   defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6536           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6537           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6538           (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
6539           1, 1, vselect, 1>,
6540           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6543 multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6544                                    SDNode OpNodeRnd, X86SchedWriteWidths sched,
6545                                    AVX512VLVectorVTInfo _, string Suff> {
6546   let Predicates = [HasAVX512] in {
6547     defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.ZMM,
6548                                       _.info512, Suff>,
6549                   avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6550                                         _.info512, Suff>,
6551                               EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6552   }
6553   let Predicates = [HasVLX, HasAVX512] in {
6554     defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.YMM,
6555                                     _.info256, Suff>,
6556                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6557     defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.XMM,
6558                                     _.info128, Suff>,
6559                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6560   }
6563 multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6564                               SDNode OpNodeRnd > {
6565     defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6566                                       SchedWriteFMA, avx512vl_f32_info, "PS">;
6567     defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6568                                       SchedWriteFMA, avx512vl_f64_info, "PD">,
6569                                       VEX_W;
6572 defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
6573 defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
6574 defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
6575 defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
6576 defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
6577 defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
6579 // Scalar FMA
6580 multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6581                                dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
6582 let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
6583   defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6584           (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
6585           "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6586           AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>;
6588   let mayLoad = 1 in
6589   defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6590           (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
6591           "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6592           AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;
6594   defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6595          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6596          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
6597          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
6599   let isCodeGenOnly = 1, isCommutable = 1 in {
6600     def r     : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6601                      (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
6602                      !strconcat(OpcodeStr,
6603                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6604                      !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>;
6605     def m     : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
6606                     (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
6607                     !strconcat(OpcodeStr,
6608                                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6609                     [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;
6611     def rb    : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6612                      (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
6613                      !strconcat(OpcodeStr,
6614                               "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
6615                      !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
6616                      Sched<[SchedWriteFMA.Scl]>;
6617   }// isCodeGenOnly = 1
6618 }// Constraints = "$src1 = $dst"
6621 multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6622                             string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
6623                             X86VectorVTInfo _, string SUFF> {
6624   let ExeDomain = _.ExeDomain in {
6625   defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
6626                 // Operands for the intrinsic are in 123 order to preserve passthrough
6627                 // semantics.
6628                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6629                          _.FRC:$src3))),
6630                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6631                          (_.ScalarLdFrag addr:$src3)))),
6632                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
6633                          _.FRC:$src3, (i32 timm:$rc)))), 0>;
6635   defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
6636                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6637                                           _.FRC:$src1))),
6638                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
6639                             (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
6640                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
6641                          _.FRC:$src1, (i32 timm:$rc)))), 1>;
6643   // One pattern is in 312 order so that the load is in a different place from the
6644   // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6645   defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
6646                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6647                          _.FRC:$src2))),
6648                 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6649                                  _.FRC:$src1, _.FRC:$src2))),
6650                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
6651                          _.FRC:$src2, (i32 timm:$rc)))), 1>;
6652   }
6655 multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6656                         string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> {
6657   let Predicates = [HasAVX512] in {
6658     defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6659                                  OpNodeRnd, f32x_info, "SS">,
6660                                  EVEX_CD8<32, CD8VT1>, VEX_LIG;
6661     defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6662                                  OpNodeRnd, f64x_info, "SD">,
6663                                  EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
6664   }
6667 defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>;
6668 defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>;
6669 defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
6670 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
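// Map scalar FMA dag patterns (an FMA of element 0 re-inserted into $src1 via
// X86Movss/X86Movsd, optionally under an X86selects mask) onto the *_Int
// instruction forms defined above, including the masked (Intk), zero-masked
// (Intkz) and rounding (Zrb) variants.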
6672 multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
6673                                       string Suffix, SDNode Move,
6674                                       X86VectorVTInfo _, PatLeaf ZeroFP> {
6675   let Predicates = [HasAVX512] in {
6676     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6677                 (Op _.FRC:$src2,
6678                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6679                     _.FRC:$src3))))),
6680               (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
6681                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6682                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6684     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6685                 (Op _.FRC:$src2, _.FRC:$src3,
6686                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6687               (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
6688                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6689                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6691     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6692                 (Op _.FRC:$src2,
6693                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6694                     (_.ScalarLdFrag addr:$src3)))))),
6695               (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
6696                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6697                addr:$src3)>;
6699     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6700                 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6701                     (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
6702               (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
6703                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6704                addr:$src3)>;
6706     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6707                 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6708                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6709               (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
6710                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6711                addr:$src3)>;
6713     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6714                (X86selects VK1WM:$mask,
6715                 (Op _.FRC:$src2,
6716                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6717                     _.FRC:$src3),
6718                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6719               (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
6720                VR128X:$src1, VK1WM:$mask,
6721                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6722                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6724     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6725                (X86selects VK1WM:$mask,
6726                 (Op _.FRC:$src2,
6727                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6728                     (_.ScalarLdFrag addr:$src3)),
6729                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6730               (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
6731                VR128X:$src1, VK1WM:$mask,
6732                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6734     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6735                (X86selects VK1WM:$mask,
6736                 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6737                     (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
6738                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6739               (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
6740                VR128X:$src1, VK1WM:$mask,
6741                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6743     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6744                (X86selects VK1WM:$mask,
6745                 (Op _.FRC:$src2, _.FRC:$src3,
6746                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6747                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6748               (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
6749                VR128X:$src1, VK1WM:$mask,
6750                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6751                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6753     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6754                (X86selects VK1WM:$mask,
6755                 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6756                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6757                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6758               (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
6759                VR128X:$src1, VK1WM:$mask,
6760                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6762     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6763                (X86selects VK1WM:$mask,
6764                 (Op _.FRC:$src2,
6765                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6766                     _.FRC:$src3),
6767                 (_.EltVT ZeroFP)))))),
6768               (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
6769                VR128X:$src1, VK1WM:$mask,
6770                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6771                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6773     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6774                (X86selects VK1WM:$mask,
6775                 (Op _.FRC:$src2, _.FRC:$src3,
6776                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6777                 (_.EltVT ZeroFP)))))),
6778               (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
6779                VR128X:$src1, VK1WM:$mask,
6780                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6781                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6783     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6784                (X86selects VK1WM:$mask,
6785                 (Op _.FRC:$src2,
6786                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6787                     (_.ScalarLdFrag addr:$src3)),
6788                 (_.EltVT ZeroFP)))))),
6789               (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
6790                VR128X:$src1, VK1WM:$mask,
6791                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6793     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6794                (X86selects VK1WM:$mask,
6795                 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6796                     _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
6797                 (_.EltVT ZeroFP)))))),
6798               (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
6799                VR128X:$src1, VK1WM:$mask,
6800                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6802     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6803                (X86selects VK1WM:$mask,
6804                 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6805                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6806                 (_.EltVT ZeroFP)))))),
6807               (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
6808                VR128X:$src1, VK1WM:$mask,
6809                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6811     // Patterns with rounding mode.
6812     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6813                 (RndOp _.FRC:$src2,
6814                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6815                        _.FRC:$src3, (i32 timm:$rc)))))),
6816               (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
6817                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6818                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6820     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6821                 (RndOp _.FRC:$src2, _.FRC:$src3,
6822                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6823                        (i32 timm:$rc)))))),
6824               (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
6825                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6826                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6828     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6829                (X86selects VK1WM:$mask,
6830                 (RndOp _.FRC:$src2,
6831                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6832                        _.FRC:$src3, (i32 timm:$rc)),
6833                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6834               (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
6835                VR128X:$src1, VK1WM:$mask,
6836                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6837                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6839     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6840                (X86selects VK1WM:$mask,
6841                 (RndOp _.FRC:$src2, _.FRC:$src3,
6842                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6843                        (i32 timm:$rc)),
6844                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6845               (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
6846                VR128X:$src1, VK1WM:$mask,
6847                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6848                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6850     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6851                (X86selects VK1WM:$mask,
6852                 (RndOp _.FRC:$src2,
6853                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6854                        _.FRC:$src3, (i32 timm:$rc)),
6855                 (_.EltVT ZeroFP)))))),
6856               (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
6857                VR128X:$src1, VK1WM:$mask,
6858                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6859                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6861     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6862                (X86selects VK1WM:$mask,
6863                 (RndOp _.FRC:$src2, _.FRC:$src3,
6864                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6865                        (i32 timm:$rc)),
6866                 (_.EltVT ZeroFP)))))),
6867               (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
6868                VR128X:$src1, VK1WM:$mask,
6869                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6870                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6871   }
6874 defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SS",
6875                                   X86Movss, v4f32x_info, fp32imm0>;
6876 defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SS",
6877                                   X86Movss, v4f32x_info, fp32imm0>;
6878 defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SS",
6879                                   X86Movss, v4f32x_info, fp32imm0>;
6880 defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS",
6881                                   X86Movss, v4f32x_info, fp32imm0>;
6883 defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SD",
6884                                   X86Movsd, v2f64x_info, fp64imm0>;
6885 defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SD",
6886                                   X86Movsd, v2f64x_info, fp64imm0>;
6887 defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SD",
6888                                   X86Movsd, v2f64x_info, fp64imm0>;
6889 defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD",
6890                                   X86Movsd, v2f64x_info, fp64imm0>;
6892 //===----------------------------------------------------------------------===//
6893 // AVX-512 - Packed Multiply of Unsigned 52-bit Integers and Add the Low 52 Bits (IFMA)
6894 //===----------------------------------------------------------------------===//
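// VPMADD52LUQ/VPMADD52HUQ multiply the low 52 bits of each 64-bit element of
// the two multiplicand sources and add the low/high 52 bits of the 104-bit
// product to the 64-bit accumulator in $dst.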
6895 let Constraints = "$src1 = $dst" in {
6896 multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6897                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6898   // NOTE: The SDNode has the multiply operands first with the add last.
6899   // This enables commuted load patterns to be autogenerated by tablegen.
6900   let ExeDomain = _.ExeDomain in {
6901   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6902           (ins _.RC:$src2, _.RC:$src3),
6903           OpcodeStr, "$src3, $src2", "$src2, $src3",
6904           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6905          AVX512FMA3Base, Sched<[sched]>;
6907   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6908           (ins _.RC:$src2, _.MemOp:$src3),
6909           OpcodeStr, "$src3, $src2", "$src2, $src3",
6910           (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
6911           AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6913   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6914             (ins _.RC:$src2, _.ScalarMemOp:$src3),
6915             OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
6916             !strconcat("$src2, ${src3}", _.BroadcastStr ),
6917             (OpNode _.RC:$src2,
6918                     (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
6919                     _.RC:$src1)>,
6920             AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6921   }
6923 } // Constraints = "$src1 = $dst"
6925 multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6926                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
6927   let Predicates = [HasIFMA] in {
6928     defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
6929                       EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6930   }
6931   let Predicates = [HasVLX, HasIFMA] in {
6932     defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
6933                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6934     defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
6935                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6936   }
6939 defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
6940                                          SchedWriteVecIMul, avx512vl_i64_info>,
6941                                          VEX_W;
6942 defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
6943                                          SchedWriteVecIMul, avx512vl_i64_info>,
6944                                          VEX_W;
6946 //===----------------------------------------------------------------------===//
6947 // AVX-512 - Scalar convert from signed integer to float/double
6948 //===----------------------------------------------------------------------===//
6950 multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
6951                     RegisterClass SrcRC, X86VectorVTInfo DstVT,
6952                     X86MemOperand x86memop, PatFrag ld_frag, string asm,
6953                     string mem> {
6954   let hasSideEffects = 0, isCodeGenOnly = 1 in {
6955     def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
6956               (ins DstVT.FRC:$src1, SrcRC:$src),
6957               !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
6958               EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
6959     let mayLoad = 1 in
6960       def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
6961               (ins DstVT.FRC:$src1, x86memop:$src),
6962               asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
6963               EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6964   } // hasSideEffects = 0
6965   def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
6966                 (ins DstVT.RC:$src1, SrcRC:$src2),
6967                 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6968                 [(set DstVT.RC:$dst,
6969                       (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
6970                EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
6972   def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
6973                 (ins DstVT.RC:$src1, x86memop:$src2),
6974                 asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6975                 [(set DstVT.RC:$dst,
6976                       (OpNode (DstVT.VT DstVT.RC:$src1),
6977                                (ld_frag addr:$src2)))]>,
6978                 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6979   def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6980                   (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
6981                   DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
6984 multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
6985                                X86FoldableSchedWrite sched, RegisterClass SrcRC,
6986                                X86VectorVTInfo DstVT, string asm,
6987                                string mem> {
6988   def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
6989               (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
6990               !strconcat(asm,
6991                   "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
6992               [(set DstVT.RC:$dst,
6993                     (OpNode (DstVT.VT DstVT.RC:$src1),
6994                              SrcRC:$src2,
6995                              (i32 timm:$rc)))]>,
6996               EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
6997   def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
6998                   (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
6999                   DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
7002 multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
7003                                 X86FoldableSchedWrite sched,
7004                                 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7005                                 X86MemOperand x86memop, PatFrag ld_frag,
7006                                 string asm, string mem> {
7007   defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
7008               avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7009                             ld_frag, asm, mem>, VEX_LIG;
7012 let Predicates = [HasAVX512] in {
7013 defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7014                                  WriteCvtI2SS, GR32,
7015                                  v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7016                                  XS, EVEX_CD8<32, CD8VT1>;
7017 defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7018                                  WriteCvtI2SS, GR64,
7019                                  v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7020                                  XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7021 defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7022                                  v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l">,
7023                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7024 defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7025                                  WriteCvtI2SD, GR64,
7026                                  v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7027                                  XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7029 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7030               (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7031 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7032               (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
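// The (IMPLICIT_DEF) operand in the patterns below only supplies the first
// source of the codegen-only FRC forms (the pass-through half of the
// destination register); its value does not affect the converted scalar.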
7034 def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
7035           (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7036 def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
7037           (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7038 def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
7039           (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7040 def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
7041           (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7043 def : Pat<(f32 (sint_to_fp GR32:$src)),
7044           (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7045 def : Pat<(f32 (sint_to_fp GR64:$src)),
7046           (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7047 def : Pat<(f64 (sint_to_fp GR32:$src)),
7048           (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7049 def : Pat<(f64 (sint_to_fp GR64:$src)),
7050           (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7052 defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7053                                   WriteCvtI2SS, GR32,
7054                                   v4f32x_info, i32mem, loadi32,
7055                                   "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
7056 defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7057                                   WriteCvtI2SS, GR64,
7058                                   v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7059                                   XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7060 defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7061                                   i32mem, loadi32, "cvtusi2sd", "l">,
7062                                   XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7063 defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7064                                   WriteCvtI2SD, GR64,
7065                                   v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7066                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7068 def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7069               (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7070 def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7071               (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7073 def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
7074           (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7075 def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
7076           (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7077 def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
7078           (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7079 def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
7080           (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7082 def : Pat<(f32 (uint_to_fp GR32:$src)),
7083           (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7084 def : Pat<(f32 (uint_to_fp GR64:$src)),
7085           (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7086 def : Pat<(f64 (uint_to_fp GR32:$src)),
7087           (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7088 def : Pat<(f64 (uint_to_fp GR64:$src)),
7089           (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7092 //===----------------------------------------------------------------------===//
7093 // AVX-512  Scalar convert from float/double to integer
7094 //===----------------------------------------------------------------------===//
7096 multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7097                                   X86VectorVTInfo DstVT, SDNode OpNode,
7098                                   SDNode OpNodeRnd,
7099                                   X86FoldableSchedWrite sched, string asm,
7100                                   string aliasStr> {
7101   let Predicates = [HasAVX512] in {
7102     def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7103                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7104                 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7105                 EVEX, VEX_LIG, Sched<[sched]>;
7106     def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7107                  !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7108                  [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
7109                  EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7110                  Sched<[sched]>;
7111     def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7112                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7113                 [(set DstVT.RC:$dst, (OpNode
7114                       (SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>,
7115                 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7116   } // Predicates = [HasAVX512]
7118   def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7119           (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7120   def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7121           (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7122   def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7123           (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7124                                           SrcVT.IntScalarMemOp:$src), 0, "att">;
7125 }
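// avx512_cvt_s_int_round provides the register (rr_Int), static-rounding
// (rrb_Int) and folded-load (rm_Int) intrinsic forms, plus AT&T-only aliases
// that accept the aliasStr-suffixed ("{l}"/"{q}") spelling of the mnemonic.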
7127 // Convert float/double to signed/unsigned int 32/64
7128 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7129                                    X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7130                                    XS, EVEX_CD8<32, CD8VT1>;
7131 defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7132                                    X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7133                                    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7134 defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7135                                    X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7136                                    XS, EVEX_CD8<32, CD8VT1>;
7137 defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7138                                    X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7139                                    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7140 defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7141                                    X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7142                                    XD, EVEX_CD8<64, CD8VT1>;
7143 defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7144                                    X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7145                                    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7146 defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7147                                    X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7148                                    XD, EVEX_CD8<64, CD8VT1>;
7149 defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7150                                    X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7151                                    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7153 // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
7154 // which produce unnecessary vmovs{s,d} instructions
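// For example, a sequence like the one clang emits for _mm_cvtsi64_ss(a, b)
// is an X86Movss inserting the converted scalar into element 0 of $dst;
// selecting the *_Int form directly avoids the separate VMOVSS.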
7155 let Predicates = [HasAVX512] in {
7156 def : Pat<(v4f32 (X86Movss
7157                    (v4f32 VR128X:$dst),
7158                    (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
7159           (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7161 def : Pat<(v4f32 (X86Movss
7162                    (v4f32 VR128X:$dst),
7163                    (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
7164           (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7166 def : Pat<(v4f32 (X86Movss
7167                    (v4f32 VR128X:$dst),
7168                    (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
7169           (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7171 def : Pat<(v4f32 (X86Movss
7172                    (v4f32 VR128X:$dst),
7173                    (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
7174           (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7176 def : Pat<(v2f64 (X86Movsd
7177                    (v2f64 VR128X:$dst),
7178                    (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
7179           (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7181 def : Pat<(v2f64 (X86Movsd
7182                    (v2f64 VR128X:$dst),
7183                    (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
7184           (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7186 def : Pat<(v2f64 (X86Movsd
7187                    (v2f64 VR128X:$dst),
7188                    (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
7189           (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7191 def : Pat<(v2f64 (X86Movsd
7192                    (v2f64 VR128X:$dst),
7193                    (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
7194           (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7196 def : Pat<(v4f32 (X86Movss
7197                    (v4f32 VR128X:$dst),
7198                    (v4f32 (scalar_to_vector (f32 (uint_to_fp GR64:$src)))))),
7199           (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7201 def : Pat<(v4f32 (X86Movss
7202                    (v4f32 VR128X:$dst),
7203                    (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi64 addr:$src))))))),
7204           (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7206 def : Pat<(v4f32 (X86Movss
7207                    (v4f32 VR128X:$dst),
7208                    (v4f32 (scalar_to_vector (f32 (uint_to_fp GR32:$src)))))),
7209           (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7211 def : Pat<(v4f32 (X86Movss
7212                    (v4f32 VR128X:$dst),
7213                    (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi32 addr:$src))))))),
7214           (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7216 def : Pat<(v2f64 (X86Movsd
7217                    (v2f64 VR128X:$dst),
7218                    (v2f64 (scalar_to_vector (f64 (uint_to_fp GR64:$src)))))),
7219           (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7221 def : Pat<(v2f64 (X86Movsd
7222                    (v2f64 VR128X:$dst),
7223                    (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi64 addr:$src))))))),
7224           (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7226 def : Pat<(v2f64 (X86Movsd
7227                    (v2f64 VR128X:$dst),
7228                    (v2f64 (scalar_to_vector (f64 (uint_to_fp GR32:$src)))))),
7229           (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7231 def : Pat<(v2f64 (X86Movsd
7232                    (v2f64 VR128X:$dst),
7233                    (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi32 addr:$src))))))),
7234           (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7235 } // Predicates = [HasAVX512]
7237 // Convert float/double to signed/unsigned int 32/64 with truncation
7238 multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7239                             X86VectorVTInfo _DstRC, SDNode OpNode,
7240                             SDNode OpNodeInt, SDNode OpNodeSAE,
7241                             X86FoldableSchedWrite sched, string aliasStr>{
7242 let Predicates = [HasAVX512] in {
7243   let isCodeGenOnly = 1 in {
7244   def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7245               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7246               [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7247               EVEX, VEX_LIG, Sched<[sched]>;
7248   def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7249               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7250               [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7251               EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7252   }
7254   def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7255             !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7256            [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7257            EVEX, VEX_LIG, Sched<[sched]>;
7258   def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7259             !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7260             [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7261                                   EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
7262   def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7263               (ins _SrcRC.IntScalarMemOp:$src),
7264               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7265               [(set _DstRC.RC:$dst,
7266                 (OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>,
7267               EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7268 } //HasAVX512
7270   def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7271           (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7272   def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7273           (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7274   def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7275           (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7276                                           _SrcRC.IntScalarMemOp:$src), 0, "att">;
7277 }
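// In avx512_cvt_s_all the isCodeGenOnly rr/rm forms take a bare FR32X/FR64X
// scalar for the fp_to_{s,u}int ISel patterns, while the *_Int forms operate
// on the full XMM register for the intrinsic nodes and the assembler aliases.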
7279 defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7280                         fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7281                         "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7282 defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7283                         fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7284                         "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7285 defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7286                         fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7287                         "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7288 defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7289                         fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7290                         "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
7292 defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7293                         fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7294                         "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7295 defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7296                         fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7297                         "{q}">, XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7298 defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7299                         fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7300                         "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7301 defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7302                         fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7303                         "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
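// Note: the truncating conversions only need {sae} (rrb_Int) rather than an
// explicit rounding-control operand, since truncation already implies
// round-toward-zero.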
7305 //===----------------------------------------------------------------------===//
7306 // AVX-512  Convert from float to double and back
7307 //===----------------------------------------------------------------------===//
7309 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7310                                 X86VectorVTInfo _Src, SDNode OpNode,
7311                                 X86FoldableSchedWrite sched> {
7312   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7313                          (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7314                          "$src2, $src1", "$src1, $src2",
7315                          (_.VT (OpNode (_.VT _.RC:$src1),
7316                                        (_Src.VT _Src.RC:$src2)))>,
7317                          EVEX_4V, VEX_LIG, Sched<[sched]>;
7318   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7319                          (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7320                          "$src2, $src1", "$src1, $src2",
7321                          (_.VT (OpNode (_.VT _.RC:$src1),
7322                                   (_Src.VT _Src.ScalarIntMemCPat:$src2)))>,
7323                          EVEX_4V, VEX_LIG,
7324                          Sched<[sched.Folded, sched.ReadAfterFold]>;
7326   let isCodeGenOnly = 1, hasSideEffects = 0 in {
7327     def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7328                (ins _.FRC:$src1, _Src.FRC:$src2),
7329                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7330                EVEX_4V, VEX_LIG, Sched<[sched]>;
7331     let mayLoad = 1 in
7332     def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7333                (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7334                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7335                EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7336   }
7337 }
7339 // Scalar Conversion with SAE - suppress all exceptions
7340 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7341                                     X86VectorVTInfo _Src, SDNode OpNodeSAE,
7342                                     X86FoldableSchedWrite sched> {
7343   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7344                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7345                         "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7346                         (_.VT (OpNodeSAE (_.VT _.RC:$src1),
7347                                          (_Src.VT _Src.RC:$src2)))>,
7348                         EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
7349 }
7351 // Scalar Conversion with rounding control (RC)
7352 multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7353                                    X86VectorVTInfo _Src, SDNode OpNodeRnd,
7354                                    X86FoldableSchedWrite sched> {
7355   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7356                         (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7357                         "$rc, $src2, $src1", "$src1, $src2, $rc",
7358                         (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7359                                          (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
7360                         EVEX_4V, VEX_LIG, Sched<[sched]>,
7361                         EVEX_B, EVEX_RC;
7362 }
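// Rough rule of thumb for the two variants above: {sae} is used where the
// scalar conversion cannot be affected by the rounding mode (e.g. vcvtss2sd
// is exact), while EVEX.RC is used where it can (e.g. vcvtsd2ss).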
7363 multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
7364                                       SDNode OpNode, SDNode OpNodeRnd,
7365                                       X86FoldableSchedWrite sched,
7366                                       X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7367   let Predicates = [HasAVX512] in {
7368     defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7369              avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7370                                OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
7371   }
7372 }
7374 multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
7375                                       SDNode OpNode, SDNode OpNodeSAE,
7376                                       X86FoldableSchedWrite sched,
7377                                       X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7378   let Predicates = [HasAVX512] in {
7379     defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7380              avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
7381              EVEX_CD8<32, CD8VT1>, XS;
7382   }
7383 }
7384 defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86frounds,
7385                                          X86froundsRnd, WriteCvtSD2SS, f64x_info,
7386                                          f32x_info>;
7387 defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts,
7388                                           X86fpextsSAE, WriteCvtSS2SD, f32x_info,
7389                                           f64x_info>;
7391 def : Pat<(f64 (fpextend FR32X:$src)),
7392           (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7393           Requires<[HasAVX512]>;
7394 def : Pat<(f64 (fpextend (loadf32 addr:$src))),
7395           (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7396           Requires<[HasAVX512, OptForSize]>;
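// Folding the load into the fpextend is restricted to OptForSize, mirroring
// the corresponding SSE/AVX patterns, where the fold is avoided when
// optimizing for speed because of partial-update/false-dependency concerns.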
7398 def : Pat<(f32 (fpround FR64X:$src)),
7399           (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7400            Requires<[HasAVX512]>;
7402 def : Pat<(v4f32 (X86Movss
7403                    (v4f32 VR128X:$dst),
7404                    (v4f32 (scalar_to_vector
7405                      (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
7406           (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
7407           Requires<[HasAVX512]>;
7409 def : Pat<(v2f64 (X86Movsd
7410                    (v2f64 VR128X:$dst),
7411                    (v2f64 (scalar_to_vector
7412                      (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
7413           (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
7414           Requires<[HasAVX512]>;
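// As with the vcvtsi2s{s,d} patterns earlier, matching the X86Movss/X86Movsd
// wrappers selects the _Int forms directly and avoids an extra VMOVSS/VMOVSD
// to place the converted element.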
7416 //===----------------------------------------------------------------------===//
7417 // AVX-512  Vector convert from signed/unsigned integer to float/double
7418 //          and from float/double to signed/unsigned integer
7419 //===----------------------------------------------------------------------===//
7421 multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7422                           X86VectorVTInfo _Src, SDNode OpNode,
7423                           X86FoldableSchedWrite sched,
7424                           string Broadcast = _.BroadcastStr,
7425                           string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7426                           RegisterClass MaskRC = _.KRCWM,
7427                           dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
7429   defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
7430                          (ins _Src.RC:$src),
7431                          (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
7432                          (ins MaskRC:$mask, _Src.RC:$src),
7433                           OpcodeStr, "$src", "$src",
7434                          (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7435                          (vselect MaskRC:$mask,
7436                                   (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7437                                   _.RC:$src0),
7438                          vselect, "$src0 = $dst">,
7439                          EVEX, Sched<[sched]>;
7441   defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
7442                          (ins MemOp:$src),
7443                          (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
7444                          (ins MaskRC:$mask, MemOp:$src),
7445                          OpcodeStr#Alias, "$src", "$src",
7446                          LdDAG,
7447                          (vselect MaskRC:$mask, LdDAG, _.RC:$src0),
7448                          vselect, "$src0 = $dst">,
7449                          EVEX, Sched<[sched.Folded]>;
7451   defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
7452                          (ins _Src.ScalarMemOp:$src),
7453                          (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
7454                          (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
7455                          OpcodeStr,
7456                          "${src}"##Broadcast, "${src}"##Broadcast,
7457                          (_.VT (OpNode (_Src.VT
7458                                   (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
7459                             )),
7460                          (vselect MaskRC:$mask,
7461                                   (_.VT
7462                                    (OpNode
7463                                     (_Src.VT
7464                                      (X86VBroadcast
7465                                       (_Src.ScalarLdFrag addr:$src))))),
7466                                   _.RC:$src0),
7467                          vselect, "$src0 = $dst">,
7468                          EVEX, EVEX_B, Sched<[sched.Folded]>;
7469 }
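// avx512_vcvt_fp builds the register (rr), load (rm) and embedded-broadcast
// (rmb) forms, each with merge- and zero-masking via AVX512_maskable_common;
// LdDAG can be overridden when the memory operand is narrower than a full
// source vector (see avx512_vcvt_fpextend and the cvtdq2pd/cvtps2qq uses below).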
7470 // Conversion with SAE - suppress all exceptions
7471 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7472                               X86VectorVTInfo _Src, SDNode OpNodeSAE,
7473                               X86FoldableSchedWrite sched> {
7474   defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7475                         (ins _Src.RC:$src), OpcodeStr,
7476                         "{sae}, $src", "$src, {sae}",
7477                         (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
7478                         EVEX, EVEX_B, Sched<[sched]>;
7479 }
7481 // Conversion with rounding control (RC)
7482 multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7483                          X86VectorVTInfo _Src, SDNode OpNodeRnd,
7484                          X86FoldableSchedWrite sched> {
7485   defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7486                         (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
7487                         "$rc, $src", "$src, $rc",
7488                         (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
7489                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
7490 }
7492 // Similar to avx512_vcvt_fp, but uses an extload for the memory form.
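// That is, the memory form matches an extending vector load (fpextend of a
// full-width load) rather than OpNode applied to a separately matched load.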
7493 multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7494                                 X86VectorVTInfo _Src, SDNode OpNode,
7495                                 X86FoldableSchedWrite sched,
7496                                 string Broadcast = _.BroadcastStr,
7497                                 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7498                                 RegisterClass MaskRC = _.KRCWM>
7499   : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, sched, Broadcast, Alias,
7500                    MemOp, MaskRC,
7501                    (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
7503 // Extend Float to Double
7504 multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
7505                            X86SchedWriteWidths sched> {
7506   let Predicates = [HasAVX512] in {
7507     defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
7508                             fpextend, sched.ZMM>,
7509              avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
7510                                 X86vfpextSAE, sched.ZMM>, EVEX_V512;
7511   }
7512   let Predicates = [HasVLX] in {
7513     defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
7514                                X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
7515     defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
7516                                sched.YMM>, EVEX_V256;
7517   }
7518 }
7520 // Truncate Double to Float
7521 multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
7522   let Predicates = [HasAVX512] in {
7523     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, X86vfpround, sched.ZMM>,
7524              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
7525                                X86vfproundRnd, sched.ZMM>, EVEX_V512;
7526   }
7527   let Predicates = [HasVLX] in {
7528     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
7529                                null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>,
7530                                EVEX_V128;
7531     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, X86vfpround,
7532                                sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7533   }
7535   def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7536                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7537   def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7538                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7539                   VK2WM:$mask, VR128X:$src), 0, "att">;
7540   def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|"
7541                   "$dst {${mask}} {z}, $src}",
7542                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7543                   VK2WM:$mask, VR128X:$src), 0, "att">;
7544   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7545                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7546   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
7547                   "$dst {${mask}}, ${src}{1to2}}",
7548                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7549                   VK2WM:$mask, f64mem:$src), 0, "att">;
7550   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7551                   "$dst {${mask}} {z}, ${src}{1to2}}",
7552                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7553                   VK2WM:$mask, f64mem:$src), 0, "att">;
7555   def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7556                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7557   def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7558                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7559                   VK4WM:$mask, VR256X:$src), 0, "att">;
7560   def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
7561                   "$dst {${mask}} {z}, $src}",
7562                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7563                   VK4WM:$mask, VR256X:$src), 0, "att">;
7564   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7565                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7566   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
7567                   "$dst {${mask}}, ${src}{1to4}}",
7568                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7569                   VK4WM:$mask, f64mem:$src), 0, "att">;
7570   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7571                   "$dst {${mask}} {z}, ${src}{1to4}}",
7572                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7573                   VK2WM:$mask, f64mem:$src), 0, "att">;
7574 }
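// The "x"/"y" InstAliases above follow the AT&T convention of spelling the
// source width in the mnemonic (vcvtpd2psx/vcvtpd2psy), which is needed
// because both the 128-bit and 256-bit forms write a 128-bit destination.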
7576 defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
7577                                   VEX_W, PD, EVEX_CD8<64, CD8VF>;
7578 defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
7579                                   PS, EVEX_CD8<32, CD8VH>;
7581 let Predicates = [HasAVX512] in {
7582   def : Pat<(v8f32 (fpround (v8f64 VR512:$src))),
7583             (VCVTPD2PSZrr VR512:$src)>;
7584   def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
7585                      VR256X:$src0),
7586             (VCVTPD2PSZrrk VR256X:$src0, VK8WM:$mask, VR512:$src)>;
7587   def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
7588                      v8f32x_info.ImmAllZerosV),
7589             (VCVTPD2PSZrrkz VK8WM:$mask, VR512:$src)>;
7591   def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
7592             (VCVTPD2PSZrm addr:$src)>;
7593   def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
7594                      VR256X:$src0),
7595             (VCVTPD2PSZrmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
7596   def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
7597                      v8f32x_info.ImmAllZerosV),
7598             (VCVTPD2PSZrmkz VK8WM:$mask, addr:$src)>;
7600   def : Pat<(v8f32 (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src))))),
7601             (VCVTPD2PSZrmb addr:$src)>;
7602   def : Pat<(vselect VK8WM:$mask,
7603                      (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))),
7604                      (v8f32 VR256X:$src0)),
7605             (VCVTPD2PSZrmbk VR256X:$src0, VK8WM:$mask, addr:$src)>;
7606   def : Pat<(vselect VK8WM:$mask,
7607                      (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))),
7608                      v8f32x_info.ImmAllZerosV),
7609             (VCVTPD2PSZrmbkz VK8WM:$mask, addr:$src)>;
7610 }
7612 let Predicates = [HasVLX] in {
7613   def : Pat<(v4f32 (fpround (v4f64 VR256X:$src))),
7614             (VCVTPD2PSZ256rr VR256X:$src)>;
7615   def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
7616                      VR128X:$src0),
7617             (VCVTPD2PSZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
7618   def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
7619                      v4f32x_info.ImmAllZerosV),
7620             (VCVTPD2PSZ256rrkz VK4WM:$mask, VR256X:$src)>;
7622   def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
7623             (VCVTPD2PSZ256rm addr:$src)>;
7624   def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
7625                      VR128X:$src0),
7626             (VCVTPD2PSZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
7627   def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
7628                      v4f32x_info.ImmAllZerosV),
7629             (VCVTPD2PSZ256rmkz VK4WM:$mask, addr:$src)>;
7631   def : Pat<(v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
7632             (VCVTPD2PSZ256rmb addr:$src)>;
7633   def : Pat<(vselect VK4WM:$mask,
7634                      (v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
7635                      VR128X:$src0),
7636             (VCVTPD2PSZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
7637   def : Pat<(vselect VK4WM:$mask,
7638                      (v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
7639                      v4f32x_info.ImmAllZerosV),
7640             (VCVTPD2PSZ256rmbkz VK4WM:$mask, addr:$src)>;
7642   // Special patterns to allow use of X86vmfpround for masking. Instruction
7643   // patterns have been disabled with null_frag.
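// The generic vselect-based masking from avx512_vcvt_fp is not usable here
// because the v2f64 source defines only the low two elements of the v4f32
// result, so the masked forms are matched through X86vmfpround, which takes
// the passthru and mask operands directly.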
7644   def : Pat<(X86vfpround (v2f64 VR128X:$src)),
7645             (VCVTPD2PSZ128rr VR128X:$src)>;
7646   def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
7647                           VK2WM:$mask),
7648             (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
7649   def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV,
7650                           VK2WM:$mask),
7651             (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
7653   def : Pat<(X86vfpround (loadv2f64 addr:$src)),
7654             (VCVTPD2PSZ128rm addr:$src)>;
7655   def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
7656                           VK2WM:$mask),
7657             (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
7658   def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV,
7659                           VK2WM:$mask),
7660             (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;
7662   def : Pat<(X86vfpround (v2f64 (X86VBroadcast (loadf64 addr:$src)))),
7663             (VCVTPD2PSZ128rmb addr:$src)>;
7664   def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))),
7665                           (v4f32 VR128X:$src0), VK2WM:$mask),
7666             (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
7667   def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))),
7668                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
7669             (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
7670 }
7672 // Convert Signed/Unsigned Doubleword to Double
7673 multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
7674                            SDNode OpNode128, X86SchedWriteWidths sched> {
7675   // No rounding in this op
7676   let Predicates = [HasAVX512] in
7677     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
7678                             sched.ZMM>, EVEX_V512;
7680   let Predicates = [HasVLX] in {
7681     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
7682                                OpNode128, sched.XMM, "{1to2}", "", i64mem, VK2WM,
7683                                (v2f64 (OpNode128 (bc_v4i32
7684                                 (v2i64
7685                                  (scalar_to_vector (loadi64 addr:$src))))))>,
7686                                EVEX_V128;
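// Only the low 64 bits (two i32 elements) of the memory operand are consumed
// by the 128-bit form, hence i64mem/loadi64 and the explicit scalar_to_vector
// load DAG above.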
7687     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
7688                                sched.YMM>, EVEX_V256;
7689   }
7690 }
7692 // Convert Signed/Unsigned Doubleword to Float
7693 multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
7694                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7695   let Predicates = [HasAVX512] in
7696     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
7697                             sched.ZMM>,
7698              avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
7699                                OpNodeRnd, sched.ZMM>, EVEX_V512;
7701   let Predicates = [HasVLX] in {
7702     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
7703                                sched.XMM>, EVEX_V128;
7704     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
7705                                sched.YMM>, EVEX_V256;
7706   }
7707 }
7709 // Convert Float to Signed/Unsigned Doubleword with truncation
7710 multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7711                             SDNode OpNodeSAE, X86SchedWriteWidths sched> {
7712   let Predicates = [HasAVX512] in {
7713     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7714                             sched.ZMM>,
7715              avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
7716                                 OpNodeSAE, sched.ZMM>, EVEX_V512;
7717   }
7718   let Predicates = [HasVLX] in {
7719     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7720                                sched.XMM>, EVEX_V128;
7721     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7722                                sched.YMM>, EVEX_V256;
7723   }
7724 }
7726 // Convert Float to Signed/Unsigned Doubleword
7727 multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7728                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7729   let Predicates = [HasAVX512] in {
7730     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7731                             sched.ZMM>,
7732              avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
7733                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
7734   }
7735   let Predicates = [HasVLX] in {
7736     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7737                                sched.XMM>, EVEX_V128;
7738     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7739                                sched.YMM>, EVEX_V256;
7740   }
7741 }
7743 // Convert Double to Signed/Unsigned Doubleword with truncation
7744 multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7745                             SDNode OpNodeSAE, X86SchedWriteWidths sched> {
7746   let Predicates = [HasAVX512] in {
7747     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7748                             sched.ZMM>,
7749              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
7750                                 OpNodeSAE, sched.ZMM>, EVEX_V512;
7751   }
7752   let Predicates = [HasVLX] in {
7753     // We need "x"/"y" suffixes in order to distinguish between the 128- and
7754     // 256-bit memory forms of these instructions in the Asm Parser. They have
7755     // the same dest type - 'v4i32x_info'. We also specify the broadcast string
7756     // explicitly for the same reason.
7757     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
7758                                null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
7759                                VK2WM>, EVEX_V128;
7760     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7761                                sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7762   }
7764   def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7765                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
7766                   VR128X:$src), 0, "att">;
7767   def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7768                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7769                   VK2WM:$mask, VR128X:$src), 0, "att">;
7770   def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7771                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7772                   VK2WM:$mask, VR128X:$src), 0, "att">;
7773   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7774                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
7775                   f64mem:$src), 0, "att">;
7776   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
7777                   "$dst {${mask}}, ${src}{1to2}}",
7778                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7779                   VK2WM:$mask, f64mem:$src), 0, "att">;
7780   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7781                   "$dst {${mask}} {z}, ${src}{1to2}}",
7782                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7783                   VK2WM:$mask, f64mem:$src), 0, "att">;
7785   def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7786                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
7787                   VR256X:$src), 0, "att">;
7788   def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7789                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7790                   VK4WM:$mask, VR256X:$src), 0, "att">;
7791   def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7792                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7793                   VK4WM:$mask, VR256X:$src), 0, "att">;
7794   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7795                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
7796                   f64mem:$src), 0, "att">;
7797   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
7798                   "$dst {${mask}}, ${src}{1to4}}",
7799                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7800                   VK4WM:$mask, f64mem:$src), 0, "att">;
7801   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7802                   "$dst {${mask}} {z}, ${src}{1to4}}",
7803                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7804                   VK4WM:$mask, f64mem:$src), 0, "att">;
7805 }
7807 // Convert Double to Signed/Unsigned Doubleword
7808 multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7809                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7810   let Predicates = [HasAVX512] in {
7811     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7812                             sched.ZMM>,
7813              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
7814                                OpNodeRnd, sched.ZMM>, EVEX_V512;
7815   }
7816   let Predicates = [HasVLX] in {
7817     // We need "x"/"y" suffixes in order to distinguish between the 128- and
7818     // 256-bit memory forms of these instructions in the Asm Parser. They have
7819     // the same dest type - 'v4i32x_info'. We also specify the broadcast string
7820     // explicitly for the same reason.
7821     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
7822                                null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
7823                                VK2WM>, EVEX_V128;
7824     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7825                                sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7826   }
7828   def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7829                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7830   def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7831                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7832                   VK2WM:$mask, VR128X:$src), 0, "att">;
7833   def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7834                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7835                   VK2WM:$mask, VR128X:$src), 0, "att">;
7836   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7837                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
7838                   f64mem:$src), 0, "att">;
7839   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
7840                   "$dst {${mask}}, ${src}{1to2}}",
7841                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7842                   VK2WM:$mask, f64mem:$src), 0, "att">;
7843   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7844                   "$dst {${mask}} {z}, ${src}{1to2}}",
7845                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7846                   VK2WM:$mask, f64mem:$src), 0, "att">;
7848   def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7849                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7850   def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7851                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7852                   VK4WM:$mask, VR256X:$src), 0, "att">;
7853   def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7854                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7855                   VK4WM:$mask, VR256X:$src), 0, "att">;
7856   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7857                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
7858                   f64mem:$src), 0, "att">;
7859   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
7860                   "$dst {${mask}}, ${src}{1to4}}",
7861                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7862                   VK4WM:$mask, f64mem:$src), 0, "att">;
7863   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7864                   "$dst {${mask}} {z}, ${src}{1to4}}",
7865                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7866                   VK4WM:$mask, f64mem:$src), 0, "att">;
7867 }
7869 // Convert Double to Signed/Unsigned Quadword
7870 multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7871                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7872   let Predicates = [HasDQI] in {
7873     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7874                             sched.ZMM>,
7875              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
7876                                OpNodeRnd, sched.ZMM>, EVEX_V512;
7877   }
7878   let Predicates = [HasDQI, HasVLX] in {
7879     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7880                                sched.XMM>, EVEX_V128;
7881     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7882                                sched.YMM>, EVEX_V256;
7883   }
7884 }
7886 // Convert Double to Signed/Unsigned Quadword with truncation
7887 multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7888                             SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7889   let Predicates = [HasDQI] in {
7890     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7891                             sched.ZMM>,
7892              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
7893                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
7894   }
7895   let Predicates = [HasDQI, HasVLX] in {
7896     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7897                                sched.XMM>, EVEX_V128;
7898     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7899                                sched.YMM>, EVEX_V256;
7900   }
7901 }
7903 // Convert Signed/Unsigned Quadword to Double
7904 multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
7905                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7906   let Predicates = [HasDQI] in {
7907     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
7908                             sched.ZMM>,
7909              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
7910                                OpNodeRnd, sched.ZMM>, EVEX_V512;
7911   }
7912   let Predicates = [HasDQI, HasVLX] in {
7913     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
7914                                sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
7915     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
7916                                sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
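// NotEVEX2VEXConvertible explicitly blocks the EVEX->VEX compression pass for
// these VLX forms: there is no equivalent VEX encoding (VEX 0xE6/XS, for
// instance, is vcvtdq2pd rather than vcvtqq2pd).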
7917   }
7918 }
7920 // Convert Float to Signed/Unsigned Quadword
7921 multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7922                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7923   let Predicates = [HasDQI] in {
7924     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
7925                             sched.ZMM>,
7926              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
7927                                OpNodeRnd, sched.ZMM>, EVEX_V512;
7928   }
7929   let Predicates = [HasDQI, HasVLX] in {
7930     // Explicitly specified broadcast string, since we take only 2 elements
7931     // from the v4f32x_info source.
7932     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
7933                                sched.XMM, "{1to2}", "", f64mem, VK2WM,
7934                                (v2i64 (OpNode (bc_v4f32
7935                                 (v2f64
7936                                  (scalar_to_vector (loadf64 addr:$src))))))>,
7937                                EVEX_V128;
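// As with vcvtdq2pd above, only a 64-bit (two-element) chunk of memory is
// read here, hence f64mem and the scalar_to_vector load pattern.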
7938     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
7939                                sched.YMM>, EVEX_V256;
7940   }
7941 }
7943 // Convert Float to Signed/Unsigned Quadword with truncation
7944 multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7945                             SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7946   let Predicates = [HasDQI] in {
7947     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, sched.ZMM>,
7948              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
7949                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
7950   }
7951   let Predicates = [HasDQI, HasVLX] in {
7952     // Explicitly specified broadcast string, since we take only 2 elements
7953     // from the v4f32x_info source.
7954     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
7955                                sched.XMM, "{1to2}", "", f64mem, VK2WM,
7956                                (v2i64 (OpNode (bc_v4f32
7957                                 (v2f64
7958                                  (scalar_to_vector (loadf64 addr:$src))))))>,
7959                                EVEX_V128;
7960     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
7961                                sched.YMM>, EVEX_V256;
7962   }
7963 }
7965 // Convert Signed/Unsigned Quadword to Float
7966 multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
7967                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7968   let Predicates = [HasDQI] in {
7969     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
7970                             sched.ZMM>,
7971              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
7972                                OpNodeRnd, sched.ZMM>, EVEX_V512;
7973   }
7974   let Predicates = [HasDQI, HasVLX] in {
7975     // We need "x"/"y" suffixes in order to distinguish between the 128- and
7976     // 256-bit memory forms of these instructions in the Asm Parser. They have
7977     // the same dest type - 'v4f32x_info'. We also specify the broadcast string
7978     // explicitly for the same reason.
7979     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag,
7980                                sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
7981                                EVEX_V128, NotEVEX2VEXConvertible;
7982     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
7983                                sched.YMM, "{1to4}", "{y}">, EVEX_V256,
7984                                NotEVEX2VEXConvertible;
7985   }
7987   def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7988                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
7989                   VR128X:$src), 0, "att">;
7990   def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7991                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7992                   VK2WM:$mask, VR128X:$src), 0, "att">;
7993   def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7994                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7995                   VK2WM:$mask, VR128X:$src), 0, "att">;
7996   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7997                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
7998                   i64mem:$src), 0, "att">;
7999   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
8000                   "$dst {${mask}}, ${src}{1to2}}",
8001                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8002                   VK2WM:$mask, i64mem:$src), 0, "att">;
8003   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8004                   "$dst {${mask}} {z}, ${src}{1to2}}",
8005                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8006                   VK2WM:$mask, i64mem:$src), 0, "att">;
8008   def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
8009                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8010                   VR256X:$src), 0, "att">;
8011   def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|"
8012                   "$dst {${mask}}, $src}",
8013                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8014                   VK4WM:$mask, VR256X:$src), 0, "att">;
8015   def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
8016                   "$dst {${mask}} {z}, $src}",
8017                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8018                   VK4WM:$mask, VR256X:$src), 0, "att">;
8019   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8020                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8021                   i64mem:$src), 0, "att">;
8022   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
8023                   "$dst {${mask}}, ${src}{1to4}}",
8024                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8025                   VK4WM:$mask, i64mem:$src), 0, "att">;
8026   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8027                   "$dst {${mask}} {z}, ${src}{1to4}}",
8028                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8029                   VK4WM:$mask, i64mem:$src), 0, "att">;
8030 }
8032 defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
8033                                  SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8035 defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
8036                                 X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8037                                 PS, EVEX_CD8<32, CD8VF>;
8039 defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86cvttp2si,
8040                                 X86cvttp2siSAE, SchedWriteCvtPS2DQ>,
8041                                 XS, EVEX_CD8<32, CD8VF>;
8043 defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86cvttp2si,
8044                                  X86cvttp2siSAE, SchedWriteCvtPD2DQ>,
8045                                  PD, VEX_W, EVEX_CD8<64, CD8VF>;
8047 defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86cvttp2ui,
8048                                  X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PS,
8049                                  EVEX_CD8<32, CD8VF>;
8051 defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui,
8052                                  X86cvttp2uiSAE, SchedWriteCvtPD2DQ>,
8053                                  PS, VEX_W, EVEX_CD8<64, CD8VF>;
8055 defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
8056                                   X86VUintToFP, SchedWriteCvtDQ2PD>, XS,
8057                                   EVEX_CD8<32, CD8VH>;
8059 defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
8060                                  X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD,
8061                                  EVEX_CD8<32, CD8VF>;
8063 defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
8064                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8065                                  EVEX_CD8<32, CD8VF>;
8067 defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
8068                                  X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
8069                                  VEX_W, EVEX_CD8<64, CD8VF>;
8071 defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
8072                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8073                                  PS, EVEX_CD8<32, CD8VF>;
8075 defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
8076                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8077                                  PS, EVEX_CD8<64, CD8VF>;
8079 defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
8080                                  X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8081                                  PD, EVEX_CD8<64, CD8VF>;
8083 defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
8084                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8085                                  EVEX_CD8<32, CD8VH>;
8087 defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
8088                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8089                                  PD, EVEX_CD8<64, CD8VF>;
8091 defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
8092                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
8093                                  EVEX_CD8<32, CD8VH>;
8095 defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86cvttp2si,
8096                                  X86cvttp2siSAE, SchedWriteCvtPD2DQ>, VEX_W,
8097                                  PD, EVEX_CD8<64, CD8VF>;
8099 defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86cvttp2si,
8100                                  X86cvttp2siSAE, SchedWriteCvtPS2DQ>, PD,
8101                                  EVEX_CD8<32, CD8VH>;
8103 defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86cvttp2ui,
8104                                  X86cvttp2uiSAE, SchedWriteCvtPD2DQ>, VEX_W,
8105                                  PD, EVEX_CD8<64, CD8VF>;
8107 defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui,
8108                                  X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PD,
8109                                  EVEX_CD8<32, CD8VH>;
8111 defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
8112                             X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
8113                             EVEX_CD8<64, CD8VF>;
8115 defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
8116                             X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
8117                             EVEX_CD8<64, CD8VF>;
8119 defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp,
8120                             X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS,
8121                             EVEX_CD8<64, CD8VF>;
8123 defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp,
8124                             X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD,
8125                             EVEX_CD8<64, CD8VF>;
8127 let Predicates = [HasVLX] in {
8128   // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8129   // patterns have been disabled with null_frag.
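  // As an illustrative sketch (mirroring the defs below), a merge-masked node
  //   (X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0), VK2WM:$mask)
  // selects to VCVTPD2DQZ128rrk, printed in AT&T syntax as
  //   vcvtpd2dq %xmm1, %xmm0 {%k1}
  // and the zero-masked form selects the corresponding rrkz variant
  // ("... {%k1} {z}").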
8130   def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8131             (VCVTPD2DQZ128rr VR128X:$src)>;
8132   def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8133                           VK2WM:$mask),
8134             (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8135   def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8136                           VK2WM:$mask),
8137             (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8139   def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8140             (VCVTPD2DQZ128rm addr:$src)>;
8141   def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8142                           VK2WM:$mask),
8143             (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8144   def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8145                           VK2WM:$mask),
8146             (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8148   def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
8149             (VCVTPD2DQZ128rmb addr:$src)>;
8150   def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8151                           (v4i32 VR128X:$src0), VK2WM:$mask),
8152             (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8153   def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8154                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8155             (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8157   // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8158   // patterns have been disabled with null_frag.
8159   def : Pat<(v4i32 (X86cvttp2si (v2f64 VR128X:$src))),
8160             (VCVTTPD2DQZ128rr VR128X:$src)>;
8161   def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8162                           VK2WM:$mask),
8163             (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8164   def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8165                           VK2WM:$mask),
8166             (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8168   def : Pat<(v4i32 (X86cvttp2si (loadv2f64 addr:$src))),
8169             (VCVTTPD2DQZ128rm addr:$src)>;
8170   def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8171                           VK2WM:$mask),
8172             (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8173   def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8174                           VK2WM:$mask),
8175             (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8177   def : Pat<(v4i32 (X86cvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
8178             (VCVTTPD2DQZ128rmb addr:$src)>;
8179   def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8180                           (v4i32 VR128X:$src0), VK2WM:$mask),
8181             (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8182   def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8183                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8184             (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8186   // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8187   // patterns have been disabled with null_frag.
8188   def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8189             (VCVTPD2UDQZ128rr VR128X:$src)>;
8190   def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8191                            VK2WM:$mask),
8192             (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8193   def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8194                            VK2WM:$mask),
8195             (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8197   def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8198             (VCVTPD2UDQZ128rm addr:$src)>;
8199   def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8200                            VK2WM:$mask),
8201             (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8202   def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8203                            VK2WM:$mask),
8204             (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8206   def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
8207             (VCVTPD2UDQZ128rmb addr:$src)>;
8208   def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8209                            (v4i32 VR128X:$src0), VK2WM:$mask),
8210             (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8211   def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8212                            v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8213             (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8215   // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
8216   // patterns have been disabled with null_frag.
8217   def : Pat<(v4i32 (X86cvttp2ui (v2f64 VR128X:$src))),
8218             (VCVTTPD2UDQZ128rr VR128X:$src)>;
8219   def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8220                           VK2WM:$mask),
8221             (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8222   def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8223                           VK2WM:$mask),
8224             (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8226   def : Pat<(v4i32 (X86cvttp2ui (loadv2f64 addr:$src))),
8227             (VCVTTPD2UDQZ128rm addr:$src)>;
8228   def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8229                           VK2WM:$mask),
8230             (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8231   def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8232                           VK2WM:$mask),
8233             (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8235   def : Pat<(v4i32 (X86cvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
8236             (VCVTTPD2UDQZ128rmb addr:$src)>;
8237   def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8238                           (v4i32 VR128X:$src0), VK2WM:$mask),
8239             (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8240   def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8241                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8242             (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8245 let Predicates = [HasDQI, HasVLX] in {
8246   def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8247             (VCVTPS2QQZ128rm addr:$src)>;
8248   def : Pat<(v2i64 (vselect VK2WM:$mask,
8249                             (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8250                             VR128X:$src0)),
8251             (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8252   def : Pat<(v2i64 (vselect VK2WM:$mask,
8253                             (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8254                             v2i64x_info.ImmAllZerosV)),
8255             (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8257   def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8258             (VCVTPS2UQQZ128rm addr:$src)>;
8259   def : Pat<(v2i64 (vselect VK2WM:$mask,
8260                             (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8261                             VR128X:$src0)),
8262             (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8263   def : Pat<(v2i64 (vselect VK2WM:$mask,
8264                             (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8265                             v2i64x_info.ImmAllZerosV)),
8266             (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8268   def : Pat<(v2i64 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8269             (VCVTTPS2QQZ128rm addr:$src)>;
8270   def : Pat<(v2i64 (vselect VK2WM:$mask,
8271                             (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8272                             VR128X:$src0)),
8273             (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8274   def : Pat<(v2i64 (vselect VK2WM:$mask,
8275                             (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8276                             v2i64x_info.ImmAllZerosV)),
8277             (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8279   def : Pat<(v2i64 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8280             (VCVTTPS2UQQZ128rm addr:$src)>;
8281   def : Pat<(v2i64 (vselect VK2WM:$mask,
8282                             (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8283                             VR128X:$src0)),
8284             (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8285   def : Pat<(v2i64 (vselect VK2WM:$mask,
8286                             (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8287                             v2i64x_info.ImmAllZerosV)),
8288             (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8291 let Predicates = [HasAVX512, NoVLX] in {
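// These patterns cover targets without AVX512VL, where only the 512-bit
// conversions exist: the narrow source is widened into a ZMM register with
// INSERT_SUBREG, converted by the 512-bit instruction, and the desired low
// part of the result is taken back out with EXTRACT_SUBREG.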
8292 def : Pat<(v8i32 (X86cvttp2ui (v8f32 VR256X:$src1))),
8293           (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
8294            (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
8295                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;
8297 def : Pat<(v4i32 (X86cvttp2ui (v4f32 VR128X:$src1))),
8298           (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
8299            (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
8300                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;
8302 def : Pat<(v4i32 (X86cvttp2ui (v4f64 VR256X:$src1))),
8303           (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
8304            (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8305                                  VR256X:$src1, sub_ymm)))), sub_xmm)>;
8307 def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
8308           (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
8309            (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
8310                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;
8312 def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
8313           (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
8314            (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
8315                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;
8317 def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
8318           (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
8319            (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
8320                                  VR128X:$src1, sub_xmm)))), sub_ymm)>;
8322 def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
8323           (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
8324            (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
8325                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;
8328 let Predicates = [HasVLX] in {
8329   def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8330             (VCVTDQ2PDZ128rm addr:$src)>;
8331   def : Pat<(v2f64 (vselect VK2WM:$mask,
8332                             (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8333                             VR128X:$src0)),
8334             (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8335   def : Pat<(v2f64 (vselect VK2WM:$mask,
8336                             (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8337                             v2f64x_info.ImmAllZerosV)),
8338             (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8340   def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8341             (VCVTUDQ2PDZ128rm addr:$src)>;
8342   def : Pat<(v2f64 (vselect VK2WM:$mask,
8343                             (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8344                             VR128X:$src0)),
8345             (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8346   def : Pat<(v2f64 (vselect VK2WM:$mask,
8347                             (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8348                             v2f64x_info.ImmAllZerosV)),
8349             (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8352 let Predicates = [HasDQI, HasVLX] in {
8353   // Special patterns to allow use of X86VMSintToFP for masking. Instruction
8354   // patterns have been disabled with null_frag.
8355   def : Pat<(v4f32 (X86VSintToFP (v2i64 VR128X:$src))),
8356             (VCVTQQ2PSZ128rr VR128X:$src)>;
8357   def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
8358                            VK2WM:$mask),
8359             (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8360   def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
8361                            VK2WM:$mask),
8362             (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
8364   def : Pat<(v4f32 (X86VSintToFP (loadv2i64 addr:$src))),
8365             (VCVTQQ2PSZ128rm addr:$src)>;
8366   def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
8367                            VK2WM:$mask),
8368             (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8369   def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
8370                            VK2WM:$mask),
8371             (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
8373   def : Pat<(v4f32 (X86VSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))))),
8374             (VCVTQQ2PSZ128rmb addr:$src)>;
8375   def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
8376                            (v4f32 VR128X:$src0), VK2WM:$mask),
8377             (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8378   def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
8379                            v4f32x_info.ImmAllZerosV, VK2WM:$mask),
8380             (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
8382   // Special patterns to allow use of X86VMUintToFP for masking. Instruction
8383   // patterns have been disabled with null_frag.
8384   def : Pat<(v4f32 (X86VUintToFP (v2i64 VR128X:$src))),
8385             (VCVTUQQ2PSZ128rr VR128X:$src)>;
8386   def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
8387                            VK2WM:$mask),
8388             (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8389   def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
8390                            VK2WM:$mask),
8391             (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
8393   def : Pat<(v4f32 (X86VUintToFP (loadv2i64 addr:$src))),
8394             (VCVTUQQ2PSZ128rm addr:$src)>;
8395   def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
8396                            VK2WM:$mask),
8397             (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8398   def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
8399                            VK2WM:$mask),
8400             (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
8402   def : Pat<(v4f32 (X86VUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))))),
8403             (VCVTUQQ2PSZ128rmb addr:$src)>;
8404   def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
8405                            (v4f32 VR128X:$src0), VK2WM:$mask),
8406             (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8407   def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
8408                            v4f32x_info.ImmAllZerosV, VK2WM:$mask),
8409             (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
8412 let Predicates = [HasDQI, NoVLX] in {
8413 def : Pat<(v2i64 (X86cvttp2si (v2f64 VR128X:$src1))),
8414           (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
8415            (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8416                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;
8418 def : Pat<(v4i64 (X86cvttp2si (v4f32 VR128X:$src1))),
8419           (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
8420            (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
8421                                   VR128X:$src1, sub_xmm)))), sub_ymm)>;
8423 def : Pat<(v4i64 (X86cvttp2si (v4f64 VR256X:$src1))),
8424           (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
8425            (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8426                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;
8428 def : Pat<(v2i64 (X86cvttp2ui (v2f64 VR128X:$src1))),
8429           (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
8430            (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8431                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;
8433 def : Pat<(v4i64 (X86cvttp2ui (v4f32 VR128X:$src1))),
8434           (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
8435            (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
8436                                   VR128X:$src1, sub_xmm)))), sub_ymm)>;
8438 def : Pat<(v4i64 (X86cvttp2ui (v4f64 VR256X:$src1))),
8439           (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
8440            (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8441                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;
8443 def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
8444           (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
8445            (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8446                                   VR256X:$src1, sub_ymm)))), sub_xmm)>;
8448 def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
8449           (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
8450            (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8451                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;
8453 def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
8454           (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
8455            (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8456                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;
8458 def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
8459           (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
8460            (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8461                                   VR256X:$src1, sub_ymm)))), sub_xmm)>;
8463 def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
8464           (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
8465            (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8466                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;
8468 def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
8469           (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
8470            (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8471                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;
8474 //===----------------------------------------------------------------------===//
8475 // Half precision conversion instructions
8476 //===----------------------------------------------------------------------===//
8478 multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8479                            X86MemOperand x86memop, PatFrag ld_frag,
8480                            X86FoldableSchedWrite sched> {
8481   defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
8482                             (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
8483                             (X86cvtph2ps (_src.VT _src.RC:$src))>,
8484                             T8PD, Sched<[sched]>;
8485   defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
8486                             (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
8487                             (X86cvtph2ps (_src.VT
8488                                           (ld_frag addr:$src)))>,
8489                             T8PD, Sched<[sched.Folded]>;
8492 multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8493                                X86FoldableSchedWrite sched> {
8494   defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
8495                              (ins _src.RC:$src), "vcvtph2ps",
8496                              "{sae}, $src", "$src, {sae}",
8497                              (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
8498                              T8PD, EVEX_B, Sched<[sched]>;
8501 let Predicates = [HasAVX512] in
8502   defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, load,
8503                                     WriteCvtPH2PSZ>,
8504                     avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
8505                     EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8507 let Predicates = [HasVLX] in {
8508   defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
8509                        load, WriteCvtPH2PSY>, EVEX, EVEX_V256,
8510                        EVEX_CD8<32, CD8VH>;
8511   defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
8512                        load, WriteCvtPH2PS>, EVEX, EVEX_V128,
8513                        EVEX_CD8<32, CD8VH>;
8515   // Pattern match vcvtph2ps of a scalar i64 load.
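  // That is, a 64-bit value zero-extended into an XMM register (X86vzload64,
  // or a scalar_to_vector of a loadi64), whose low four f16 elements are
  // converted to f32.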
8516   def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
8517             (VCVTPH2PSZ128rm addr:$src)>;
8518   def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
8519               (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
8520             (VCVTPH2PSZ128rm addr:$src)>;
8523 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8524                            X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
8525 let ExeDomain = GenericDomain in {
8526   def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8527              (ins _src.RC:$src1, i32u8imm:$src2),
8528              "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8529              [(set _dest.RC:$dst,
8530                    (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2)))]>,
8531              Sched<[RR]>;
8532   let Constraints = "$src0 = $dst" in
8533   def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8534              (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8535              "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
8536              [(set _dest.RC:$dst,
8537                    (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
8538                                  _dest.RC:$src0, _src.KRCWM:$mask))]>,
8539              Sched<[RR]>, EVEX_K;
8540   def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8541              (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8542              "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
8543              [(set _dest.RC:$dst,
8544                    (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
8545                                  _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
8546              Sched<[RR]>, EVEX_KZ;
8547   let hasSideEffects = 0, mayStore = 1 in {
8548     def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
8549                (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
8550                "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8551                Sched<[MR]>;
8552     def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
8553                (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8554                "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
8555                 EVEX_K, Sched<[MR]>, NotMemoryFoldable;
8556   }
8560 multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8561                                SchedWrite Sched> {
8562   let hasSideEffects = 0 in
8563   defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
8564                    (outs _dest.RC:$dst),
8565                    (ins _src.RC:$src1, i32u8imm:$src2),
8566                    "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
8567                    EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
8570 let Predicates = [HasAVX512] in {
8571   defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
8572                                     WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
8573                     avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
8574                                         EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8575   let Predicates = [HasVLX] in {
8576     defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
8577                                          WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
8578                                          EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
8579     defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
8580                                          WriteCvtPS2PH, WriteCvtPS2PHSt>,
8581                                          EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
8582   }
8584   def : Pat<(store (f64 (extractelt
8585                          (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
8586                          (iPTR 0))), addr:$dst),
8587             (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
8588   def : Pat<(store (i64 (extractelt
8589                          (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
8590                          (iPTR 0))), addr:$dst),
8591             (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
8592   def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
8593             (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
8594   def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),
8595             (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
8598 // Patterns for matching conversions from float to half-float and vice versa.
8599 let Predicates = [HasVLX] in {
8600   // Use MXCSR.RC for rounding instead of explicitly specifying the default
8601   // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
8602   // default configuration we support, but falling back to MXCSR is more
8603   // consistent with other instructions, which are always controlled by it.
8604   // The MXCSR-controlled mode is selected by immediate 0b100 (the 4 below).
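  // Rough illustration of the first pattern below (register choices are only
  // examples): fp_to_f16 of a value in %xmm0 lowers to
  //   vcvtps2ph $4, %xmm0, %xmm0    # imm bit 2 set: round via MXCSR.RC
  //   vmovd     %xmm0, %eax         # the f16 result ends up in %ax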
8605   def : Pat<(fp_to_f16 FR32X:$src),
8606             (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (v8i16 (VCVTPS2PHZ128rr
8607               (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4))), sub_16bit))>;
8609   def : Pat<(f16_to_fp GR16:$src),
8610             (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
8611               (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)))), FR32X)) >;
8613   def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
8614             (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
8615               (v8i16 (VCVTPS2PHZ128rr
8616                (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4)))), FR32X)) >;
8619 // Unordered/ordered scalar FP compare with SAE, setting EFLAGS.
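// For example, the SAE register form defined below is written
//   "vucomiss {sae}, %xmm1, %xmm0"   in AT&T syntax, or
//   "vucomiss xmm0, xmm1, {sae}"     in Intel syntax,
// comparing with all floating-point exceptions suppressed.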
8620 multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
8621                             string OpcodeStr, X86FoldableSchedWrite sched> {
8622   let hasSideEffects = 0 in
8623   def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
8624                   !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
8625                   EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
8628 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
8629   defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>,
8630                                    AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
8631   defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>,
8632                                    AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
8633   defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>,
8634                                    AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
8635   defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>,
8636                                    AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
8639 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
8640   defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
8641                                  "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
8642                                  EVEX_CD8<32, CD8VT1>;
8643   defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
8644                                   "ucomisd", WriteFCom>, PD, EVEX,
8645                                   VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8646   let Pattern = []<dag> in {
8647     defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
8648                                    "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
8649                                    EVEX_CD8<32, CD8VT1>;
8650     defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
8651                                    "comisd", WriteFCom>, PD, EVEX,
8652                                     VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8653   }
8654   let isCodeGenOnly = 1 in {
8655     defm VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
8656                           sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
8657                           EVEX_CD8<32, CD8VT1>;
8658     defm VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
8659                           sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX,
8660                           VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8662     defm VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
8663                           sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
8664                           EVEX_CD8<32, CD8VT1>;
8665     defm VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
8666                           sse_load_f64, "comisd", WriteFCom>, PD, EVEX,
8667                           VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8668   }
8671 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
8672 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8673                          X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8674   let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
8675   defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8676                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8677                            "$src2, $src1", "$src1, $src2",
8678                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8679                            EVEX_4V, VEX_LIG, Sched<[sched]>;
8680   defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8681                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8682                          "$src2, $src1", "$src1, $src2",
8683                          (OpNode (_.VT _.RC:$src1),
8684                           _.ScalarIntMemCPat:$src2)>, EVEX_4V, VEX_LIG,
8685                           Sched<[sched.Folded, sched.ReadAfterFold]>;
8689 defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
8690                                f32x_info>, EVEX_CD8<32, CD8VT1>,
8691                                T8PD;
8692 defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
8693                                f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
8694                                T8PD;
8695 defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
8696                                  SchedWriteFRsqrt.Scl, f32x_info>,
8697                                  EVEX_CD8<32, CD8VT1>, T8PD;
8698 defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
8699                                  SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
8700                                  EVEX_CD8<64, CD8VT1>, T8PD;
8702 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
8703 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
8704                          X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8705   let ExeDomain = _.ExeDomain in {
8706   defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8707                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
8708                          (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
8709                          Sched<[sched]>;
8710   defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8711                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8712                          (OpNode (_.VT
8713                            (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
8714                          Sched<[sched.Folded, sched.ReadAfterFold]>;
8715   defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8716                           (ins _.ScalarMemOp:$src), OpcodeStr,
8717                           "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8718                           (OpNode (_.VT
8719                             (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
8720                           EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
8721   }
8724 multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
8725                                 X86SchedWriteWidths sched> {
8726   defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
8727                            v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
8728   defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM,
8729                            v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
8731   // Define only if the AVX512VL feature is present.
8732   let Predicates = [HasVLX] in {
8733     defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8734                                 OpNode, sched.XMM, v4f32x_info>,
8735                                EVEX_V128, EVEX_CD8<32, CD8VF>;
8736     defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8737                                 OpNode, sched.YMM, v8f32x_info>,
8738                                EVEX_V256, EVEX_CD8<32, CD8VF>;
8739     defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8740                                 OpNode, sched.XMM, v2f64x_info>,
8741                                EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
8742     defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8743                                 OpNode, sched.YMM, v4f64x_info>,
8744                                EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
8745   }
8748 defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
8749 defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
8751 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
8752 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
8753                          SDNode OpNode, SDNode OpNodeSAE,
8754                          X86FoldableSchedWrite sched> {
8755   let ExeDomain = _.ExeDomain in {
8756   defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8757                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8758                            "$src2, $src1", "$src1, $src2",
8759                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8760                            Sched<[sched]>;
8762   defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8763                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8764                             "{sae}, $src2, $src1", "$src1, $src2, {sae}",
8765                             (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8766                             EVEX_B, Sched<[sched]>;
8768   defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8769                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8770                          "$src2, $src1", "$src1, $src2",
8771                          (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2)>,
8772                          Sched<[sched.Folded, sched.ReadAfterFold]>;
8773   }
8776 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8777                         SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
8778   defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
8779                            sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG;
8780   defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
8781                            sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
8784 let Predicates = [HasERI] in {
8785   defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
8786                                SchedWriteFRcp.Scl>, T8PD, EVEX_4V;
8787   defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
8788                                SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
8791 defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
8792                               SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
8793 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
8795 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8796                          SDNode OpNode, X86FoldableSchedWrite sched> {
8797   let ExeDomain = _.ExeDomain in {
8798   defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8799                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
8800                          (OpNode (_.VT _.RC:$src))>,
8801                          Sched<[sched]>;
8803   defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8804                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8805                          (OpNode (_.VT
8806                              (bitconvert (_.LdFrag addr:$src))))>,
8807                           Sched<[sched.Folded, sched.ReadAfterFold]>;
8809   defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8810                          (ins _.ScalarMemOp:$src), OpcodeStr,
8811                          "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8812                          (OpNode (_.VT
8813                                   (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
8814                          EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
8815   }
8817 multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8818                          SDNode OpNode, X86FoldableSchedWrite sched> {
8819   let ExeDomain = _.ExeDomain in
8820   defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8821                         (ins _.RC:$src), OpcodeStr,
8822                         "{sae}, $src", "$src, {sae}",
8823                         (OpNode (_.VT _.RC:$src))>,
8824                         EVEX_B, Sched<[sched]>;
8827 multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
8828                        SDNode OpNodeSAE, X86SchedWriteWidths sched> {
8829    defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
8830               avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
8831               T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
8832    defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
8833               avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
8834               T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
8837 multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
8838                                   SDNode OpNode, X86SchedWriteWidths sched> {
8839   // Define only if the AVX512VL feature is present.
8840   let Predicates = [HasVLX] in {
8841     defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
8842                                 sched.XMM>,
8843                                 EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
8844     defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
8845                                 sched.YMM>,
8846                                 EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
8847     defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
8848                                 sched.XMM>,
8849                                 EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
8850     defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
8851                                 sched.YMM>,
8852                                 EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
8853   }
8856 let Predicates = [HasERI] in {
8857  defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
8858                             SchedWriteFRsqrt>, EVEX;
8859  defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
8860                             SchedWriteFRcp>, EVEX;
8861  defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
8862                             SchedWriteFAdd>, EVEX;
8864 defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
8865                             SchedWriteFRnd>,
8866                  avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
8867                                           SchedWriteFRnd>, EVEX;
8869 multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
8870                                     X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8871   let ExeDomain = _.ExeDomain in
8872   defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8873                          (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
8874                          (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
8875                          EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
8878 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
8879                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8880   let ExeDomain = _.ExeDomain in {
8881   defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8882                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
8883                          (_.VT (fsqrt _.RC:$src))>, EVEX,
8884                          Sched<[sched]>;
8885   defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8886                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8887                          (fsqrt (_.VT
8888                            (bitconvert (_.LdFrag addr:$src))))>, EVEX,
8889                            Sched<[sched.Folded, sched.ReadAfterFold]>;
8890   defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8891                           (ins _.ScalarMemOp:$src), OpcodeStr,
8892                           "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8893                           (fsqrt (_.VT
8894                             (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
8895                           EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
8896   }
8899 multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
8900                                   X86SchedWriteSizes sched> {
8901   defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8902                                 sched.PS.ZMM, v16f32_info>,
8903                                 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8904   defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8905                                 sched.PD.ZMM, v8f64_info>,
8906                                 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8907   // Define only if the AVX512VL feature is present.
8908   let Predicates = [HasVLX] in {
8909     defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8910                                      sched.PS.XMM, v4f32x_info>,
8911                                      EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
8912     defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8913                                      sched.PS.YMM, v8f32x_info>,
8914                                      EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
8915     defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8916                                      sched.PD.XMM, v2f64x_info>,
8917                                      EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8918     defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8919                                      sched.PD.YMM, v4f64x_info>,
8920                                      EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8921   }
8924 multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
8925                                         X86SchedWriteSizes sched> {
8926   defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
8927                                       sched.PS.ZMM, v16f32_info>,
8928                                       EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8929   defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
8930                                       sched.PD.ZMM, v8f64_info>,
8931                                       EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8934 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
8935                               X86VectorVTInfo _, string Name> {
8936   let ExeDomain = _.ExeDomain in {
8937     defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8938                          (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8939                          "$src2, $src1", "$src1, $src2",
8940                          (X86fsqrts (_.VT _.RC:$src1),
8941                                     (_.VT _.RC:$src2))>,
8942                          Sched<[sched]>;
8943     defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8944                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8945                          "$src2, $src1", "$src1, $src2",
8946                          (X86fsqrts (_.VT _.RC:$src1),
8947                                     _.ScalarIntMemCPat:$src2)>,
8948                          Sched<[sched.Folded, sched.ReadAfterFold]>;
8949     defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8950                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
8951                          "$rc, $src2, $src1", "$src1, $src2, $rc",
8952                          (X86fsqrtRnds (_.VT _.RC:$src1),
8953                                      (_.VT _.RC:$src2),
8954                                      (i32 timm:$rc))>,
8955                          EVEX_B, EVEX_RC, Sched<[sched]>;
8957     let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
8958       def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
8959                 (ins _.FRC:$src1, _.FRC:$src2),
8960                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8961                 Sched<[sched]>;
8962       let mayLoad = 1 in
8963         def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
8964                   (ins _.FRC:$src1, _.ScalarMemOp:$src2),
8965                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8966                   Sched<[sched.Folded, sched.ReadAfterFold]>;
8967     }
8968   }
8970   let Predicates = [HasAVX512] in {
8971     def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
8972               (!cast<Instruction>(Name#Zr)
8973                   (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
8974   }
8976   let Predicates = [HasAVX512, OptForSize] in {
8977     def : Pat<(_.EltVT (fsqrt (load addr:$src))),
8978               (!cast<Instruction>(Name#Zm)
8979                   (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
8980   }
8983 multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
8984                                   X86SchedWriteSizes sched> {
8985   defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
8986                         EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
8987   defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
8988                         EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
8991 defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
8992              avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
8994 defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
8996 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
8997                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8998   let ExeDomain = _.ExeDomain in {
8999   defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9000                            (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9001                            "$src3, $src2, $src1", "$src1, $src2, $src3",
9002                            (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9003                            (i32 imm:$src3)))>,
9004                            Sched<[sched]>;
9006   defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9007                          (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9008                          "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
9009                          (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9010                          (i32 imm:$src3)))>, EVEX_B,
9011                          Sched<[sched]>;
9013   defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9014                          (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
9015                          OpcodeStr,
9016                          "$src3, $src2, $src1", "$src1, $src2, $src3",
9017                          (_.VT (X86RndScales _.RC:$src1,
9018                                 _.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>,
9019                          Sched<[sched.Folded, sched.ReadAfterFold]>;
9021   let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
9022     def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9023                (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
9024                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9025                []>, Sched<[sched]>;
9027     let mayLoad = 1 in
9028       def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9029                  (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9030                  OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9031                  []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
9032   }
9033   }
9035   let Predicates = [HasAVX512] in {
9036     def : Pat<(X86VRndScale _.FRC:$src1, imm:$src2),
9037               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
9038                _.FRC:$src1, imm:$src2))>;
9039   }
9041   let Predicates = [HasAVX512, OptForSize] in {
9042     def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), imm:$src2),
9043               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
9044                addr:$src1, imm:$src2))>;
9045   }
9048 defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
9049                                            SchedWriteFRnd.Scl, f32x_info>,
9050                                            AVX512AIi8Base, EVEX_4V, VEX_LIG,
9051                                            EVEX_CD8<32, CD8VT1>;
9053 defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
9054                                            SchedWriteFRnd.Scl, f64x_info>,
9055                                            VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
9056                                            EVEX_CD8<64, CD8VT1>;
9058 multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
9059                                 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
9060                                 dag OutMask, Predicate BasePredicate> {
9061   let Predicates = [BasePredicate] in {
9062     def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
9063                (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9064                (extractelt _.VT:$dst, (iPTR 0))))),
9065               (!cast<Instruction>("V"#OpcPrefix#r_Intk)
9066                _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
9068     def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
9069                (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9070                ZeroFP))),
9071               (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
9072                OutMask, _.VT:$src2, _.VT:$src1)>;
9073   }
9076 defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
9077                             (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
9078                             fp32imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9079 defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
9080                             (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
9081                             fp64imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9084 //===----------------------------------------------------------------------===//
9085 // Integer truncate and extend operations
9086 //===----------------------------------------------------------------------===//
9088 // PatFrags that combine a vselect and a truncate op. They take operands in
9089 // the same order as X86vmtrunc, X86vmtruncs and X86vmtruncus, so either the
9090 // PatFrags or those nodes can be passed to the multiclasses below.
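// For instance, select_trunc below matches (vselect $mask, (trunc $src), $src0)
// while presenting its operands as ($src, $src0, $mask) to match the X86vmtrunc
// operand order, so it can be plugged in wherever a MaskNode such as X86vmtrunc
// is expected (e.g. avx512_trunc_common just below).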
9091 def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
9092                            (vselect node:$mask,
9093                                     (trunc node:$src), node:$src0)>;
9094 def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
9095                             (vselect node:$mask,
9096                                      (X86vtruncs node:$src), node:$src0)>;
9097 def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
9098                              (vselect node:$mask,
9099                                       (X86vtruncus node:$src), node:$src0)>;
9101 multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9102                               SDPatternOperator MaskNode,
9103                               X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
9104                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
9105   let ExeDomain = DestInfo.ExeDomain in {
9106   def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9107              (ins SrcInfo.RC:$src),
9108              OpcodeStr # "\t{$src, $dst|$dst, $src}",
9109              [(set DestInfo.RC:$dst,
9110                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
9111              EVEX, Sched<[sched]>;
9112   let Constraints = "$src0 = $dst" in
9113   def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9114              (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9115              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9116              [(set DestInfo.RC:$dst,
9117                    (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9118                              (DestInfo.VT DestInfo.RC:$src0),
9119                              SrcInfo.KRCWM:$mask))]>,
9120              EVEX, EVEX_K, Sched<[sched]>;
9121   def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9122              (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9123              OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
9124              [(set DestInfo.RC:$dst,
9125                    (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9126                              DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
9127              EVEX, EVEX_KZ, Sched<[sched]>;
9128   }
9130   let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
9131     def mr : AVX512XS8I<opc, MRMDestMem, (outs),
9132                (ins x86memop:$dst, SrcInfo.RC:$src),
9133                OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
9134                EVEX, Sched<[sched.Folded]>;
9136     def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
9137                (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9138                OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
9139                EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
9140   }//mayStore = 1, hasSideEffects = 0
9141 }
9143 multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9144                                     X86VectorVTInfo DestInfo,
9145                                     PatFrag truncFrag, PatFrag mtruncFrag,
9146                                     string Name> {
9148   def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9149             (!cast<Instruction>(Name#SrcInfo.ZSuffix##mr)
9150                                     addr:$dst, SrcInfo.RC:$src)>;
9152   def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
9153                         SrcInfo.KRCWM:$mask),
9154             (!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk)
9155                             addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
9156 }
9158 multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9159                         SDNode OpNode256, SDNode OpNode512,
9160                         SDPatternOperator MaskNode128,
9161                         SDPatternOperator MaskNode256,
9162                         SDPatternOperator MaskNode512,
9163                         X86FoldableSchedWrite sched,
9164                         AVX512VLVectorVTInfo VTSrcInfo,
9165                         X86VectorVTInfo DestInfoZ128,
9166                         X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9167                         X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9168                         X86MemOperand x86memopZ, PatFrag truncFrag,
9169                         PatFrag mtruncFrag, Predicate prd = HasAVX512>{
9171   let Predicates = [HasVLX, prd] in {
9172     defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
9173                              VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9174                 avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
9175                              truncFrag, mtruncFrag, NAME>, EVEX_V128;
9177     defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
9178                              VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9179                 avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
9180                              truncFrag, mtruncFrag, NAME>, EVEX_V256;
9181   }
9182   let Predicates = [prd] in
9183     defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
9184                              VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9185                 avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
9186                              truncFrag, mtruncFrag, NAME>, EVEX_V512;
9187 }
9189 multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9190                            SDPatternOperator MaskNode,
9191                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9192                            PatFrag MaskedStoreNode, SDNode InVecNode,
9193                            SDPatternOperator InVecMaskNode> {
9194   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
9195                           InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
9196                           avx512vl_i64_info, v16i8x_info, v16i8x_info,
9197                           v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9198                           MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
9199 }
9201 multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9202                            SDPatternOperator MaskNode,
9203                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9204                            PatFrag MaskedStoreNode, SDNode InVecNode,
9205                            SDPatternOperator InVecMaskNode> {
9206   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9207                           InVecMaskNode, InVecMaskNode, MaskNode, sched,
9208                           avx512vl_i64_info, v8i16x_info, v8i16x_info,
9209                           v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9210                           MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
9211 }
9213 multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9214                            SDPatternOperator MaskNode,
9215                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9216                            PatFrag MaskedStoreNode, SDNode InVecNode,
9217                            SDPatternOperator InVecMaskNode> {
9218   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9219                           InVecMaskNode, MaskNode, MaskNode, sched,
9220                           avx512vl_i64_info, v4i32x_info, v4i32x_info,
9221                           v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9222                           MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
9223 }
9225 multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
9226                            SDPatternOperator MaskNode,
9227                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9228                            PatFrag MaskedStoreNode, SDNode InVecNode,
9229                            SDPatternOperator InVecMaskNode> {
9230   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9231                           InVecMaskNode, InVecMaskNode, MaskNode, sched,
9232                           avx512vl_i32_info, v16i8x_info, v16i8x_info,
9233                           v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
9234                           MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
9235 }
9237 multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9238                            SDPatternOperator MaskNode,
9239                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9240                            PatFrag MaskedStoreNode, SDNode InVecNode,
9241                            SDPatternOperator InVecMaskNode> {
9242   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9243                           InVecMaskNode, MaskNode, MaskNode, sched,
9244                           avx512vl_i32_info, v8i16x_info, v8i16x_info,
9245                           v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
9246                           MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
9247 }
9249 multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9250                            SDPatternOperator MaskNode,
9251                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9252                            PatFrag MaskedStoreNode, SDNode InVecNode,
9253                            SDPatternOperator InVecMaskNode> {
9254   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9255                           InVecMaskNode, MaskNode, MaskNode, sched,
9256                           avx512vl_i16_info, v16i8x_info, v16i8x_info,
9257                           v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
9258                           MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
9259 }
9261 defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",   trunc, select_trunc,
9262                                   WriteShuffle256, truncstorevi8,
9263                                   masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9264 defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",  X86vtruncs, select_truncs,
9265                                   WriteShuffle256, truncstore_s_vi8,
9266                                   masked_truncstore_s_vi8, X86vtruncs,
9267                                   X86vmtruncs>;
9268 defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
9269                                   select_truncus, WriteShuffle256,
9270                                   truncstore_us_vi8, masked_truncstore_us_vi8,
9271                                   X86vtruncus, X86vmtruncus>;
9273 defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
9274                                   WriteShuffle256, truncstorevi16,
9275                                   masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9276 defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, select_truncs,
9277                                   WriteShuffle256, truncstore_s_vi16,
9278                                   masked_truncstore_s_vi16, X86vtruncs,
9279                                   X86vmtruncs>;
9280 defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
9281                                   select_truncus, WriteShuffle256,
9282                                   truncstore_us_vi16, masked_truncstore_us_vi16,
9283                                   X86vtruncus, X86vmtruncus>;
9285 defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
9286                                   WriteShuffle256, truncstorevi32,
9287                                   masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
9288 defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, select_truncs,
9289                                   WriteShuffle256, truncstore_s_vi32,
9290                                   masked_truncstore_s_vi32, X86vtruncs,
9291                                   X86vmtruncs>;
9292 defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
9293                                   select_truncus, WriteShuffle256,
9294                                   truncstore_us_vi32, masked_truncstore_us_vi32,
9295                                   X86vtruncus, X86vmtruncus>;
9297 defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
9298                                   WriteShuffle256, truncstorevi8,
9299                                   masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9300 defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
9301                                   WriteShuffle256, truncstore_s_vi8,
9302                                   masked_truncstore_s_vi8, X86vtruncs,
9303                                   X86vmtruncs>;
9304 defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus,
9305                                   select_truncus, WriteShuffle256,
9306                                   truncstore_us_vi8, masked_truncstore_us_vi8,
9307                                   X86vtruncus, X86vmtruncus>;
9309 defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
9310                                   WriteShuffle256, truncstorevi16,
9311                                   masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9312 defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
9313                                   WriteShuffle256, truncstore_s_vi16,
9314                                   masked_truncstore_s_vi16, X86vtruncs,
9315                                   X86vmtruncs>;
9316 defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
9317                                   select_truncus, WriteShuffle256,
9318                                   truncstore_us_vi16, masked_truncstore_us_vi16,
9319                                   X86vtruncus, X86vmtruncus>;
9321 defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
9322                                   WriteShuffle256, truncstorevi8,
9323                                   masked_truncstorevi8, X86vtrunc,
9324                                   X86vmtrunc>;
9325 defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
9326                                   WriteShuffle256, truncstore_s_vi8,
9327                                   masked_truncstore_s_vi8, X86vtruncs,
9328                                   X86vmtruncs>;
9329 defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
9330                                   select_truncus, WriteShuffle256,
9331                                   truncstore_us_vi8, masked_truncstore_us_vi8,
9332                                   X86vtruncus, X86vmtruncus>;
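// Illustrative assembly forms produced by the definitions above (AT&T syntax):
//   vpmovqb   %zmm0, %xmm1 {%k1}     - register form, merge-masked
//   vpmovusdw %zmm0, (%rdi) {%k1}    - masked truncating store (mrk form)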
9334 let Predicates = [HasAVX512, NoVLX] in {
9335 def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
9336          (v8i16 (EXTRACT_SUBREG
9337                  (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
9338                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
9339 def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
9340          (v4i32 (EXTRACT_SUBREG
9341                  (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
9342                                            VR256X:$src, sub_ymm)))), sub_xmm))>;
9343 }
9345 let Predicates = [HasBWI, NoVLX] in {
9346 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9347          (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
9348                                             VR256X:$src, sub_ymm))), sub_xmm))>;
9349 }
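// The pattern blocks above lower 256-bit truncates without VLX by widening the
// source into a 512-bit register (INSERT_SUBREG into an IMPLICIT_DEF zmm),
// running the 512-bit VPMOVDW/VPMOVQD/VPMOVWB, and extracting the low xmm.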
9351 // Without BWI we can't use vXi16/vXi8 vselect, so we have to use vmtrunc nodes.
9352 multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
9353                            X86VectorVTInfo DestInfo,
9354                            X86VectorVTInfo SrcInfo> {
9355   def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9356                                  DestInfo.RC:$src0,
9357                                  SrcInfo.KRCWM:$mask)),
9358             (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
9359                                                  SrcInfo.KRCWM:$mask,
9360                                                  SrcInfo.RC:$src)>;
9362   def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9363                                  DestInfo.ImmAllZerosV,
9364                                  SrcInfo.KRCWM:$mask)),
9365             (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
9366                                                   SrcInfo.RC:$src)>;
9367 }
9369 let Predicates = [HasVLX] in {
9370 defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
9371 defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
9372 defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
9373 }
9375 let Predicates = [HasAVX512] in {
9376 defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
9377 defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
9378 defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
9380 defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
9381 defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
9382 defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
9384 defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
9385 defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
9386 defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
9387 }
9389 multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9390               X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
9391               X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
9392   let ExeDomain = DestInfo.ExeDomain in {
9393   defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
9394                     (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
9395                     (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
9396                   EVEX, Sched<[sched]>;
9398   defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
9399                   (ins x86memop:$src), OpcodeStr ,"$src", "$src",
9400                   (DestInfo.VT (LdFrag addr:$src))>,
9401                 EVEX, Sched<[sched.Folded]>;
9402   }
9403 }
9405 multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
9406           SDNode OpNode, SDNode InVecNode, string ExtTy,
9407           X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9408   let Predicates = [HasVLX, HasBWI] in {
9409     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
9410                     v16i8x_info, i64mem, LdFrag, InVecNode>,
9411                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
9413     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
9414                     v16i8x_info, i128mem, LdFrag, OpNode>,
9415                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
9416   }
9417   let Predicates = [HasBWI] in {
9418     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
9419                     v32i8x_info, i256mem, LdFrag, OpNode>,
9420                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
9421   }
9422 }
9424 multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
9425           SDNode OpNode, SDNode InVecNode, string ExtTy,
9426           X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9427   let Predicates = [HasVLX, HasAVX512] in {
9428     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
9429                    v16i8x_info, i32mem, LdFrag, InVecNode>,
9430                          EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
9432     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
9433                    v16i8x_info, i64mem, LdFrag, InVecNode>,
9434                          EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
9435   }
9436   let Predicates = [HasAVX512] in {
9437     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
9438                    v16i8x_info, i128mem, LdFrag, OpNode>,
9439                          EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
9440   }
9441 }
9443 multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
9444           SDNode OpNode, SDNode InVecNode, string ExtTy,
9445           X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9446   let Predicates = [HasVLX, HasAVX512] in {
9447     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9448                    v16i8x_info, i16mem, LdFrag, InVecNode>,
9449                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
9451     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9452                    v16i8x_info, i32mem, LdFrag, InVecNode>,
9453                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
9454   }
9455   let Predicates = [HasAVX512] in {
9456     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9457                    v16i8x_info, i64mem, LdFrag, InVecNode>,
9458                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
9459   }
9460 }
9462 multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
9463          SDNode OpNode, SDNode InVecNode, string ExtTy,
9464          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9465   let Predicates = [HasVLX, HasAVX512] in {
9466     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
9467                    v8i16x_info, i64mem, LdFrag, InVecNode>,
9468                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
9470     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
9471                    v8i16x_info, i128mem, LdFrag, OpNode>,
9472                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
9473   }
9474   let Predicates = [HasAVX512] in {
9475     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
9476                    v16i16x_info, i256mem, LdFrag, OpNode>,
9477                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
9478   }
9479 }
9481 multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
9482          SDNode OpNode, SDNode InVecNode, string ExtTy,
9483          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9484   let Predicates = [HasVLX, HasAVX512] in {
9485     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9486                    v8i16x_info, i32mem, LdFrag, InVecNode>,
9487                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
9489     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9490                    v8i16x_info, i64mem, LdFrag, InVecNode>,
9491                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
9492   }
9493   let Predicates = [HasAVX512] in {
9494     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9495                    v8i16x_info, i128mem, LdFrag, OpNode>,
9496                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
9497   }
9498 }
9500 multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
9501          SDNode OpNode, SDNode InVecNode, string ExtTy,
9502          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
9504   let Predicates = [HasVLX, HasAVX512] in {
9505     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9506                    v4i32x_info, i64mem, LdFrag, InVecNode>,
9507                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
9509     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9510                    v4i32x_info, i128mem, LdFrag, OpNode>,
9511                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
9512   }
9513   let Predicates = [HasAVX512] in {
9514     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9515                    v8i32x_info, i256mem, LdFrag, OpNode>,
9516                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
9517   }
9518 }
9520 defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
9521 defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
9522 defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>;
9523 defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
9524 defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
9525 defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;
9527 defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
9528 defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
9529 defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>;
9530 defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
9531 defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
9532 defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;
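// Illustrative assembly forms for the extend definitions above (AT&T syntax):
//   vpmovzxbw %ymm0, %zmm1               - zero-extend v32i8 -> v32i16
//   vpmovsxwd (%rdi), %zmm0 {%k1} {z}    - sign-extend a 256-bit load, zero-masked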
9535 // Patterns for which we also need any-extend versions. aext_vector_inreg
9536 // is currently legalized to zext_vector_inreg.
9537 multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
9538   // 256-bit patterns
9539   let Predicates = [HasVLX, HasBWI] in {
9540     def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
9541               (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
9542   }
9544   let Predicates = [HasVLX] in {
9545     def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
9546               (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
9548     def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
9549               (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
9550   }
9552   // 512-bit patterns
9553   let Predicates = [HasBWI] in {
9554     def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
9555               (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
9556   }
9557   let Predicates = [HasAVX512] in {
9558     def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
9559               (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
9560     def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
9561               (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
9563     def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
9564               (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
9566     def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
9567               (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
9568   }
9569 }
9571 multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
9572                                  SDNode InVecOp> :
9573     AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
9574   // 128-bit patterns
9575   let Predicates = [HasVLX, HasBWI] in {
9576   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9577             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9578   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9579             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9580   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
9581             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9582   }
9583   let Predicates = [HasVLX] in {
9584   def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9585             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9586   def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
9587             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9589   def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
9590             (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
9592   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9593             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9594   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9595             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9596   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
9597             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9599   def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9600             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9601   def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
9602             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9604   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9605             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9606   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9607             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9608   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
9609             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9610   }
9611   let Predicates = [HasVLX] in {
9612   def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9613             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9614   def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
9615             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9617   def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9618             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9619   def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
9620             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9622   def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9623             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
9624   def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
9625             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
9626   }
9627   // 512-bit patterns
9628   let Predicates = [HasAVX512] in {
9629   def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9630             (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
9631   }
9632 }
9634 defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
9635 defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
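// The 128/256-bit patterns above fold extends of scalar_to_vector loads and
// X86vzload nodes into the *rm forms, e.g. (sext_invec (bc_v16i8 (v2i64
// (scalar_to_vector (loadi64 addr))))) selects VPMOVSXBWZ128rm.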
9637 // Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
9638 // ext+trunc aggressively, making it impossible to legalize the DAG to this
9639 // pattern directly.
9640 let Predicates = [HasAVX512, NoBWI] in {
9641 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9642          (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
9643 def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
9644          (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
9645 }
9647 //===----------------------------------------------------------------------===//
9648 // GATHER - SCATTER Operations
9650 // FIXME: Improve scheduling of gather/scatter instructions.
9651 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9652                          X86MemOperand memop, PatFrag GatherNode,
9653                          RegisterClass MaskRC = _.KRCWM> {
9654   let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
9655       ExeDomain = _.ExeDomain in
9656   def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
9657             (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
9658             !strconcat(OpcodeStr#_.Suffix,
9659             "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
9660             [(set _.RC:$dst, MaskRC:$mask_wb,
9661               (GatherNode  (_.VT _.RC:$src1), MaskRC:$mask,
9662                      vectoraddr:$src2))]>, EVEX, EVEX_K,
9663              EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
9664 }
9666 multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
9667                         AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9668   defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
9669                                       vy512xmem, mgatherv8i32>, EVEX_V512, VEX_W;
9670   defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
9671                                       vz512mem,  mgatherv8i64>, EVEX_V512, VEX_W;
9672 let Predicates = [HasVLX] in {
9673   defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
9674                               vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
9675   defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
9676                               vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
9677   defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
9678                               vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
9679   defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
9680                               vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
9681 }
9682 }
9684 multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
9685                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9686   defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
9687                                        mgatherv16i32>, EVEX_V512;
9688   defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem,
9689                                        mgatherv8i64>, EVEX_V512;
9690 let Predicates = [HasVLX] in {
9691   defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
9692                                           vy256xmem, mgatherv8i32>, EVEX_V256;
9693   defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
9694                                           vy128xmem, mgatherv4i64>, EVEX_V256;
9695   defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
9696                                           vx128xmem, mgatherv4i32>, EVEX_V128;
9697   defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
9698                                           vx64xmem, mgatherv2i64, VK2WM>,
9699                                           EVEX_V128;
9700 }
9701 }
9704 defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
9705                avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
9707 defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
9708                 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
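// Illustrative gather, e.g. the ZMM dword form defined above (AT&T syntax):
//   vpgatherdd (%rax,%zmm1,4), %zmm0 {%k1}
// The mask register is both an input (lanes to load) and an output: bits are
// cleared as elements complete, which is why $mask_wb is tied to $mask.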
9710 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9711                           X86MemOperand memop, PatFrag ScatterNode,
9712                           RegisterClass MaskRC = _.KRCWM> {
9714 let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in
9716   def mr  : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
9717             (ins memop:$dst, MaskRC:$mask, _.RC:$src),
9718             !strconcat(OpcodeStr#_.Suffix,
9719             "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
9720             [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
9721                                     MaskRC:$mask,  vectoraddr:$dst))]>,
9722             EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
9723             Sched<[WriteStore]>;
9724 }
9726 multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
9727                         AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9728   defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
9729                                       vy512xmem, mscatterv8i32>, EVEX_V512, VEX_W;
9730   defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
9731                                       vz512mem,  mscatterv8i64>, EVEX_V512, VEX_W;
9732 let Predicates = [HasVLX] in {
9733   defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
9734                               vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
9735   defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
9736                               vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
9737   defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
9738                               vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
9739   defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
9740                               vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
9741 }
9742 }
9744 multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
9745                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9746   defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
9747                                        mscatterv16i32>, EVEX_V512;
9748   defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem,
9749                                        mscatterv8i64>, EVEX_V512;
9750 let Predicates = [HasVLX] in {
9751   defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
9752                                           vy256xmem, mscatterv8i32>, EVEX_V256;
9753   defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
9754                                           vy128xmem, mscatterv4i64>, EVEX_V256;
9755   defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
9756                                           vx128xmem, mscatterv4i32>, EVEX_V128;
9757   defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
9758                                           vx64xmem, mscatterv2i64, VK2WM>,
9759                                           EVEX_V128;
9760 }
9761 }
9763 defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
9764                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
9766 defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
9767                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
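// Illustrative scatter (AT&T syntax):
//   vpscatterdd %zmm0, (%rax,%zmm1,4) {%k1}
// As with gathers, the writemask is consumed: completed lanes clear their bits.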
9769 // Gather/scatter prefetch instructions.
9770 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
9771                        RegisterClass KRC, X86MemOperand memop> {
9772   let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
9773   def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
9774             !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
9775             EVEX, EVEX_K, Sched<[WriteLoad]>;
9776 }
9778 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
9779                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9781 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
9782                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9784 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
9785                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9787 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
9788                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9790 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
9791                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9793 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
9794                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9796 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
9797                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9799 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
9800                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9802 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
9803                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9805 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
9806                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9808 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
9809                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9811 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
9812                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9814 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
9815                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9817 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
9818                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9820 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
9821                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9823 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
9824                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
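// Illustrative prefetch, e.g.:
//   vgatherpf0dps (%rax,%zmm1,4) {%k1}
// These are hints only: PF0 prefetches with the T0 hint and PF1 with the T1
// hint; the writemask selects which elements' addresses are prefetched.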
9826 multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
9827 def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
9828                   !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
9829                   [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
9830                   EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
9832 // Also need a pattern for anyextend.
9833 def : Pat<(Vec.VT (anyext Vec.KRC:$src)),
9834           (!cast<Instruction>(NAME#"rr") Vec.KRC:$src)>;
9835 }
9837 multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
9838                                  string OpcodeStr, Predicate prd> {
9839 let Predicates = [prd] in
9840   defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
9842   let Predicates = [prd, HasVLX] in {
9843     defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
9844     defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
9845   }
9846 }
9848 defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
9849 defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
9850 defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
9851 defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
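// Illustrative use: vpmovm2b %k1, %zmm0 materializes each mask bit as an
// all-ones (-1) or all-zeros byte, i.e. the (sext Vec.KRC:$src) pattern above.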
9853 multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
9854     def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
9855                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
9856                         [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
9857                         EVEX, Sched<[WriteMove]>;
9858 }
9860 // Use the 512-bit version to implement the 128/256-bit variants in the NoVLX case.
9861 multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
9862                                            X86VectorVTInfo _,
9863                                            string Name> {
9865   def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
9866             (_.KVT (COPY_TO_REGCLASS
9867                      (!cast<Instruction>(Name#"Zrr")
9868                        (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
9869                                       _.RC:$src, _.SubRegIdx)),
9870                    _.KRC))>;
9871 }
9873 multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
9874                                    AVX512VLVectorVTInfo VTInfo, Predicate prd> {
9875   let Predicates = [prd] in
9876     defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
9877                                             EVEX_V512;
9879   let Predicates = [prd, HasVLX] in {
9880     defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
9881                                               EVEX_V256;
9882     defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
9883                                                EVEX_V128;
9884   }
9885   let Predicates = [prd, NoVLX] in {
9886     defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
9887     defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
9888   }
9889 }
9891 defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
9892                                               avx512vl_i8_info, HasBWI>;
9893 defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
9894                                               avx512vl_i16_info, HasBWI>, VEX_W;
9895 defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
9896                                               avx512vl_i32_info, HasDQI>;
9897 defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
9898                                               avx512vl_i64_info, HasDQI>, VEX_W;
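// Illustrative use: vpmovb2m %zmm0, %k0 sets each mask bit from the sign bit of
// the corresponding byte, which is what the (X86pcmpgtm 0, x) pattern encodes.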
9900 // Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
9901 // is available, but BWI is not. We can't handle this in lowering because
9902 // a target-independent DAG combine likes to combine sext and trunc.
9903 let Predicates = [HasDQI, NoBWI] in {
9904   def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
9905             (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9906   def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
9907             (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9909   def : Pat<(v16i8 (anyext (v16i1 VK16:$src))),
9910             (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9911   def : Pat<(v16i16 (anyext (v16i1 VK16:$src))),
9912             (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9913 }
9915 let Predicates = [HasDQI, NoBWI, HasVLX] in {
9916   def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
9917             (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
9919   def : Pat<(v8i16 (anyext (v8i1 VK8:$src))),
9920             (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
9921 }
9923 //===----------------------------------------------------------------------===//
9924 // AVX-512 - COMPRESS and EXPAND
9927 multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
9928                                  string OpcodeStr, X86FoldableSchedWrite sched> {
9929   defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
9930               (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
9931               (null_frag)>, AVX5128IBase,
9932               Sched<[sched]>;
9934   let mayStore = 1, hasSideEffects = 0 in
9935   def mr : AVX5128I<opc, MRMDestMem, (outs),
9936               (ins _.MemOp:$dst, _.RC:$src),
9937               OpcodeStr # "\t{$src, $dst|$dst, $src}",
9938               []>, EVEX_CD8<_.EltSize, CD8VT1>,
9939               Sched<[sched.Folded]>;
9941   def mrk : AVX5128I<opc, MRMDestMem, (outs),
9942               (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
9943               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9944               []>,
9945               EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
9946               Sched<[sched.Folded]>;
9947 }
9949 multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
9950   def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
9951             (!cast<Instruction>(Name#_.ZSuffix##mrk)
9952                             addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
9954   def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
9955             (!cast<Instruction>(Name#_.ZSuffix##rrk)
9956                             _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
9957   def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
9958             (!cast<Instruction>(Name#_.ZSuffix##rrkz)
9959                             _.KRCWM:$mask, _.RC:$src)>;
9960 }
9962 multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
9963                                  X86FoldableSchedWrite sched,
9964                                  AVX512VLVectorVTInfo VTInfo,
9965                                  Predicate Pred = HasAVX512> {
9966   let Predicates = [Pred] in
9967   defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
9968            compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
9970   let Predicates = [Pred, HasVLX] in {
9971     defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
9972                 compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
9973     defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
9974                 compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
9975   }
9976 }
9978 // FIXME: Is there a better scheduler class for VPCOMPRESS?
9979 defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
9980                                           avx512vl_i32_info>, EVEX, NotMemoryFoldable;
9981 defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
9982                                           avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
9983 defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
9984                                           avx512vl_f32_info>, EVEX, NotMemoryFoldable;
9985 defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
9986                                           avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
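// Illustrative use (AT&T syntax): vcompressps %zmm0, (%rdi) {%k1} stores only
// the elements selected by %k1, packed contiguously at the destination.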
9988 // expand
9989 multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
9990                                  string OpcodeStr, X86FoldableSchedWrite sched> {
9991   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9992               (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
9993               (null_frag)>, AVX5128IBase,
9994               Sched<[sched]>;
9996   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9997               (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
9998               (null_frag)>,
9999             AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
10000             Sched<[sched.Folded, sched.ReadAfterFold]>;
10001 }
10003 multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10005   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
10006             (!cast<Instruction>(Name#_.ZSuffix##rmkz)
10007                                         _.KRCWM:$mask, addr:$src)>;
10009   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
10010             (!cast<Instruction>(Name#_.ZSuffix##rmkz)
10011                                         _.KRCWM:$mask, addr:$src)>;
10013   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
10014                                                (_.VT _.RC:$src0))),
10015             (!cast<Instruction>(Name#_.ZSuffix##rmk)
10016                             _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
10018   def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10019             (!cast<Instruction>(Name#_.ZSuffix##rrk)
10020                             _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10021   def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10022             (!cast<Instruction>(Name#_.ZSuffix##rrkz)
10023                             _.KRCWM:$mask, _.RC:$src)>;
10024 }
10026 multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
10027                                X86FoldableSchedWrite sched,
10028                                AVX512VLVectorVTInfo VTInfo,
10029                                Predicate Pred = HasAVX512> {
10030   let Predicates = [Pred] in
10031   defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
10032            expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10034   let Predicates = [Pred, HasVLX] in {
10035     defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
10036                 expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10037     defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
10038                 expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10039   }
10040 }
10042 // FIXME: Is there a better scheduler class for VPEXPAND?
10043 defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
10044                                       avx512vl_i32_info>, EVEX;
10045 defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
10046                                       avx512vl_i64_info>, EVEX, VEX_W;
10047 defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
10048                                       avx512vl_f32_info>, EVEX;
10049 defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
10050                                       avx512vl_f64_info>, EVEX, VEX_W;
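// Illustrative use (AT&T syntax): vexpandps (%rdi), %zmm0 {%k1} {z} loads
// contiguous elements and places them into the lanes selected by %k1, zeroing
// the remaining lanes.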
10052 // Handle instructions of the form  reg_vec1 = op(reg_vec, imm)
10053 //                                             op(mem_vec, imm)
10054 //                                             op(broadcast(eltVt), imm)
10055 // All instructions are created with FROUND_CURRENT.
10056 multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10057                                       X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10058   let ExeDomain = _.ExeDomain in {
10059   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10060                       (ins _.RC:$src1, i32u8imm:$src2),
10061                       OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
10062                       (OpNode (_.VT _.RC:$src1),
10063                               (i32 imm:$src2))>, Sched<[sched]>;
10064   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10065                     (ins _.MemOp:$src1, i32u8imm:$src2),
10066                     OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
10067                     (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10068                             (i32 imm:$src2))>,
10069                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10070   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10071                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
10072                     OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
10073                     "${src1}"##_.BroadcastStr##", $src2",
10074                     (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
10075                             (i32 imm:$src2))>, EVEX_B,
10076                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10077   }
10078 }
10080 // Handle instructions of the form  reg_vec1 = op(reg_vec2, imm), {sae}
10081 multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10082                                           SDNode OpNode, X86FoldableSchedWrite sched,
10083                                           X86VectorVTInfo _> {
10084   let ExeDomain = _.ExeDomain in
10085   defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10086                       (ins _.RC:$src1, i32u8imm:$src2),
10087                       OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
10088                       "$src1, {sae}, $src2",
10089                       (OpNode (_.VT _.RC:$src1),
10090                               (i32 imm:$src2))>,
10091                       EVEX_B, Sched<[sched]>;
10092 }
10094 multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
10095             AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10096             SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10097   let Predicates = [prd] in {
10098     defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
10099                                            _.info512>,
10100                 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
10101                                                sched.ZMM, _.info512>, EVEX_V512;
10102   }
10103   let Predicates = [prd, HasVLX] in {
10104     defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM,
10105                                            _.info128>, EVEX_V128;
10106     defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM,
10107                                            _.info256>, EVEX_V256;
10108   }
10109 }
10111 //handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10112 //                               op(reg_vec2,mem_vec,imm)
10113 //                               op(reg_vec2,broadcast(eltVt),imm)
10114 // all instructions are created with FROUND_CURRENT
10115 multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10116                                 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10117   let ExeDomain = _.ExeDomain in {
10118   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10119                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10120                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10121                       (OpNode (_.VT _.RC:$src1),
10122                               (_.VT _.RC:$src2),
10123                               (i32 imm:$src3))>,
10124                       Sched<[sched]>;
10125   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10126                     (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
10127                     OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10128                     (OpNode (_.VT _.RC:$src1),
10129                             (_.VT (bitconvert (_.LdFrag addr:$src2))),
10130                             (i32 imm:$src3))>,
10131                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10132   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10133                     (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10134                     OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10135                     "$src1, ${src2}"##_.BroadcastStr##", $src3",
10136                     (OpNode (_.VT _.RC:$src1),
10137                             (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
10138                             (i32 imm:$src3))>, EVEX_B,
10139                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10140   }
10141 }
10143 //handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10144 //                               op(reg_vec2,mem_vec,imm)
10145 multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10146                               X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
10147                               X86VectorVTInfo SrcInfo>{
10148   let ExeDomain = DestInfo.ExeDomain in {
10149   defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10150                   (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
10151                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10152                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10153                                (SrcInfo.VT SrcInfo.RC:$src2),
10154                                (i8 imm:$src3)))>,
10155                   Sched<[sched]>;
10156   defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10157                 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
10158                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10159                 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10160                              (SrcInfo.VT (bitconvert
10161                                                 (SrcInfo.LdFrag addr:$src2))),
10162                              (i8 imm:$src3)))>,
10163                 Sched<[sched.Folded, sched.ReadAfterFold]>;
10164   }
10165 }
10167 //handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10168 //                               op(reg_vec2,mem_vec,imm)
10169 //                               op(reg_vec2,broadcast(eltVt),imm)
10170 multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10171                            X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10172   avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
10174   let ExeDomain = _.ExeDomain in
10175   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10176                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10177                     OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10178                     "$src1, ${src2}"##_.BroadcastStr##", $src3",
10179                     (OpNode (_.VT _.RC:$src1),
10180                             (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
10181                             (i8 imm:$src3))>, EVEX_B,
10182                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10183 }
10185 //handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10186 //                                      op(reg_vec2,mem_scalar,imm)
10187 multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10188                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10189   let ExeDomain = _.ExeDomain in {
10190   defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10191                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10192                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10193                       (OpNode (_.VT _.RC:$src1),
10194                               (_.VT _.RC:$src2),
10195                               (i32 imm:$src3))>,
10196                       Sched<[sched]>;
10197   defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10198                     (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10199                     OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10200                     (OpNode (_.VT _.RC:$src1),
10201                             (_.VT (scalar_to_vector
10202                                       (_.ScalarLdFrag addr:$src2))),
10203                             (i32 imm:$src3))>,
10204                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10205   }
10206 }
10208 //handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
10209 multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10210                                     SDNode OpNode, X86FoldableSchedWrite sched,
10211                                     X86VectorVTInfo _> {
10212   let ExeDomain = _.ExeDomain in
10213   defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10214                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10215                       OpcodeStr, "$src3, {sae}, $src2, $src1",
10216                       "$src1, $src2, {sae}, $src3",
10217                       (OpNode (_.VT _.RC:$src1),
10218                               (_.VT _.RC:$src2),
10219                               (i32 imm:$src3))>,
10220                       EVEX_B, Sched<[sched]>;
10221 }
10223 //handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
10224 multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10225                                     X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10226   let ExeDomain = _.ExeDomain in
10227   defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10228                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10229                       OpcodeStr, "$src3, {sae}, $src2, $src1",
10230                       "$src1, $src2, {sae}, $src3",
10231                       (OpNode (_.VT _.RC:$src1),
10232                               (_.VT _.RC:$src2),
10233                               (i32 imm:$src3))>,
10234                       EVEX_B, Sched<[sched]>;
10235 }
10237 multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
10238             AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10239             SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10240   let Predicates = [prd] in {
10241     defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10242                 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
10243                                   EVEX_V512;
10245   }
10246   let Predicates = [prd, HasVLX] in {
10247     defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10248                                   EVEX_V128;
10249     defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10250                                   EVEX_V256;
10251   }
10252 }
10254 multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
10255                    X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
10256                    AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
10257   let Predicates = [Pred] in {
10258     defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
10259                            SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
10260   }
10261   let Predicates = [Pred, HasVLX] in {
10262     defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
10263                            SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
10264     defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
10265                            SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
10266   }
10267 }
10269 multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
10270                                   bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
10271                                   Predicate Pred = HasAVX512> {
10272   let Predicates = [Pred] in {
10273     defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10274                                 EVEX_V512;
10275   }
10276   let Predicates = [Pred, HasVLX] in {
10277     defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10278                                 EVEX_V128;
10279     defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10280                                 EVEX_V256;
10281   }
10282 }
10284 multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
10285                   X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
10286                   SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
10287   let Predicates = [prd] in {
10288      defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
10289               avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
10290   }
10291 }
10293 multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
10294                     bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
10295                     SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10296   defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
10297                             opcPs, OpNode, OpNodeSAE, sched, prd>,
10298                             EVEX_CD8<32, CD8VF>;
10299   defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
10300                             opcPd, OpNode, OpNodeSAE, sched, prd>,
10301                             EVEX_CD8<64, CD8VF>, VEX_W;
10302 }
10304 defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
10305                               X86VReduce, X86VReduceSAE, SchedWriteFRnd, HasDQI>,
10306                               AVX512AIi8Base, EVEX;
10307 defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
10308                               X86VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>,
10309                               AVX512AIi8Base, EVEX;
10310 defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
10311                               X86VGetMant, X86VGetMantSAE, SchedWriteFRnd, HasAVX512>,
10312                               AVX512AIi8Base, EVEX;
10314 defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
10315                                                 0x50, X86VRange, X86VRangeSAE,
10316                                                 SchedWriteFAdd, HasDQI>,
10317       AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10318 defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
10319                                                 0x50, X86VRange, X86VRangeSAE,
10320                                                 SchedWriteFAdd, HasDQI>,
10321       AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10323 defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
10324       f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10325       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10326 defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
10327       0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10328       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10330 defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
10331       0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10332       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10333 defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
10334       0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10335       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10337 defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
10338       0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10339       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10340 defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
10341       0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10342       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10344 multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
10345                                           X86FoldableSchedWrite sched,
10346                                           X86VectorVTInfo _,
10347                                           X86VectorVTInfo CastInfo,
10348                                           string EVEX2VEXOvrd> {
10349   let ExeDomain = _.ExeDomain in {
10350   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10351                   (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10352                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10353                   (_.VT (bitconvert
10354                          (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
10355                                                   (i8 imm:$src3)))))>,
10356                   Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
10357   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10358                 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10359                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10360                 (_.VT
10361                  (bitconvert
10362                   (CastInfo.VT (X86Shuf128 _.RC:$src1,
10363                                            (CastInfo.LdFrag addr:$src2),
10364                                            (i8 imm:$src3)))))>,
10365                 Sched<[sched.Folded, sched.ReadAfterFold]>,
10366                 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
10367   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10368                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10369                     OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10370                     "$src1, ${src2}"##_.BroadcastStr##", $src3",
10371                     (_.VT
10372                      (bitconvert
10373                       (CastInfo.VT
10374                        (X86Shuf128 _.RC:$src1,
10375                                    (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
10376                                    (i8 imm:$src3)))))>, EVEX_B,
10377                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10378   }
10379 }
10381 multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
10382                                    AVX512VLVectorVTInfo _,
10383                                    AVX512VLVectorVTInfo CastInfo, bits<8> opc,
10384                                    string EVEX2VEXOvrd>{
10385   let Predicates = [HasAVX512] in
10386   defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10387                                           _.info512, CastInfo.info512, "">, EVEX_V512;
10389   let Predicates = [HasAVX512, HasVLX] in
10390   defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10391                                              _.info256, CastInfo.info256,
10392                                              EVEX2VEXOvrd>, EVEX_V256;
10393 }
10395 defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
10396       avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10397 defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
10398       avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10399 defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
10400       avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10401 defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
10402       avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10404 let Predicates = [HasAVX512] in {
10405 // Provide a fallback in case the load node used in the broadcast patterns
10406 // above has additional users, which prevents those patterns from being
10407 // selected.
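// With the same 128-bit value in both shuffle sources and an immediate of 0,
// every 128-bit destination lane selects lane 0 of its source, so the lane
// shuffles below replicate the subvector across the full 512-bit register.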
10408 def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
10409           (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10410                           (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10411                           0)>;
10412 def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
10413           (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10414                           (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10415                           0)>;
10417 def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
10418           (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10419                           (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10420                           0)>;
10421 def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
10422           (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10423                           (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10424                           0)>;
10426 def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
10427           (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10428                           (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10429                           0)>;
10431 def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
10432           (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10433                           (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10434                           0)>;
10435 }
10437 multiclass avx512_valign<bits<8> opc, string OpcodeStr,
10438                          X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10439   // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
10440   // instantiation of this class.
10441   let ExeDomain = _.ExeDomain in {
10442   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10443                   (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10444                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10445                   (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$src3)))>,
10446                   Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
10447   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10448                 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10449                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10450                 (_.VT (X86VAlign _.RC:$src1,
10451                                  (bitconvert (_.LdFrag addr:$src2)),
10452                                  (i8 imm:$src3)))>,
10453                 Sched<[sched.Folded, sched.ReadAfterFold]>,
10454                 EVEX2VEXOverride<"VPALIGNRrmi">;
10456   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10457                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10458                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10459                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
10460                    (X86VAlign _.RC:$src1,
10461                               (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
10462                               (i8 imm:$src3))>, EVEX_B,
10463                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10464   }
10465 }
10467 multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
10468                                 AVX512VLVectorVTInfo _> {
10469   let Predicates = [HasAVX512] in {
10470     defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
10471                                 AVX512AIi8Base, EVEX_4V, EVEX_V512;
10472   }
10473   let Predicates = [HasAVX512, HasVLX] in {
10474     defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
10475                                 AVX512AIi8Base, EVEX_4V, EVEX_V128;
10476     // We can't really override the 256-bit version so change it back to unset.
10477     let EVEX2VEXOverride = ? in
10478     defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
10479                                 AVX512AIi8Base, EVEX_4V, EVEX_V256;
10480   }
10481 }
10483 defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
10484                                    avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
10485 defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
10486                                    avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
10487                                    VEX_W;
10489 defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
10490                                          SchedWriteShuffle, avx512vl_i8_info,
10491                                          avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
10493 // Immediate transforms to help convert masked valignq into valignd, or
10494 // valignq/valignd into vpalignr.
10495 def ValignqImm32XForm : SDNodeXForm<imm, [{
10496   return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
10497 }]>;
10498 def ValignqImm8XForm : SDNodeXForm<imm, [{
10499   return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
10500 }]>;
10501 def ValigndImm8XForm : SDNodeXForm<imm, [{
10502   return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
10503 }]>;
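// Each qword element spans two dwords or eight bytes, and each dword spans
// four bytes, hence the scale factors above: for example, a valignq immediate
// of 1 becomes a valignd immediate of 2 or a vpalignr immediate of 8, and a
// valignd immediate of 1 becomes a vpalignr immediate of 4.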
10505 multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
10506                                         X86VectorVTInfo From, X86VectorVTInfo To,
10507                                         SDNodeXForm ImmXForm> {
10508   def : Pat<(To.VT (vselect To.KRCWM:$mask,
10509                             (bitconvert
10510                              (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10511                                               imm:$src3))),
10512                             To.RC:$src0)),
10513             (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
10514                                                   To.RC:$src1, To.RC:$src2,
10515                                                   (ImmXForm imm:$src3))>;
10517   def : Pat<(To.VT (vselect To.KRCWM:$mask,
10518                             (bitconvert
10519                              (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10520                                               imm:$src3))),
10521                             To.ImmAllZerosV)),
10522             (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
10523                                                    To.RC:$src1, To.RC:$src2,
10524                                                    (ImmXForm imm:$src3))>;
10526   def : Pat<(To.VT (vselect To.KRCWM:$mask,
10527                             (bitconvert
10528                              (From.VT (OpNode From.RC:$src1,
10529                                               (From.LdFrag addr:$src2),
10530                                       imm:$src3))),
10531                             To.RC:$src0)),
10532             (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
10533                                                   To.RC:$src1, addr:$src2,
10534                                                   (ImmXForm imm:$src3))>;
10536   def : Pat<(To.VT (vselect To.KRCWM:$mask,
10537                             (bitconvert
10538                              (From.VT (OpNode From.RC:$src1,
10539                                               (From.LdFrag addr:$src2),
10540                                       imm:$src3))),
10541                             To.ImmAllZerosV)),
10542             (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
10543                                                    To.RC:$src1, addr:$src2,
10544                                                    (ImmXForm imm:$src3))>;
10545 }
10547 multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
10548                                            X86VectorVTInfo From,
10549                                            X86VectorVTInfo To,
10550                                            SDNodeXForm ImmXForm> :
10551       avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
10552   def : Pat<(From.VT (OpNode From.RC:$src1,
10553                              (bitconvert (To.VT (X86VBroadcast
10554                                                 (To.ScalarLdFrag addr:$src2)))),
10555                              imm:$src3)),
10556             (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
10557                                                   (ImmXForm imm:$src3))>;
10559   def : Pat<(To.VT (vselect To.KRCWM:$mask,
10560                             (bitconvert
10561                              (From.VT (OpNode From.RC:$src1,
10562                                       (bitconvert
10563                                        (To.VT (X86VBroadcast
10564                                                (To.ScalarLdFrag addr:$src2)))),
10565                                       imm:$src3))),
10566                             To.RC:$src0)),
10567             (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
10568                                                    To.RC:$src1, addr:$src2,
10569                                                    (ImmXForm imm:$src3))>;
10571   def : Pat<(To.VT (vselect To.KRCWM:$mask,
10572                             (bitconvert
10573                              (From.VT (OpNode From.RC:$src1,
10574                                       (bitconvert
10575                                        (To.VT (X86VBroadcast
10576                                                (To.ScalarLdFrag addr:$src2)))),
10577                                       imm:$src3))),
10578                             To.ImmAllZerosV)),
10579             (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
10580                                                     To.RC:$src1, addr:$src2,
10581                                                     (ImmXForm imm:$src3))>;
10582 }
10584 let Predicates = [HasAVX512] in {
10585   // For 512-bit we lower to the widest element type we can. So we only need
10586   // to handle converting valignq to valignd.
10587   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
10588                                          v16i32_info, ValignqImm32XForm>;
10589 }
10591 let Predicates = [HasVLX] in {
10592   // For 128-bit we lower to the widest element type we can. So we only need
10593   // to handle converting valignq to valignd.
10594   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
10595                                          v4i32x_info, ValignqImm32XForm>;
10596   // For 256-bit we lower to the widest element type we can. So we only need
10597   // to handle converting valignq to valignd.
10598   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
10599                                          v8i32x_info, ValignqImm32XForm>;
10600 }
10602 let Predicates = [HasVLX, HasBWI] in {
10603   // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
10604   defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
10605                                       v16i8x_info, ValignqImm8XForm>;
10606   defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
10607                                       v16i8x_info, ValigndImm8XForm>;
10608 }
10610 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
10611                 SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
10612                 EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
10614 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10615                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10616   let ExeDomain = _.ExeDomain in {
10617   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10618                     (ins _.RC:$src1), OpcodeStr,
10619                     "$src1", "$src1",
10620                     (_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase,
10621                     Sched<[sched]>;
10623   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10624                   (ins _.MemOp:$src1), OpcodeStr,
10625                   "$src1", "$src1",
10626                   (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
10627             EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
10628             Sched<[sched.Folded]>;
10629   }
10630 }
10632 multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
10633                             X86FoldableSchedWrite sched, X86VectorVTInfo _> :
10634            avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
10635   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10636                   (ins _.ScalarMemOp:$src1), OpcodeStr,
10637                   "${src1}"##_.BroadcastStr,
10638                   "${src1}"##_.BroadcastStr,
10639                   (_.VT (OpNode (X86VBroadcast
10640                                     (_.ScalarLdFrag addr:$src1))))>,
10641              EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
10642              Sched<[sched.Folded]>;
10643 }
10645 multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
10646                               X86SchedWriteWidths sched,
10647                               AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10648   let Predicates = [prd] in
10649     defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
10650                              EVEX_V512;
10652   let Predicates = [prd, HasVLX] in {
10653     defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
10654                               EVEX_V256;
10655     defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
10656                               EVEX_V128;
10657   }
10658 }
10660 multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
10661                                X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
10662                                Predicate prd> {
10663   let Predicates = [prd] in
10664     defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
10665                               EVEX_V512;
10667   let Predicates = [prd, HasVLX] in {
10668     defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
10669                                  EVEX_V256;
10670     defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
10671                                  EVEX_V128;
10672   }
10673 }
10675 multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
10676                                  SDNode OpNode, X86SchedWriteWidths sched,
10677                                  Predicate prd> {
10678   defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
10679                                avx512vl_i64_info, prd>, VEX_W;
10680   defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
10681                                avx512vl_i32_info, prd>;
10682 }
10684 multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
10685                                  SDNode OpNode, X86SchedWriteWidths sched,
10686                                  Predicate prd> {
10687   defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
10688                               avx512vl_i16_info, prd>, VEX_WIG;
10689   defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
10690                               avx512vl_i8_info, prd>, VEX_WIG;
10691 }
10693 multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
10694                                   bits<8> opc_d, bits<8> opc_q,
10695                                   string OpcodeStr, SDNode OpNode,
10696                                   X86SchedWriteWidths sched> {
10697   defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
10698                                     HasAVX512>,
10699               avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
10700                                     HasBWI>;
10701 }
10703 defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
10704                                     SchedWriteVecALU>;
10706 // VPABS: Use the 512-bit version to implement the 128/256-bit forms when VLX is unavailable.
10707 let Predicates = [HasAVX512, NoVLX] in {
10708   def : Pat<(v4i64 (abs VR256X:$src)),
10709             (EXTRACT_SUBREG
10710                 (VPABSQZrr
10711                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
10712              sub_ymm)>;
10713   def : Pat<(v2i64 (abs VR128X:$src)),
10714             (EXTRACT_SUBREG
10715                 (VPABSQZrr
10716                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
10717              sub_xmm)>;
10718 }
10720 // Use the 512-bit version to implement the 128/256-bit forms when VLX is unavailable.
10721 multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
10722                                  AVX512VLVectorVTInfo _, Predicate prd> {
10723   let Predicates = [prd, NoVLX] in {
10724     def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
10725               (EXTRACT_SUBREG
10726                 (!cast<Instruction>(InstrStr # "Zrr")
10727                   (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
10728                                  _.info256.RC:$src1,
10729                                  _.info256.SubRegIdx)),
10730               _.info256.SubRegIdx)>;
10732     def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
10733               (EXTRACT_SUBREG
10734                 (!cast<Instruction>(InstrStr # "Zrr")
10735                   (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
10736                                  _.info128.RC:$src1,
10737                                  _.info128.SubRegIdx)),
10738               _.info128.SubRegIdx)>;
10739   }
10740 }
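// For example, the avx512vl_i64_info instantiation below yields (roughly) the
// following pattern for the 256-bit case under [HasCDI, NoVLX]:
//   def : Pat<(v4i64 (ctlz VR256X:$src1)),
//             (EXTRACT_SUBREG
//               (VPLZCNTQZrr
//                 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm)),
//               sub_ymm)>;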
10742 defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
10743                                         SchedWriteVecIMul, HasCDI>;
10745 // FIXME: Is there a better scheduler class for VPCONFLICT?
10746 defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
10747                                         SchedWriteVecALU, HasCDI>;
10749 // VPLZCNT: Use the 512-bit version to implement the 128/256-bit forms when VLX is unavailable.
10750 defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
10751 defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
10753 //===---------------------------------------------------------------------===//
10754 // Counts number of ones - VPOPCNTD and VPOPCNTQ
10755 //===---------------------------------------------------------------------===//
10757 // FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
10758 defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
10759                                      SchedWriteVecALU, HasVPOPCNTDQ>;
10761 defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
10762 defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
10764 //===---------------------------------------------------------------------===//
10765 // Replicate Single FP - MOVSHDUP and MOVSLDUP
10766 //===---------------------------------------------------------------------===//
10768 multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
10769                             X86SchedWriteWidths sched> {
10770   defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
10771                                       avx512vl_f32_info, HasAVX512>, XS;
10772 }
10774 defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
10775                                   SchedWriteFShuffle>;
10776 defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
10777                                   SchedWriteFShuffle>;
10779 //===----------------------------------------------------------------------===//
10780 // AVX-512 - MOVDDUP
10781 //===----------------------------------------------------------------------===//
10783 multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
10784                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10785   let ExeDomain = _.ExeDomain in {
10786   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10787                    (ins _.RC:$src), OpcodeStr, "$src", "$src",
10788                    (_.VT (OpNode (_.VT _.RC:$src)))>, EVEX,
10789                    Sched<[sched]>;
10790   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10791                  (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
10792                  (_.VT (OpNode (_.ScalarLdFrag addr:$src)))>,
10793                  EVEX, EVEX_CD8<_.EltSize, CD8VH>,
10794                  Sched<[sched.Folded]>;
10795   }
10796 }
10798 multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
10799                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
10800   defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
10801                            VTInfo.info512>, EVEX_V512;
10803   let Predicates = [HasAVX512, HasVLX] in {
10804     defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
10805                                 VTInfo.info256>, EVEX_V256;
10806     defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, sched.XMM,
10807                                    VTInfo.info128>, EVEX_V128;
10808   }
10809 }
10811 multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
10812                           X86SchedWriteWidths sched> {
10813   defm NAME:      avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
10814                                         avx512vl_f64_info>, XD, VEX_W;
10815 }
10817 defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;
10819 let Predicates = [HasVLX] in {
10820 def : Pat<(v2f64 (X86VBroadcast f64:$src)),
10821           (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10822 def : Pat<(v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
10823           (VMOVDDUPZ128rm addr:$src)>;
10824 def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
10825           (VMOVDDUPZ128rm addr:$src)>;
10827 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
10828                    (v2f64 VR128X:$src0)),
10829           (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
10830                            (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10831 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
10832                    immAllZerosV),
10833           (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10835 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
10836                    (v2f64 VR128X:$src0)),
10837           (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
10838 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
10839                    immAllZerosV),
10840           (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
10842 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
10843                    (v2f64 VR128X:$src0)),
10844           (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
10845 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
10846                    immAllZerosV),
10847           (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
10848 }
10850 //===----------------------------------------------------------------------===//
10851 // AVX-512 - Unpack Instructions
10852 //===----------------------------------------------------------------------===//
10854 defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
10855                                  SchedWriteFShuffleSizes, 0, 1>;
10856 defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
10857                                  SchedWriteFShuffleSizes>;
10859 defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
10860                                        SchedWriteShuffle, HasBWI>;
10861 defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
10862                                        SchedWriteShuffle, HasBWI>;
10863 defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
10864                                        SchedWriteShuffle, HasBWI>;
10865 defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
10866                                        SchedWriteShuffle, HasBWI>;
10868 defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
10869                                        SchedWriteShuffle, HasAVX512>;
10870 defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
10871                                        SchedWriteShuffle, HasAVX512>;
10872 defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
10873                                         SchedWriteShuffle, HasAVX512>;
10874 defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
10875                                         SchedWriteShuffle, HasAVX512>;
10877 //===----------------------------------------------------------------------===//
10878 // AVX-512 - Extract & Insert Integer Instructions
10879 //===----------------------------------------------------------------------===//
10881 multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
10882                                                             X86VectorVTInfo _> {
10883   def mr : AVX512Ii8<opc, MRMDestMem, (outs),
10884               (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
10885               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10886               [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
10887                        addr:$dst)]>,
10888               EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
10889 }
10891 multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
10892   let Predicates = [HasBWI] in {
10893     def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
10894                   (ins _.RC:$src1, u8imm:$src2),
10895                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10896                   [(set GR32orGR64:$dst,
10897                         (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
10898                   EVEX, TAPD, Sched<[WriteVecExtract]>;
10900     defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
10901   }
10902 }
10904 multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
10905   let Predicates = [HasBWI] in {
10906     def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
10907                   (ins _.RC:$src1, u8imm:$src2),
10908                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10909                   [(set GR32orGR64:$dst,
10910                         (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
10911                   EVEX, PD, Sched<[WriteVecExtract]>;
10913     let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
10914     def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
10915                    (ins _.RC:$src1, u8imm:$src2),
10916                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
10917                    EVEX, TAPD, FoldGenData<NAME#rr>,
10918                    Sched<[WriteVecExtract]>;
10920     defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
10921   }
10922 }
10924 multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
10925                                                             RegisterClass GRC> {
10926   let Predicates = [HasDQI] in {
10927     def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
10928                   (ins _.RC:$src1, u8imm:$src2),
10929                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10930                   [(set GRC:$dst,
10931                       (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
10932                   EVEX, TAPD, Sched<[WriteVecExtract]>;
10934     def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
10935                 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
10936                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10937                 [(store (extractelt (_.VT _.RC:$src1),
10938                                     imm:$src2),addr:$dst)]>,
10939                 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
10940                 Sched<[WriteVecExtractSt]>;
10941   }
10942 }
10944 defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
10945 defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
10946 defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
10947 defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
10949 multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
10950                                             X86VectorVTInfo _, PatFrag LdFrag> {
10951   def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
10952       (ins _.RC:$src1,  _.ScalarMemOp:$src2, u8imm:$src3),
10953       OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10954       [(set _.RC:$dst,
10955           (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
10956       EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
10957 }
10959 multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
10960                                             X86VectorVTInfo _, PatFrag LdFrag> {
10961   let Predicates = [HasBWI] in {
10962     def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
10963         (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
10964         OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10965         [(set _.RC:$dst,
10966             (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
10967         Sched<[WriteVecInsert]>;
10969     defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
10970   }
10971 }
10973 multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
10974                                          X86VectorVTInfo _, RegisterClass GRC> {
10975   let Predicates = [HasDQI] in {
10976     def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
10977         (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
10978         OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10979         [(set _.RC:$dst,
10980             (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
10981         EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
10983     defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
10984                                     _.ScalarLdFrag>, TAPD;
10985   }
10986 }
10988 defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
10989                                      extloadi8>, TAPD, VEX_WIG;
10990 defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
10991                                      extloadi16>, PD, VEX_WIG;
10992 defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
10993 defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
10995 //===----------------------------------------------------------------------===//
10996 // VSHUFPS - VSHUFPD Operations
10997 //===----------------------------------------------------------------------===//
10999 multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
11000                         AVX512VLVectorVTInfo VTInfo_FP>{
11001   defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
11002                                     SchedWriteFShuffle>,
11003                                     EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
11004                                     AVX512AIi8Base, EVEX_4V;
11005 }
11007 defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
11008 defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
11010 //===----------------------------------------------------------------------===//
11011 // AVX-512 - Byte shift Left/Right
11012 //===----------------------------------------------------------------------===//
11014 // FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well?
11015 multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
11016                                Format MRMm, string OpcodeStr,
11017                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11018   def rr : AVX512<opc, MRMr,
11019              (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
11020              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11021              [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>,
11022              Sched<[sched]>;
11023   def rm : AVX512<opc, MRMm,
11024            (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
11025            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11026            [(set _.RC:$dst,(_.VT (OpNode
11027                                  (_.VT (bitconvert (_.LdFrag addr:$src1))),
11028                                  (i8 imm:$src2))))]>,
11029            Sched<[sched.Folded, sched.ReadAfterFold]>;
11030 }
11032 multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
11033                                    Format MRMm, string OpcodeStr,
11034                                    X86SchedWriteWidths sched, Predicate prd>{
11035   let Predicates = [prd] in
11036     defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11037                                  sched.ZMM, v64i8_info>, EVEX_V512;
11038   let Predicates = [prd, HasVLX] in {
11039     defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11040                                     sched.YMM, v32i8x_info>, EVEX_V256;
11041     defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11042                                     sched.XMM, v16i8x_info>, EVEX_V128;
11043   }
11044 }
11045 defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
11046                                        SchedWriteShuffle, HasBWI>,
11047                                        AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11048 defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
11049                                        SchedWriteShuffle, HasBWI>,
11050                                        AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11052 multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
11053                                 string OpcodeStr, X86FoldableSchedWrite sched,
11054                                 X86VectorVTInfo _dst, X86VectorVTInfo _src> {
11055   def rr : AVX512BI<opc, MRMSrcReg,
11056              (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
11057              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11058              [(set _dst.RC:$dst,(_dst.VT
11059                                 (OpNode (_src.VT _src.RC:$src1),
11060                                         (_src.VT _src.RC:$src2))))]>,
11061              Sched<[sched]>;
11062   def rm : AVX512BI<opc, MRMSrcMem,
11063            (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
11064            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11065            [(set _dst.RC:$dst,(_dst.VT
11066                               (OpNode (_src.VT _src.RC:$src1),
11067                               (_src.VT (bitconvert
11068                                         (_src.LdFrag addr:$src2))))))]>,
11069            Sched<[sched.Folded, sched.ReadAfterFold]>;
11070 }
11072 multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
11073                                     string OpcodeStr, X86SchedWriteWidths sched,
11074                                     Predicate prd> {
11075   let Predicates = [prd] in
11076     defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
11077                                   v8i64_info, v64i8_info>, EVEX_V512;
11078   let Predicates = [prd, HasVLX] in {
11079     defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
11080                                      v4i64x_info, v32i8x_info>, EVEX_V256;
11081     defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
11082                                      v2i64x_info, v16i8x_info>, EVEX_V128;
11083   }
11084 }
11086 defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
11087                                         SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
11089 // Transforms to swizzle an immediate to enable better matching when the
11090 // memory operand isn't in the right place.
11091 def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
11092   // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
11093   uint8_t Imm = N->getZExtValue();
11094   // Swap bits 1/4 and 3/6.
11095   uint8_t NewImm = Imm & 0xa5;
11096   if (Imm & 0x02) NewImm |= 0x10;
11097   if (Imm & 0x10) NewImm |= 0x02;
11098   if (Imm & 0x08) NewImm |= 0x40;
11099   if (Imm & 0x40) NewImm |= 0x08;
11100   return getI8Imm(NewImm, SDLoc(N));
11101 }]>;
11102 def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
11103   // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
11104   uint8_t Imm = N->getZExtValue();
11105   // Swap bits 2/4 and 3/5.
11106   uint8_t NewImm = Imm & 0xc3;
11107   if (Imm & 0x04) NewImm |= 0x10;
11108   if (Imm & 0x10) NewImm |= 0x04;
11109   if (Imm & 0x08) NewImm |= 0x20;
11110   if (Imm & 0x20) NewImm |= 0x08;
11111   return getI8Imm(NewImm, SDLoc(N));
11112 }]>;
11113 def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
11114   // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
11115   uint8_t Imm = N->getZExtValue();
11116   // Swap bits 1/2 and 5/6.
11117   uint8_t NewImm = Imm & 0x99;
11118   if (Imm & 0x02) NewImm |= 0x04;
11119   if (Imm & 0x04) NewImm |= 0x02;
11120   if (Imm & 0x20) NewImm |= 0x40;
11121   if (Imm & 0x40) NewImm |= 0x20;
11122   return getI8Imm(NewImm, SDLoc(N));
11123 }]>;
11124 def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
11125   // Convert a VPTERNLOG immediate by moving operand 0 to the end.
11126   uint8_t Imm = N->getZExtValue();
11127   // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
11128   uint8_t NewImm = Imm & 0x81;
11129   if (Imm & 0x02) NewImm |= 0x10;
11130   if (Imm & 0x04) NewImm |= 0x02;
11131   if (Imm & 0x08) NewImm |= 0x20;
11132   if (Imm & 0x10) NewImm |= 0x04;
11133   if (Imm & 0x20) NewImm |= 0x40;
11134   if (Imm & 0x40) NewImm |= 0x08;
11135   return getI8Imm(NewImm, SDLoc(N));
11136 }]>;
11137 def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
11138   // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
11139   uint8_t Imm = N->getZExtValue();
11140   // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
11141   uint8_t NewImm = Imm & 0x81;
11142   if (Imm & 0x02) NewImm |= 0x04;
11143   if (Imm & 0x04) NewImm |= 0x10;
11144   if (Imm & 0x08) NewImm |= 0x40;
11145   if (Imm & 0x10) NewImm |= 0x02;
11146   if (Imm & 0x20) NewImm |= 0x08;
11147   if (Imm & 0x40) NewImm |= 0x20;
11148   return getI8Imm(NewImm, SDLoc(N));
11149 }]>;
11151 multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
11152                           X86FoldableSchedWrite sched, X86VectorVTInfo _,
11153                           string Name>{
11154   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11155   defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11156                       (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
11157                       OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11158                       (OpNode (_.VT _.RC:$src1),
11159                               (_.VT _.RC:$src2),
11160                               (_.VT _.RC:$src3),
11161                               (i8 imm:$src4)), 1, 1>,
11162                       AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
11163   defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11164                     (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
11165                     OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11166                     (OpNode (_.VT _.RC:$src1),
11167                             (_.VT _.RC:$src2),
11168                             (_.VT (bitconvert (_.LdFrag addr:$src3))),
11169                             (i8 imm:$src4)), 1, 0>,
11170                     AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11171                     Sched<[sched.Folded, sched.ReadAfterFold]>;
11172   defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11173                     (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
11174                     OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
11175                     "$src2, ${src3}"##_.BroadcastStr##", $src4",
11176                     (OpNode (_.VT _.RC:$src1),
11177                             (_.VT _.RC:$src2),
11178                             (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
11179                             (i8 imm:$src4)), 1, 0>, EVEX_B,
11180                     AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11181                     Sched<[sched.Folded, sched.ReadAfterFold]>;
11182   }// Constraints = "$src1 = $dst"
11184   // Additional patterns for matching passthru operand in other positions.
11185   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11186                    (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
11187                    _.RC:$src1)),
11188             (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11189              _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11190   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11191                    (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
11192                    _.RC:$src1)),
11193             (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11194              _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
11196   // Additional patterns for matching loads in other positions.
11197   def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
11198                           _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
11199             (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
11200                                    addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11201   def : Pat<(_.VT (OpNode _.RC:$src1,
11202                           (bitconvert (_.LdFrag addr:$src3)),
11203                           _.RC:$src2, (i8 imm:$src4))),
11204             (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
11205                                    addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
11207   // Additional patterns for matching zero masking with loads in other
11208   // positions.
11209   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11210                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
11211                     _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
11212                    _.ImmAllZerosV)),
11213             (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11214              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11215   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11216                    (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11217                     _.RC:$src2, (i8 imm:$src4)),
11218                    _.ImmAllZerosV)),
11219             (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11220              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
11222   // Additional patterns for matching masked loads with different
11223   // operand orders.
11224   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11225                    (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11226                     _.RC:$src2, (i8 imm:$src4)),
11227                    _.RC:$src1)),
11228             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11229              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
11230   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11231                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
11232                     _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
11233                    _.RC:$src1)),
11234             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11235              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11236   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11237                    (OpNode _.RC:$src2, _.RC:$src1,
11238                     (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
11239                    _.RC:$src1)),
11240             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11241              _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
11242   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11243                    (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
11244                     _.RC:$src1, (i8 imm:$src4)),
11245                    _.RC:$src1)),
11246             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11247              _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
11248   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11249                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
11250                     _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
11251                    _.RC:$src1)),
11252             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11253              _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
11255   // Additional patterns for matching broadcasts in other positions.
11256   def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11257                           _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
11258             (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
11259                                    addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11260   def : Pat<(_.VT (OpNode _.RC:$src1,
11261                           (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11262                           _.RC:$src2, (i8 imm:$src4))),
11263             (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
11264                                    addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
11266   // Additional patterns for matching zero masking with broadcasts in other
11267   // positions.
11268   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11269                    (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11270                     _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
11271                    _.ImmAllZerosV)),
11272             (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11273              _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11274              (VPTERNLOG321_imm8 imm:$src4))>;
11275   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11276                    (OpNode _.RC:$src1,
11277                     (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11278                     _.RC:$src2, (i8 imm:$src4)),
11279                    _.ImmAllZerosV)),
11280             (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11281              _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11282              (VPTERNLOG132_imm8 imm:$src4))>;
11284   // Additional patterns for matching masked broadcasts with different
11285   // operand orders.
11286   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11287                    (OpNode _.RC:$src1,
11288                     (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11289                     _.RC:$src2, (i8 imm:$src4)),
11290                    _.RC:$src1)),
11291             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11292              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
11293   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11294                    (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11295                     _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
11296                    _.RC:$src1)),
11297             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11298              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11299   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11300                    (OpNode _.RC:$src2, _.RC:$src1,
11301                     (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11302                     (i8 imm:$src4)), _.RC:$src1)),
11303             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11304              _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
11305   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11306                    (OpNode _.RC:$src2,
11307                     (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11308                     _.RC:$src1, (i8 imm:$src4)),
11309                    _.RC:$src1)),
11310             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11311              _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
11312   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11313                    (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11314                     _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
11315                    _.RC:$src1)),
11316             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11317              _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
11320 multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
11321                                  AVX512VLVectorVTInfo _> {
11322   let Predicates = [HasAVX512] in
11323     defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
11324                                _.info512, NAME>, EVEX_V512;
11325   let Predicates = [HasAVX512, HasVLX] in {
11326     defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
11327                                _.info128, NAME>, EVEX_V128;
11328     defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
11329                                _.info256, NAME>, EVEX_V256;
11330   }
11333 defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
11334                                         avx512vl_i32_info>;
11335 defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
11336                                         avx512vl_i64_info>, VEX_W;
11338 // Patterns to implement vnot using vpternlog, instead of first materializing
11339 // an all-ones vector (with pcmpeq or vpternlog) and then xoring with it. The
11340 // immediate 15 is chosen so that the result depends only on src0, but we still
11341 // pass the same register for all three operands to prevent a false dependency.
11342 // TODO: We should maybe have a more generalized algorithm for folding to
11343 // vpternlog.
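// As a quick check of the encoding (again assuming imm8 bit i is selected by
// (op0<<2)|(op1<<1)|op2): 15 = 0x0f sets only bits 0-3, so the result bit is 1
// exactly when op0's bit is 0, i.e. the instruction computes ~op0 regardless
// of what op1 and op2 contain:
//   result_bit = (0x0f >> ((a << 2) | (b << 1) | c)) & 1;  // == !a for any b, c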
11344 let Predicates = [HasAVX512] in {
11345   def : Pat<(xor VR512:$src, (v64i8 immAllOnesV)),
11346             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11347   def : Pat<(xor VR512:$src, (v32i16 immAllOnesV)),
11348             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11349   def : Pat<(xor VR512:$src, (v16i32 immAllOnesV)),
11350             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11351   def : Pat<(xor VR512:$src, (v8i64 immAllOnesV)),
11352             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11355 let Predicates = [HasAVX512, NoVLX] in {
11356   def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
11357             (EXTRACT_SUBREG
11358              (VPTERNLOGQZrri
11359               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11360               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11361               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11362               (i8 15)), sub_xmm)>;
11363   def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
11364             (EXTRACT_SUBREG
11365              (VPTERNLOGQZrri
11366               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11367               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11368               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11369               (i8 15)), sub_xmm)>;
11370   def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
11371             (EXTRACT_SUBREG
11372              (VPTERNLOGQZrri
11373               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11374               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11375               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11376               (i8 15)), sub_xmm)>;
11377   def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
11378             (EXTRACT_SUBREG
11379              (VPTERNLOGQZrri
11380               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11381               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11382               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11383               (i8 15)), sub_xmm)>;
11385   def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
11386             (EXTRACT_SUBREG
11387              (VPTERNLOGQZrri
11388               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11389               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11390               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11391               (i8 15)), sub_ymm)>;
11392   def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
11393             (EXTRACT_SUBREG
11394              (VPTERNLOGQZrri
11395               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11396               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11397               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11398               (i8 15)), sub_ymm)>;
11399   def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
11400             (EXTRACT_SUBREG
11401              (VPTERNLOGQZrri
11402               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11403               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11404               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11405               (i8 15)), sub_ymm)>;
11406   def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
11407             (EXTRACT_SUBREG
11408              (VPTERNLOGQZrri
11409               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11410               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11411               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11412               (i8 15)), sub_ymm)>;
11415 let Predicates = [HasVLX] in {
11416   def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
11417             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11418   def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
11419             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11420   def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
11421             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11422   def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
11423             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11425   def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
11426             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11427   def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
11428             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11429   def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
11430             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11431   def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
11432             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11435 //===----------------------------------------------------------------------===//
11436 // AVX-512 - FixupImm
11437 //===----------------------------------------------------------------------===//
11439 multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
11440                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
11441                                   X86VectorVTInfo TblVT>{
11442   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11443     defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11444                         (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11445                          OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11446                         (X86VFixupimm (_.VT _.RC:$src1),
11447                                       (_.VT _.RC:$src2),
11448                                       (TblVT.VT _.RC:$src3),
11449                                       (i32 imm:$src4))>, Sched<[sched]>;
11450     defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11451                       (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
11452                       OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11453                       (X86VFixupimm (_.VT _.RC:$src1),
11454                                     (_.VT _.RC:$src2),
11455                                     (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
11456                                     (i32 imm:$src4))>,
11457                       Sched<[sched.Folded, sched.ReadAfterFold]>;
11458     defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11459                       (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11460                     OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
11461                     "$src2, ${src3}"##_.BroadcastStr##", $src4",
11462                       (X86VFixupimm (_.VT _.RC:$src1),
11463                                     (_.VT _.RC:$src2),
11464                                     (TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))),
11465                                     (i32 imm:$src4))>,
11466                     EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
11467   } // Constraints = "$src1 = $dst"
11470 multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
11471                                       X86FoldableSchedWrite sched,
11472                                       X86VectorVTInfo _, X86VectorVTInfo TblVT>
11473   : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
11474 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11475   defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11476                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11477                       OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
11478                       "$src2, $src3, {sae}, $src4",
11479                       (X86VFixupimmSAE (_.VT _.RC:$src1),
11480                                        (_.VT _.RC:$src2),
11481                                        (TblVT.VT _.RC:$src3),
11482                                        (i32 imm:$src4))>,
11483                       EVEX_B, Sched<[sched]>;
11484   }
11487 multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
11488                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
11489                                   X86VectorVTInfo _src3VT> {
11490   let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
11491       ExeDomain = _.ExeDomain in {
11492     defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11493                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11494                       OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11495                       (X86VFixupimms (_.VT _.RC:$src1),
11496                                      (_.VT _.RC:$src2),
11497                                      (_src3VT.VT _src3VT.RC:$src3),
11498                                      (i32 imm:$src4))>, Sched<[sched]>;
11499     defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11500                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11501                       OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
11502                       "$src2, $src3, {sae}, $src4",
11503                       (X86VFixupimmSAEs (_.VT _.RC:$src1),
11504                                         (_.VT _.RC:$src2),
11505                                         (_src3VT.VT _src3VT.RC:$src3),
11506                                         (i32 imm:$src4))>,
11507                       EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
11508     defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
11509                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11510                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11511                      (X86VFixupimms (_.VT _.RC:$src1),
11512                                     (_.VT _.RC:$src2),
11513                                     (_src3VT.VT (scalar_to_vector
11514                                               (_src3VT.ScalarLdFrag addr:$src3))),
11515                                     (i32 imm:$src4))>,
11516                      Sched<[sched.Folded, sched.ReadAfterFold]>;
11517   }
11520 multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
11521                                       AVX512VLVectorVTInfo _Vec, 
11522                                       AVX512VLVectorVTInfo _Tbl> {
11523   let Predicates = [HasAVX512] in
11524     defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
11525                                 _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
11526                                 EVEX_4V, EVEX_V512;
11527   let Predicates = [HasAVX512, HasVLX] in {
11528     defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
11529                             _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
11530                             EVEX_4V, EVEX_V128;
11531     defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
11532                             _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
11533                             EVEX_4V, EVEX_V256;
11534   }
11537 defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11538                                            SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
11539                           AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11540 defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11541                                            SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
11542                           AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
11543 defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
11544                          avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11545 defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
11546                          avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
11548 // Patterns used to select SSE scalar fp arithmetic instructions from
11549 // either:
11551 // (1) a scalar fp operation followed by a blend
11553 // The effect is that the backend no longer emits unnecessary vector
11554 // insert instructions immediately after SSE scalar fp instructions
11555 // like addss or mulss.
11557 // For example, given the following code:
11558 //   __m128 foo(__m128 A, __m128 B) {
11559 //     A[0] += B[0];
11560 //     return A;
11561 //   }
11563 // Previously we generated:
11564 //   addss %xmm0, %xmm1
11565 //   movss %xmm1, %xmm0
11567 // We now generate:
11568 //   addss %xmm1, %xmm0
11570 // (2) a vector packed single/double fp operation followed by a vector insert
11572 // The effect is that the backend converts the packed fp instruction
11573 // followed by a vector insert into a single SSE scalar fp instruction.
11575 // For example, given the following code:
11576 //   __m128 foo(__m128 A, __m128 B) {
11577 //     __m128 C = A + B;
11578 //     return (__m128) {C[0], A[1], A[2], A[3]};
11579 //   }
11581 // Previously we generated:
11582 //   addps %xmm0, %xmm1
11583 //   movss %xmm1, %xmm0
11585 // We now generate:
11586 //   addss %xmm1, %xmm0
11588 // TODO: Some canonicalization in lowering would simplify the number of
11589 // patterns we have to try to match.
11590 multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode,
11591                                            X86VectorVTInfo _, PatLeaf ZeroFP> {
11592   let Predicates = [HasAVX512] in {
11593     // extracted scalar math op with insert via movss
11594     def : Pat<(MoveNode
11595                (_.VT VR128X:$dst),
11596                (_.VT (scalar_to_vector
11597                       (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
11598                           _.FRC:$src)))),
11599               (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
11600                (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
11601     def : Pat<(MoveNode
11602                (_.VT VR128X:$dst),
11603                (_.VT (scalar_to_vector
11604                       (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
11605                           (_.ScalarLdFrag addr:$src))))),
11606               (!cast<Instruction>("V"#OpcPrefix#Zrm_Int) _.VT:$dst, addr:$src)>;
11608     // extracted masked scalar math op with insert via movss
11609     def : Pat<(MoveNode (_.VT VR128X:$src1),
11610                (scalar_to_vector
11611                 (X86selects VK1WM:$mask,
11612                             (Op (_.EltVT
11613                                  (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11614                                 _.FRC:$src2),
11615                             _.FRC:$src0))),
11616               (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk)
11617                (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
11618                VK1WM:$mask, _.VT:$src1,
11619                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
11620     def : Pat<(MoveNode (_.VT VR128X:$src1),
11621                (scalar_to_vector
11622                 (X86selects VK1WM:$mask,
11623                             (Op (_.EltVT
11624                                  (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11625                                 (_.ScalarLdFrag addr:$src2)),
11626                             _.FRC:$src0))),
11627               (!cast<Instruction>("V"#OpcPrefix#Zrm_Intk)
11628                (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
11629                VK1WM:$mask, _.VT:$src1, addr:$src2)>;
11631     // extracted masked scalar math op with insert via movss
11632     def : Pat<(MoveNode (_.VT VR128X:$src1),
11633                (scalar_to_vector
11634                 (X86selects VK1WM:$mask,
11635                             (Op (_.EltVT
11636                                  (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11637                                 _.FRC:$src2), (_.EltVT ZeroFP)))),
11638       (!cast<Instruction>("V"#OpcPrefix#Zrr_Intkz)
11639           VK1WM:$mask, _.VT:$src1,
11640           (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
11641     def : Pat<(MoveNode (_.VT VR128X:$src1),
11642                (scalar_to_vector
11643                 (X86selects VK1WM:$mask,
11644                             (Op (_.EltVT
11645                                  (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11646                                 (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
11647       (!cast<Instruction>("V"#OpcPrefix#Zrm_Intkz) VK1WM:$mask, _.VT:$src1, addr:$src2)>;
11648   }
11651 defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
11652 defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
11653 defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
11654 defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
11656 defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
11657 defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
11658 defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
11659 defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
11661 multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
11662                                              SDNode Move, X86VectorVTInfo _> {
11663   let Predicates = [HasAVX512] in {
11664     def : Pat<(_.VT (Move _.VT:$dst,
11665                      (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
11666               (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src)>;
11667   }
11670 defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
11671 defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
11673 //===----------------------------------------------------------------------===//
11674 // AES instructions
11675 //===----------------------------------------------------------------------===//
11677 multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
11678   let Predicates = [HasVLX, HasVAES] in {
11679     defm Z128 : AESI_binop_rm_int<Op, OpStr,
11680                                   !cast<Intrinsic>(IntPrefix),
11681                                   loadv2i64, 0, VR128X, i128mem>,
11682                   EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
11683     defm Z256 : AESI_binop_rm_int<Op, OpStr,
11684                                   !cast<Intrinsic>(IntPrefix##"_256"),
11685                                   loadv4i64, 0, VR256X, i256mem>,
11686                   EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
11687     }
11688     let Predicates = [HasAVX512, HasVAES] in
11689     defm Z    : AESI_binop_rm_int<Op, OpStr,
11690                                   !cast<Intrinsic>(IntPrefix##"_512"),
11691                                   loadv8i64, 0, VR512, i512mem>,
11692                   EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
11695 defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
11696 defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
11697 defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
11698 defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
11700 //===----------------------------------------------------------------------===//
11701 // PCLMUL instructions - Carry less multiplication
11702 //===----------------------------------------------------------------------===//
11704 let Predicates = [HasAVX512, HasVPCLMULQDQ] in
11705 defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
11706                               EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;
11708 let Predicates = [HasVLX, HasVPCLMULQDQ] in {
11709 defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
11710                               EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;
11712 defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
11713                                 int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
11714                                 EVEX_CD8<64, CD8VF>, VEX_WIG;
11717 // Aliases
11718 defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
11719 defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
11720 defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
11722 //===----------------------------------------------------------------------===//
11723 // VBMI2
11724 //===----------------------------------------------------------------------===//
11726 multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
11727                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
11728   let Constraints = "$src1 = $dst",
11729       ExeDomain   = VTI.ExeDomain in {
11730     defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
11731                 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
11732                 "$src3, $src2", "$src2, $src3",
11733                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
11734                 AVX512FMA3Base, Sched<[sched]>;
11735     defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11736                 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
11737                 "$src3, $src2", "$src2, $src3",
11738                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
11739                         (VTI.VT (VTI.LdFrag addr:$src3))))>,
11740                 AVX512FMA3Base,
11741                 Sched<[sched.Folded, sched.ReadAfterFold]>;
11742   }
11745 multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
11746                                X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
11747          : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
11748   let Constraints = "$src1 = $dst",
11749       ExeDomain   = VTI.ExeDomain in
11750   defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11751               (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
11752               "${src3}"##VTI.BroadcastStr##", $src2",
11753               "$src2, ${src3}"##VTI.BroadcastStr,
11754               (OpNode VTI.RC:$src1, VTI.RC:$src2,
11755                (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>,
11756               AVX512FMA3Base, EVEX_B,
11757               Sched<[sched.Folded, sched.ReadAfterFold]>;
11760 multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
11761                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
11762   let Predicates = [HasVBMI2] in
11763   defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
11764                                    EVEX_V512;
11765   let Predicates = [HasVBMI2, HasVLX] in {
11766     defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
11767                                    EVEX_V256;
11768     defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
11769                                    EVEX_V128;
11770   }
11773 multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
11774                                       X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
11775   let Predicates = [HasVBMI2] in
11776   defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
11777                                     EVEX_V512;
11778   let Predicates = [HasVBMI2, HasVLX] in {
11779     defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
11780                                     EVEX_V256;
11781     defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
11782                                     EVEX_V128;
11783   }
11785 multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
11786                            SDNode OpNode, X86SchedWriteWidths sched> {
11787   defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, sched,
11788              avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
11789   defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, sched,
11790              avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11791   defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, sched,
11792              avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
11795 multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
11796                            SDNode OpNode, X86SchedWriteWidths sched> {
11797   defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", sched,
11798              avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
11799              VEX_W, EVEX_CD8<16, CD8VF>;
11800   defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
11801              OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11802   defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
11803              sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
11806 // Concat & Shift
11807 defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
11808 defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
11809 defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
11810 defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
11812 // Compress
11813 defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
11814                                          avx512vl_i8_info, HasVBMI2>, EVEX,
11815                                          NotMemoryFoldable;
11816 defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
11817                                           avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
11818                                           NotMemoryFoldable;
11819 // Expand
11820 defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
11821                                       avx512vl_i8_info, HasVBMI2>, EVEX;
11822 defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
11823                                       avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
11825 //===----------------------------------------------------------------------===//
11826 // VNNI
11827 //===----------------------------------------------------------------------===//
11829 let Constraints = "$src1 = $dst" in
11830 multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
11831                     X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
11832                     bit IsCommutable> {
11833   defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
11834                                    (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
11835                                    "$src3, $src2", "$src2, $src3",
11836                                    (VTI.VT (OpNode VTI.RC:$src1,
11837                                             VTI.RC:$src2, VTI.RC:$src3)),
11838                                    IsCommutable, IsCommutable>,
11839                                    EVEX_4V, T8PD, Sched<[sched]>;
11840   defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11841                                    (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
11842                                    "$src3, $src2", "$src2, $src3",
11843                                    (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
11844                                             (VTI.VT (VTI.LdFrag addr:$src3))))>,
11845                                    EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
11846                                    Sched<[sched.Folded, sched.ReadAfterFold]>;
11847   defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11848                                    (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
11849                                    OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
11850                                    "$src2, ${src3}"##VTI.BroadcastStr,
11851                                    (OpNode VTI.RC:$src1, VTI.RC:$src2,
11852                                     (VTI.VT (X86VBroadcast
11853                                              (VTI.ScalarLdFrag addr:$src3))))>,
11854                                    EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
11855                                    T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
11858 multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
11859                        X86SchedWriteWidths sched, bit IsCommutable> {
11860   let Predicates = [HasVNNI] in
11861   defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
11862                            IsCommutable>, EVEX_V512;
11863   let Predicates = [HasVNNI, HasVLX] in {
11864     defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
11865                            IsCommutable>, EVEX_V256;
11866     defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
11867                            IsCommutable>, EVEX_V128;
11868   }
11871 // FIXME: Is there a better scheduler class for VPDP?
11872 defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
11873 defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
11874 defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
11875 defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
11877 def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
11878                              (X86vpmaddwd node:$lhs, node:$rhs), [{
11879   return N->hasOneUse();
11880 }]>;
11882 // Patterns to match VPDPWSSD from existing instructions/intrinsics.
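// For example, with VNNI enabled a single-use vpmaddwd feeding an add, as in
// the (illustrative) intrinsics sketch below, is expected to match these
// patterns and select one vpdpwssd instead of vpmaddwd + vpaddd:
//   __m512i dot_acc(__m512i acc, __m512i a, __m512i b) {
//     return _mm512_add_epi32(acc, _mm512_madd_epi16(a, b));
//   }
// The one-use check in X86vpmaddwd_su above prevents the fold when the
// vpmaddwd result has other users.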
11883 let Predicates = [HasVNNI] in {
11884   def : Pat<(v16i32 (add VR512:$src1,
11885                          (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
11886             (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
11887   def : Pat<(v16i32 (add VR512:$src1,
11888                          (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
11889             (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
11891 let Predicates = [HasVNNI,HasVLX] in {
11892   def : Pat<(v8i32 (add VR256X:$src1,
11893                         (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
11894             (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
11895   def : Pat<(v8i32 (add VR256X:$src1,
11896                         (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
11897             (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
11898   def : Pat<(v4i32 (add VR128X:$src1,
11899                         (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
11900             (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
11901   def : Pat<(v4i32 (add VR128X:$src1,
11902                         (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
11903             (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
11906 //===----------------------------------------------------------------------===//
11907 // Bit Algorithms
11908 //===----------------------------------------------------------------------===//
11910 // FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
11911 defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
11912                                    avx512vl_i8_info, HasBITALG>;
11913 defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
11914                                    avx512vl_i16_info, HasBITALG>, VEX_W;
11916 defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
11917 defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
11919 def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
11920                                  (X86Vpshufbitqmb node:$src1, node:$src2), [{
11921   return N->hasOneUse();
11922 }]>;
11924 multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
11925   defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
11926                                 (ins VTI.RC:$src1, VTI.RC:$src2),
11927                                 "vpshufbitqmb",
11928                                 "$src2, $src1", "$src1, $src2",
11929                                 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
11930                                 (VTI.VT VTI.RC:$src2)),
11931                                 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
11932                                 (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
11933                                 Sched<[sched]>;
11934   defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
11935                                 (ins VTI.RC:$src1, VTI.MemOp:$src2),
11936                                 "vpshufbitqmb",
11937                                 "$src2, $src1", "$src1, $src2",
11938                                 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
11939                                 (VTI.VT (VTI.LdFrag addr:$src2))),
11940                                 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
11941                                 (VTI.VT (VTI.LdFrag addr:$src2)))>,
11942                                 EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
11943                                 Sched<[sched.Folded, sched.ReadAfterFold]>;
11946 multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
11947   let Predicates = [HasBITALG] in
11948   defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
11949   let Predicates = [HasBITALG, HasVLX] in {
11950     defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
11951     defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
11952   }
11955 // FIXME: Is there a better scheduler class for VPSHUFBITQMB?
11956 defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
11958 //===----------------------------------------------------------------------===//
11959 // GFNI
11960 //===----------------------------------------------------------------------===//
11962 multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
11963                                    X86SchedWriteWidths sched> {
11964   let Predicates = [HasGFNI, HasAVX512, HasBWI] in
11965   defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
11966                                 EVEX_V512;
11967   let Predicates = [HasGFNI, HasVLX, HasBWI] in {
11968     defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
11969                                 EVEX_V256;
11970     defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
11971                                 EVEX_V128;
11972   }
11975 defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
11976                                           SchedWriteVecALU>,
11977                                           EVEX_CD8<8, CD8VF>, T8PD;
11979 multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
11980                                       X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
11981                                       X86VectorVTInfo BcstVTI>
11982            : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
11983   let ExeDomain = VTI.ExeDomain in
11984   defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11985                 (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
11986                 OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
11987                 "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
11988                 (OpNode (VTI.VT VTI.RC:$src1),
11989                  (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
11990                  (i8 imm:$src3))>, EVEX_B,
11991                  Sched<[sched.Folded, sched.ReadAfterFold]>;
11994 multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
11995                                      X86SchedWriteWidths sched> {
11996   let Predicates = [HasGFNI, HasAVX512, HasBWI] in
11997   defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
11998                                            v64i8_info, v8i64_info>, EVEX_V512;
11999   let Predicates = [HasGFNI, HasVLX, HasBWI] in {
12000     defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
12001                                            v32i8x_info, v4i64x_info>, EVEX_V256;
12002     defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
12003                                            v16i8x_info, v2i64x_info>, EVEX_V128;
12004   }
12007 defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
12008                          X86GF2P8affineinvqb, SchedWriteVecIMul>,
12009                          EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12010 defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
12011                          X86GF2P8affineqb, SchedWriteVecIMul>,
12012                          EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12015 //===----------------------------------------------------------------------===//
12016 // AVX5124FMAPS
12017 //===----------------------------------------------------------------------===//
12019 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
12020     Constraints = "$src1 = $dst" in {
12021 defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
12022                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12023                     "v4fmaddps", "$src3, $src2", "$src2, $src3",
12024                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12025                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12027 defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
12028                      (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12029                      "v4fnmaddps", "$src3, $src2", "$src2, $src3",
12030                      []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12031                      Sched<[SchedWriteFMA.ZMM.Folded]>;
12033 defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
12034                     (outs VR128X:$dst), (ins  VR128X:$src2, f128mem:$src3),
12035                     "v4fmaddss", "$src3, $src2", "$src2, $src3",
12036                     []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12037                     Sched<[SchedWriteFMA.Scl.Folded]>;
12039 defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
12040                      (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12041                      "v4fnmaddss", "$src3, $src2", "$src2, $src3",
12042                      []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12043                      Sched<[SchedWriteFMA.Scl.Folded]>;
12046 //===----------------------------------------------------------------------===//
12047 // AVX5124VNNIW
12048 //===----------------------------------------------------------------------===//
12050 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
12051     Constraints = "$src1 = $dst" in {
12052 defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
12053                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12054                      "vp4dpwssd", "$src3, $src2", "$src2, $src3",
12055                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12056                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12058 defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
12059                      (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12060                      "vp4dpwssds", "$src3, $src2", "$src2, $src3",
12061                      []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12062                      Sched<[SchedWriteFMA.ZMM.Folded]>;
12065 let hasSideEffects = 0 in {
12066   let mayStore = 1 in
12067   def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
12068   let mayLoad = 1 in
12069   def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
12072 //===----------------------------------------------------------------------===//
12073 // VP2INTERSECT
12074 //===----------------------------------------------------------------------===//
12076 multiclass avx512_vp2intersect_modes<X86VectorVTInfo _> {
12077   def rr : I<0x68, MRMSrcReg,
12078                   (outs _.KRPC:$dst),
12079                   (ins _.RC:$src1, _.RC:$src2),
12080                   !strconcat("vp2intersect", _.Suffix,
12081                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12082                   [(set _.KRPC:$dst, (X86vp2intersect
12083                             _.RC:$src1, (_.VT _.RC:$src2)))]>,
12084                   EVEX_4V, T8XD;
12086   def rm : I<0x68, MRMSrcMem,
12087                   (outs _.KRPC:$dst),
12088                   (ins  _.RC:$src1, _.MemOp:$src2),
12089                   !strconcat("vp2intersect", _.Suffix,
12090                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12091                   [(set _.KRPC:$dst, (X86vp2intersect
12092                             _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12093                   EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>;
12095   def rmb : I<0x68, MRMSrcMem,
12096                   (outs _.KRPC:$dst),
12097                   (ins _.RC:$src1, _.ScalarMemOp:$src2),
12098                   !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12099                              ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12100                   [(set _.KRPC:$dst, (X86vp2intersect
12101                              _.RC:$src1, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2)))))]>,
12102                   EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
12105 multiclass avx512_vp2intersect<AVX512VLVectorVTInfo _> {
12106   let Predicates  = [HasAVX512, HasVP2INTERSECT] in
12107     defm Z : avx512_vp2intersect_modes<_.info512>, EVEX_V512;
12109   let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
12110     defm Z256 : avx512_vp2intersect_modes<_.info256>, EVEX_V256;
12111     defm Z128 : avx512_vp2intersect_modes<_.info128>, EVEX_V128;
12112   }
12115 defm VP2INTERSECTD : avx512_vp2intersect<avx512vl_i32_info>;
12116 defm VP2INTERSECTQ : avx512_vp2intersect<avx512vl_i64_info>, VEX_W;
12118 multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
12119                              X86SchedWriteWidths sched,
12120                              AVX512VLVectorVTInfo _SrcVTInfo,
12121                              AVX512VLVectorVTInfo _DstVTInfo,
12122                              SDNode OpNode, Predicate prd,
12123                              bit IsCommutable = 0> {
12124   let Predicates = [prd] in
12125     defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
12126                                    _SrcVTInfo.info512, _DstVTInfo.info512,
12127                                    _SrcVTInfo.info512, IsCommutable>,
12128                                    EVEX_V512, EVEX_CD8<32, CD8VF>;
12129   let Predicates = [HasVLX, prd] in {
12130     defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
12131                                       _SrcVTInfo.info256, _DstVTInfo.info256,
12132                                       _SrcVTInfo.info256, IsCommutable>,
12133                                      EVEX_V256, EVEX_CD8<32, CD8VF>;
12134     defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
12135                                       _SrcVTInfo.info128, _DstVTInfo.info128,
12136                                       _SrcVTInfo.info128, IsCommutable>,
12137                                       EVEX_V128, EVEX_CD8<32, CD8VF>;
12138   }
12139 }
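// VCVTNE2PS2BF16 converts the packed single-precision elements of two source
// vectors into a single packed BF16 result.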
12141 defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
12142                                         SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
12143                                         avx512vl_f32_info, avx512vl_i16_info,
12144                                         X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
12146 // Truncate Float to BFloat16
12147 multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
12148                              X86SchedWriteWidths sched> {
12149   let Predicates = [HasBF16] in {
12150     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
12151                             X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
12152   }
12153   let Predicates = [HasBF16, HasVLX] in {
12154     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
12155                                null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
12156                                VK4WM>, EVEX_V128;
12157     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
12158                                X86cvtneps2bf16,
12159                                sched.YMM, "{1to8}", "{y}">, EVEX_V256;
12161     def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
12162                     (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
12163                     VR128X:$src), 0>;
12164     def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
12165                     (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
12166                     f128mem:$src), 0, "intel">;
12167     def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
12168                     (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
12169                     VR256X:$src), 0>;
12170     def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
12171                     (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
12172                     f256mem:$src), 0, "intel">;
12173   }
12174 }
12176 defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
12177                                        SchedWriteCvtPD2PS>, T8XS,
12178                                        EVEX_CD8<32, CD8VF>;
12180 let Predicates = [HasBF16, HasVLX] in {
12181   // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
12182   // patterns have been disabled with null_frag.
12183   def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
12184             (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12185   def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
12186                               VK4WM:$mask),
12187             (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
12188   def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
12189                               VK4WM:$mask),
12190             (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
12192   def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
12193             (VCVTNEPS2BF16Z128rm addr:$src)>;
12194   def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
12195                               VK4WM:$mask),
12196             (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12197   def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
12198                               VK4WM:$mask),
12199             (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
12201   def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
12202                                      (X86VBroadcast (loadf32 addr:$src))))),
12203             (VCVTNEPS2BF16Z128rmb addr:$src)>;
12204   def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))),
12205                               (v8i16 VR128X:$src0), VK4WM:$mask),
12206             (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12207   def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))),
12208                               v8i16x_info.ImmAllZerosV, VK4WM:$mask),
12209             (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
12210 }
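// VDPBF16PS computes dot products of BF16 element pairs and accumulates the
// results into the packed single-precision destination; $src1 is tied to
// $dst. The r/m/mb variants cover register, memory and embedded-broadcast
// third operands.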
12212 let Constraints = "$src1 = $dst" in {
12213 multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
12214                               X86VectorVTInfo _, X86VectorVTInfo src_v> {
12215   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12216                            (ins _.RC:$src2, _.RC:$src3),
12217                            OpcodeStr, "$src3, $src2", "$src2, $src3",
12218                            (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
12219                            EVEX_4V;
12221   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12222                                (ins _.RC:$src2, _.MemOp:$src3),
12223                                OpcodeStr, "$src3, $src2", "$src2, $src3",
12224                                (_.VT (OpNode _.RC:$src1, _.RC:$src2,
12225                                (src_v.VT (bitconvert
12226                                (src_v.LdFrag addr:$src3)))))>, EVEX_4V;
12228   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12229                   (ins _.RC:$src2, _.ScalarMemOp:$src3),
12230                   OpcodeStr,
12231                   !strconcat("${src3}", _.BroadcastStr,", $src2"),
12232                   !strconcat("$src2, ${src3}", _.BroadcastStr),
12233                   (_.VT (OpNode _.RC:$src1, _.RC:$src2,
12234                   (src_v.VT (X86VBroadcast(src_v.ScalarLdFrag addr:$src3)))))>,
12235                   EVEX_B, EVEX_4V;
12236 }
12238 } // Constraints = "$src1 = $dst"
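// avx512_dpbf16ps_sizes instantiates the 512-, 256- and 128-bit forms of the
// multiclass above, gating the two narrower forms on VLX in addition to the
// feature predicate.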
12240 multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
12241                                  AVX512VLVectorVTInfo _,
12242                                  AVX512VLVectorVTInfo src_v, Predicate prd> {
12243   let Predicates = [prd] in {
12244     defm Z    : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info512,
12245                                    src_v.info512>, EVEX_V512;
12246   }
12247   let Predicates = [HasVLX, prd] in {
12248     defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info256,
12249                                    src_v.info256>, EVEX_V256;
12250     defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info128,
12251                                    src_v.info128>, EVEX_V128;
12252   }
12253 }
12255 defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps,
12256                                        avx512vl_f32_info, avx512vl_i32_info,
12257                                        HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;